From a9170eb3ef5b9d83b956282e18a043cbad9ab443 Mon Sep 17 00:00:00 2001 From: Jacobinwwey Date: Fri, 20 Mar 2026 22:28:56 -0500 Subject: [PATCH 01/14] feat(cli): add preliminary openclaw migration command --- astrbot/cli/__main__.py | 3 +- astrbot/cli/commands/__init__.py | 3 +- astrbot/cli/commands/cmd_migrate.py | 605 ++++++++++++++++++++++++ docs/en/deploy/astrbot/cli.md | 14 + docs/zh/deploy/astrbot/cli.md | 14 + tests/unit/test_cli_openclaw_migrate.py | 145 ++++++ 6 files changed, 782 insertions(+), 2 deletions(-) create mode 100644 astrbot/cli/commands/cmd_migrate.py create mode 100644 tests/unit/test_cli_openclaw_migrate.py diff --git a/astrbot/cli/__main__.py b/astrbot/cli/__main__.py index 6d48ec28d5..f10f14ca59 100644 --- a/astrbot/cli/__main__.py +++ b/astrbot/cli/__main__.py @@ -5,7 +5,7 @@ import click from . import __version__ -from .commands import conf, init, plug, run +from .commands import conf, init, migrate, plug, run logo_tmpl = r""" ___ _______.___________..______ .______ ______ .___________. @@ -54,6 +54,7 @@ def help(command_name: str | None) -> None: cli.add_command(help) cli.add_command(plug) cli.add_command(conf) +cli.add_command(migrate) if __name__ == "__main__": cli() diff --git a/astrbot/cli/commands/__init__.py b/astrbot/cli/commands/__init__.py index 1d3e0bca2f..fdabff70c4 100644 --- a/astrbot/cli/commands/__init__.py +++ b/astrbot/cli/commands/__init__.py @@ -1,6 +1,7 @@ from .cmd_conf import conf from .cmd_init import init +from .cmd_migrate import migrate from .cmd_plug import plug from .cmd_run import run -__all__ = ["conf", "init", "plug", "run"] +__all__ = ["conf", "init", "migrate", "plug", "run"] diff --git a/astrbot/cli/commands/cmd_migrate.py b/astrbot/cli/commands/cmd_migrate.py new file mode 100644 index 0000000000..a74284e5ef --- /dev/null +++ b/astrbot/cli/commands/cmd_migrate.py @@ -0,0 +1,605 @@ +from __future__ import annotations + +import datetime as dt +import json +import shutil +import sqlite3 +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Any + +import click + +from ..utils import check_astrbot_root, get_astrbot_root + +SQLITE_KEY_CANDIDATES = ("key", "id", "name") +SQLITE_CONTENT_CANDIDATES = ("content", "value", "text", "memory") +SQLITE_CATEGORY_CANDIDATES = ("category", "kind", "type") +SQLITE_TS_CANDIDATES = ("updated_at", "created_at", "timestamp", "ts", "time") + + +@dataclass(slots=True) +class MemoryEntry: + key: str + content: str + category: str + timestamp: str | None + source: str + order: int + + +@dataclass(slots=True) +class MigrationReport: + source_root: str + source_workspace: str + target_dir: str | None + dry_run: bool + memory_entries_total: int + memory_entries_from_sqlite: int + memory_entries_from_markdown: int + workspace_files_total: int + workspace_bytes_total: int + config_found: bool + copied_workspace_files: int + copied_memory_entries: int + wrote_timeline: bool + wrote_config_toml: bool + + +def _pick_existing_column(columns: set[str], candidates: tuple[str, ...]) -> str | None: + for candidate in candidates: + if candidate in columns: + return candidate + return None + + +def _normalize_timestamp(raw: Any) -> str | None: + if raw is None: + return None + + if isinstance(raw, (int, float)): + ts = float(raw) + if ts > 1e12: + ts = ts / 1000.0 + try: + return dt.datetime.fromtimestamp(ts, tz=dt.timezone.utc).isoformat() + except Exception: + return str(raw) + + text = str(raw).strip() + if not text: + return None + + if text.isdigit(): + return _normalize_timestamp(int(text)) + + maybe_iso = text.replace("Z", "+00:00") + try: + parsed = dt.datetime.fromisoformat(maybe_iso) + if parsed.tzinfo is None: + parsed = parsed.replace(tzinfo=dt.timezone.utc) + return parsed.isoformat() + except Exception: + return text + + +def _normalize_key(raw: Any, fallback_idx: int) -> str: + text = str(raw).strip() if raw is not None else "" + if text: + return text + return f"openclaw_{fallback_idx}" + + +def _parse_structured_line(line: str) -> tuple[str, str] | None: + if not line.startswith("**"): + return None + rest = line[2:] + marker = "**:" + marker_idx = rest.find(marker) + if marker_idx <= 0: + return None + key = rest[:marker_idx].strip() + value = rest[marker_idx + len(marker) :].strip() + if not key or not value: + return None + return key, value + + +def _read_openclaw_sqlite_entries(db_path: Path) -> list[MemoryEntry]: + if not db_path.exists(): + return [] + + conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True) + try: + cursor = conn.cursor() + table_exists = cursor.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='memories' LIMIT 1" + ).fetchone() + if table_exists is None: + return [] + + columns = { + str(row[1]).strip().lower() + for row in cursor.execute("PRAGMA table_info(memories)").fetchall() + } + + key_col = _pick_existing_column(columns, SQLITE_KEY_CANDIDATES) or "rowid" + content_col = _pick_existing_column(columns, SQLITE_CONTENT_CANDIDATES) + if content_col is None: + raise click.ClickException( + f"OpenClaw sqlite exists at {db_path}, but no content-like column found" + ) + category_col = _pick_existing_column(columns, SQLITE_CATEGORY_CANDIDATES) + ts_col = _pick_existing_column(columns, SQLITE_TS_CANDIDATES) + + selected_cols = [key_col, content_col] + if category_col is not None: + selected_cols.append(category_col) + if ts_col is not None and ts_col not in selected_cols: + selected_cols.append(ts_col) + + sql = "SELECT " + ", ".join(selected_cols) + " FROM memories" + rows = cursor.execute(sql).fetchall() + + entries: list[MemoryEntry] = [] + for idx, row in enumerate(rows): + row_values = list(row) + key_raw = row_values[0] if row_values else None + content_raw = row_values[1] if len(row_values) > 1 else "" + category_raw = row_values[2] if category_col is not None and len(row_values) > 2 else "core" + ts_index = len(row_values) - 1 if ts_col is not None else -1 + ts_raw = row_values[ts_index] if ts_col is not None and row_values else None + + content = str(content_raw).strip() + if not content: + continue + + entries.append( + MemoryEntry( + key=_normalize_key(key_raw, idx), + content=content, + category=str(category_raw or "core").strip().lower() or "core", + timestamp=_normalize_timestamp(ts_raw), + source=f"sqlite:{db_path}", + order=idx, + ) + ) + + return entries + finally: + conn.close() + + +def _parse_markdown_file( + path: Path, default_category: str, stem: str, order_offset: int +) -> list[MemoryEntry]: + content = path.read_text(encoding="utf-8", errors="ignore") + mtime = _normalize_timestamp(path.stat().st_mtime) + entries: list[MemoryEntry] = [] + line_no = 0 + for raw_line in content.splitlines(): + line_no += 1 + stripped = raw_line.strip() + if not stripped or stripped.startswith("#"): + continue + + line = stripped[2:] if stripped.startswith("- ") else stripped + parsed = _parse_structured_line(line) + if parsed is not None: + key, text = parsed + key = _normalize_key(key, line_no) + body = text.strip() + else: + key = f"openclaw_{stem}_{line_no}" + body = line.strip() + + if not body: + continue + + entries.append( + MemoryEntry( + key=key, + content=body, + category=default_category, + timestamp=mtime, + source=f"markdown:{path}", + order=order_offset + len(entries), + ) + ) + return entries + + +def _read_openclaw_markdown_entries(workspace_dir: Path) -> list[MemoryEntry]: + entries: list[MemoryEntry] = [] + + core_path = workspace_dir / "MEMORY.md" + if core_path.exists(): + entries.extend( + _parse_markdown_file( + core_path, + default_category="core", + stem="core", + order_offset=len(entries), + ) + ) + + daily_dir = workspace_dir / "memory" + if daily_dir.exists(): + for md_path in sorted(daily_dir.glob("*.md")): + stem = md_path.stem or "daily" + entries.extend( + _parse_markdown_file( + md_path, + default_category="daily", + stem=stem, + order_offset=len(entries), + ) + ) + + return entries + + +def _dedup_entries(entries: list[MemoryEntry]) -> list[MemoryEntry]: + seen: set[str] = set() + deduped: list[MemoryEntry] = [] + for item in entries: + signature = "\x00".join( + [ + item.key.strip(), + item.content.strip(), + item.category.strip(), + item.timestamp or "", + ] + ) + if signature in seen: + continue + seen.add(signature) + deduped.append(item) + return deduped + + +def _find_source_workspace(source_root: Path) -> Path: + candidate = source_root / "workspace" + if candidate.exists() and candidate.is_dir(): + return candidate + return source_root + + +def _find_openclaw_config_json(source_root: Path, workspace_dir: Path) -> Path | None: + candidates = [ + source_root / "config.json", + source_root / "settings.json", + workspace_dir / "config.json", + workspace_dir / "settings.json", + ] + for candidate in candidates: + if candidate.exists() and candidate.is_file(): + return candidate + return None + + +def _collect_workspace_files(workspace_dir: Path) -> list[Path]: + files: list[Path] = [] + for path in workspace_dir.rglob("*"): + if path.is_file() and not path.is_symlink(): + files.append(path) + return sorted(files) + + +def _toml_escape(value: str) -> str: + escaped = value.replace("\\", "\\\\").replace('"', '\\"') + escaped = escaped.replace("\n", "\\n") + return f'"{escaped}"' + + +def _toml_literal(value: Any) -> str: + if value is None: + return '"__NULL__"' + if isinstance(value, bool): + return "true" if value else "false" + if isinstance(value, int): + return str(value) + if isinstance(value, float): + return repr(value) + if isinstance(value, str): + return _toml_escape(value) + if isinstance(value, list): + return "[" + ", ".join(_toml_literal(v) for v in value) + "]" + if isinstance(value, dict): + return "{ " + ", ".join(f"{k} = {_toml_literal(v)}" for k, v in value.items()) + " }" + return _toml_escape(str(value)) + + +def _json_to_toml(data: dict[str, Any]) -> str: + lines: list[str] = [] + + def emit_table(obj: dict[str, Any], path: list[str]) -> None: + scalar_items: list[tuple[str, Any]] = [] + nested_dicts: list[tuple[str, dict[str, Any]]] = [] + array_tables: list[tuple[str, list[dict[str, Any]]]] = [] + + for key, value in obj.items(): + if isinstance(value, dict): + nested_dicts.append((key, value)) + elif isinstance(value, list) and value and all( + isinstance(item, dict) for item in value + ): + array_tables.append((key, value)) + else: + scalar_items.append((key, value)) + + if path: + lines.append(f"[{'.'.join(path)}]") + for key, value in scalar_items: + lines.append(f"{key} = {_toml_literal(value)}") + if scalar_items and (nested_dicts or array_tables): + lines.append("") + + for idx, (key, value) in enumerate(nested_dicts): + emit_table(value, [*path, key]) + if idx != len(nested_dicts) - 1 or array_tables: + lines.append("") + + for t_idx, (key, items) in enumerate(array_tables): + for item in items: + lines.append(f"[[{'.'.join([*path, key])}]]") + for sub_key, sub_value in item.items(): + lines.append(f"{sub_key} = {_toml_literal(sub_value)}") + lines.append("") + if t_idx == len(array_tables) - 1 and lines and lines[-1] == "": + lines.pop() + + emit_table(data, []) + if not lines: + return "" + return "\n".join(lines).rstrip() + "\n" + + +def _write_jsonl(path: Path, entries: list[MemoryEntry]) -> None: + with path.open("w", encoding="utf-8") as fp: + for entry in entries: + fp.write( + json.dumps( + { + "key": entry.key, + "content": entry.content, + "category": entry.category, + "timestamp": entry.timestamp, + "source": entry.source, + }, + ensure_ascii=False, + ) + + "\n" + ) + + +def _write_timeline(path: Path, entries: list[MemoryEntry], source_root: Path) -> None: + ordered = sorted( + entries, + key=lambda e: ( + e.timestamp or "", + e.order, + ), + ) + + lines: list[str] = [] + lines.append("# OpenClaw Migration - Time Brief History") + lines.append("") + lines.append("> 时间简史(初步方案):按时间汇总可迁移记忆条目。") + lines.append("") + lines.append(f"- Generated at: {dt.datetime.now(dt.timezone.utc).isoformat()}") + lines.append(f"- Source: `{source_root}`") + lines.append(f"- Total entries: {len(ordered)}") + lines.append("") + lines.append("## Timeline") + lines.append("") + + for entry in ordered: + ts = entry.timestamp or "unknown" + snippet = entry.content.replace("\n", " ").strip() + if len(snippet) > 160: + snippet = snippet[:157] + "..." + lines.append(f"- [{ts}] ({entry.category}) `{entry.key}`: {snippet}") + + lines.append("") + path.write_text("\n".join(lines), encoding="utf-8") + + +def run_openclaw_migration( + *, + source_root: Path, + astrbot_root: Path, + dry_run: bool = False, + target_dir: Path | None = None, +) -> MigrationReport: + if not source_root.exists() or not source_root.is_dir(): + raise click.ClickException(f"OpenClaw source not found: {source_root}") + + if not check_astrbot_root(astrbot_root): + raise click.ClickException( + f"{astrbot_root} is not a valid AstrBot root. Run from initialized AstrBot root." + ) + + workspace_dir = _find_source_workspace(source_root) + sqlite_entries = _read_openclaw_sqlite_entries(workspace_dir / "memory" / "brain.db") + markdown_entries = _read_openclaw_markdown_entries(workspace_dir) + memory_entries = _dedup_entries([*sqlite_entries, *markdown_entries]) + + workspace_files = _collect_workspace_files(workspace_dir) + workspace_total_bytes = sum(path.stat().st_size for path in workspace_files) + + config_json_path = _find_openclaw_config_json(source_root, workspace_dir) + config_obj: dict[str, Any] | None = None + if config_json_path is not None: + config_obj = json.loads(config_json_path.read_text(encoding="utf-8")) + + resolved_target: Path | None = None + if target_dir is not None: + resolved_target = ( + target_dir if target_dir.is_absolute() else (astrbot_root / target_dir) + ) + elif not dry_run: + run_id = dt.datetime.now().strftime("%Y%m%d-%H%M%S") + resolved_target = ( + astrbot_root / "data" / "migrations" / "openclaw" / f"run-{run_id}" + ) + + copied_workspace_files = 0 + copied_memory_entries = 0 + wrote_timeline = False + wrote_config_toml = False + + if not dry_run and resolved_target is not None: + resolved_target.mkdir(parents=True, exist_ok=True) + workspace_target = resolved_target / "workspace" + workspace_target.mkdir(parents=True, exist_ok=True) + + for src_file in workspace_files: + rel_path = src_file.relative_to(workspace_dir) + dst_file = workspace_target / rel_path + dst_file.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src_file, dst_file) + copied_workspace_files += 1 + + if memory_entries: + _write_jsonl(resolved_target / "memory_entries.jsonl", memory_entries) + copied_memory_entries = len(memory_entries) + _write_timeline( + resolved_target / "time_brief_history.md", + memory_entries, + source_root, + ) + wrote_timeline = True + + if config_obj is not None: + (resolved_target / "config.original.json").write_text( + json.dumps(config_obj, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + (resolved_target / "config.migrated.toml").write_text( + _json_to_toml(config_obj), + encoding="utf-8", + ) + wrote_config_toml = True + + summary = MigrationReport( + source_root=str(source_root), + source_workspace=str(workspace_dir), + target_dir=str(resolved_target), + dry_run=dry_run, + memory_entries_total=len(memory_entries), + memory_entries_from_sqlite=len(sqlite_entries), + memory_entries_from_markdown=len(markdown_entries), + workspace_files_total=len(workspace_files), + workspace_bytes_total=workspace_total_bytes, + config_found=config_obj is not None, + copied_workspace_files=copied_workspace_files, + copied_memory_entries=copied_memory_entries, + wrote_timeline=wrote_timeline, + wrote_config_toml=wrote_config_toml, + ) + (resolved_target / "migration_summary.json").write_text( + json.dumps(asdict(summary), ensure_ascii=False, indent=2), + encoding="utf-8", + ) + return summary + + return MigrationReport( + source_root=str(source_root), + source_workspace=str(workspace_dir), + target_dir=str(resolved_target) if resolved_target else None, + dry_run=dry_run, + memory_entries_total=len(memory_entries), + memory_entries_from_sqlite=len(sqlite_entries), + memory_entries_from_markdown=len(markdown_entries), + workspace_files_total=len(workspace_files), + workspace_bytes_total=workspace_total_bytes, + config_found=config_obj is not None, + copied_workspace_files=0, + copied_memory_entries=0, + wrote_timeline=False, + wrote_config_toml=False, + ) + + +@click.group(name="migrate") +def migrate() -> None: + """Data migration utilities for external runtimes.""" + + +@migrate.command(name="openclaw") +@click.option( + "--source", + "source_path", + type=click.Path(path_type=Path, file_okay=False, resolve_path=True), + default=None, + help="Path to OpenClaw root directory (default: ~/.openclaw).", +) +@click.option( + "--target", + "target_path", + type=click.Path(path_type=Path, file_okay=False, resolve_path=True), + default=None, + help=( + "Custom output directory. If omitted, writes to " + "data/migrations/openclaw/run-." + ), +) +@click.option( + "--dry-run", + is_flag=True, + default=False, + help="Preview migration candidates without writing files.", +) +def migrate_openclaw( + source_path: Path | None, + target_path: Path | None, + dry_run: bool, +) -> None: + """Migrate OpenClaw workspace snapshots into AstrBot migration artifacts.""" + + astrbot_root = get_astrbot_root() + source_root = source_path or (Path.home() / ".openclaw") + + report = run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=dry_run, + target_dir=target_path, + ) + + click.echo("OpenClaw migration report:") + click.echo(f" Source root: {report.source_root}") + click.echo(f" Source workspace: {report.source_workspace}") + click.echo(f" Dry run: {report.dry_run}") + click.echo(f" Memory entries: {report.memory_entries_total}") + click.echo(f" - sqlite: {report.memory_entries_from_sqlite}") + click.echo(f" - markdown: {report.memory_entries_from_markdown}") + click.echo(f" Workspace files: {report.workspace_files_total}") + click.echo(f" Workspace size: {report.workspace_bytes_total} bytes") + click.echo(f" Config found: {report.config_found}") + + if dry_run: + click.echo("") + click.echo("Dry-run mode: no files were written.") + click.echo("Run without --dry-run to perform migration.") + return + + click.echo("") + click.echo(f"Migration output: {report.target_dir}") + click.echo(f" Copied files: {report.copied_workspace_files}") + click.echo(f" Imported memories: {report.copied_memory_entries}") + click.echo(f" Timeline written: {report.wrote_timeline}") + click.echo(f" Config TOML written:{report.wrote_config_toml}") + click.echo("Done.") + + +__all__ = [ + "MigrationReport", + "MemoryEntry", + "_json_to_toml", + "_read_openclaw_sqlite_entries", + "migrate", + "run_openclaw_migration", +] + diff --git a/docs/en/deploy/astrbot/cli.md b/docs/en/deploy/astrbot/cli.md index 857e0d6a61..7ac10c2a21 100644 --- a/docs/en/deploy/astrbot/cli.md +++ b/docs/en/deploy/astrbot/cli.md @@ -90,3 +90,17 @@ If there are no errors, you will see a log message similar to `🌈 Dashboard st Next, you need to deploy any messaging platform to use AstrBot on that platform. + +## OpenClaw Migration (Preliminary) + +If you previously used OpenClaw, AstrBot now provides a preliminary migration command that can import OpenClaw workspace snapshots into AstrBot migration artifacts (including memory entries, workspace files, config conversion, and a generated `time_brief_history.md`): + +```bash +# Preview only (no write) +astrbot migrate openclaw --dry-run + +# Execute migration +astrbot migrate openclaw +``` + +By default, AstrBot reads from `~/.openclaw` and writes migration artifacts to `data/migrations/openclaw/run-` under your AstrBot root. diff --git a/docs/zh/deploy/astrbot/cli.md b/docs/zh/deploy/astrbot/cli.md index 623eb583fb..44392d857a 100644 --- a/docs/zh/deploy/astrbot/cli.md +++ b/docs/zh/deploy/astrbot/cli.md @@ -90,3 +90,17 @@ python main.py 接下来,你需要部署任何一个消息平台,才能够实现在消息平台上使用 AstrBot。 + +## OpenClaw 迁移(初步方案) + +如果你之前使用过 OpenClaw,AstrBot 现在提供了一个初步迁移命令,可将 OpenClaw 工作区快照导入为 AstrBot 迁移产物(包含记忆条目、工作区文件、配置转换和自动生成的 `time_brief_history.md`): + +```bash +# 仅预览(不写入) +astrbot migrate openclaw --dry-run + +# 执行迁移 +astrbot migrate openclaw +``` + +默认从 `~/.openclaw` 读取,并将迁移结果写入 AstrBot 根目录下 `data/migrations/openclaw/run-`。 diff --git a/tests/unit/test_cli_openclaw_migrate.py b/tests/unit/test_cli_openclaw_migrate.py new file mode 100644 index 0000000000..8c2ba1e642 --- /dev/null +++ b/tests/unit/test_cli_openclaw_migrate.py @@ -0,0 +1,145 @@ +from __future__ import annotations + +import json +import sqlite3 +from pathlib import Path + +from astrbot.cli.commands.cmd_migrate import ( + _read_openclaw_sqlite_entries, + run_openclaw_migration, +) + + +def _prepare_astrbot_root(root: Path) -> None: + (root / ".astrbot").touch() + (root / "data").mkdir(parents=True, exist_ok=True) + + +def _prepare_openclaw_source(source_root: Path) -> None: + workspace = source_root / "workspace" + (workspace / "memory").mkdir(parents=True, exist_ok=True) + (workspace / "notes").mkdir(parents=True, exist_ok=True) + + db_path = workspace / "memory" / "brain.db" + conn = sqlite3.connect(db_path) + try: + conn.execute( + "CREATE TABLE memories (id TEXT, value TEXT, type TEXT, updated_at INTEGER)" + ) + conn.execute( + "INSERT INTO memories (id, value, type, updated_at) VALUES (?, ?, ?, ?)", + ("user_pref", "likes rust", "core", 1700000000), + ) + conn.commit() + finally: + conn.close() + + (workspace / "MEMORY.md").write_text( + "# Memory\n- **style**: concise\n- keep logs\n", + encoding="utf-8", + ) + (workspace / "memory" / "2026-03-20.md").write_text( + "- **todo**: migrate artifacts\n", + encoding="utf-8", + ) + (workspace / "notes" / "readme.txt").write_text( + "workspace artifact", + encoding="utf-8", + ) + (source_root / "config.json").write_text( + json.dumps( + { + "model": "gpt-4.1-mini", + "memory": {"enabled": True, "limit": 4096}, + "skills": [{"name": "planner", "enabled": True}], + } + ), + encoding="utf-8", + ) + + +def test_read_openclaw_sqlite_entries_supports_legacy_columns(tmp_path: Path) -> None: + db_dir = tmp_path / "memory" + db_dir.mkdir(parents=True) + db_path = db_dir / "brain.db" + conn = sqlite3.connect(db_path) + try: + conn.execute( + "CREATE TABLE memories (id TEXT, value TEXT, type TEXT, updated_at INTEGER)" + ) + conn.execute( + "INSERT INTO memories (id, value, type, updated_at) VALUES (?, ?, ?, ?)", + ("legacy_key", "legacy_value", "daily", 1700000000), + ) + conn.commit() + finally: + conn.close() + + entries = _read_openclaw_sqlite_entries(db_path) + assert len(entries) == 1 + assert entries[0].key == "legacy_key" + assert entries[0].content == "legacy_value" + assert entries[0].category == "daily" + assert entries[0].timestamp is not None + + +def test_run_openclaw_migration_dry_run(tmp_path: Path) -> None: + source_root = tmp_path / ".openclaw" + source_root.mkdir(parents=True) + _prepare_openclaw_source(source_root) + + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + _prepare_astrbot_root(astrbot_root) + + report = run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=True, + ) + + assert report.dry_run is True + assert report.memory_entries_total >= 3 + assert report.workspace_files_total >= 3 + assert report.config_found is True + assert report.target_dir is None + assert not (astrbot_root / "data" / "migrations" / "openclaw").exists() + + +def test_run_openclaw_migration_writes_artifacts(tmp_path: Path) -> None: + source_root = tmp_path / ".openclaw" + source_root.mkdir(parents=True) + _prepare_openclaw_source(source_root) + + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + _prepare_astrbot_root(astrbot_root) + + report = run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=False, + target_dir=Path("data/migrations/openclaw/test-run"), + ) + + assert report.dry_run is False + assert report.target_dir is not None + target = Path(report.target_dir) + assert target.exists() + + assert (target / "migration_summary.json").exists() + assert (target / "memory_entries.jsonl").exists() + assert (target / "time_brief_history.md").exists() + assert (target / "config.original.json").exists() + assert (target / "config.migrated.toml").exists() + assert (target / "workspace" / "notes" / "readme.txt").exists() + + timeline = (target / "time_brief_history.md").read_text(encoding="utf-8") + assert "Time Brief History" in timeline + assert "时间简史" in timeline + + toml_text = (target / "config.migrated.toml").read_text(encoding="utf-8") + assert "model = " in toml_text + assert "[memory]" in toml_text + assert "[[skills]]" in toml_text + From c24b07a5093ed5fe93f18c1f88b7c044186490e2 Mon Sep 17 00:00:00 2001 From: Jacobinwwey Date: Fri, 20 Mar 2026 23:15:44 -0500 Subject: [PATCH 02/14] refactor(cli): harden openclaw migration tool and tests --- astrbot/cli/commands/cmd_migrate.py | 524 +-------------------- astrbot/cli/utils/__init__.py | 8 + astrbot/cli/utils/openclaw_migrate.py | 586 ++++++++++++++++++++++++ tests/unit/test_cli_openclaw_migrate.py | 123 ++++- 4 files changed, 722 insertions(+), 519 deletions(-) create mode 100644 astrbot/cli/utils/openclaw_migrate.py diff --git a/astrbot/cli/commands/cmd_migrate.py b/astrbot/cli/commands/cmd_migrate.py index a74284e5ef..4e74cf260f 100644 --- a/astrbot/cli/commands/cmd_migrate.py +++ b/astrbot/cli/commands/cmd_migrate.py @@ -1,525 +1,17 @@ from __future__ import annotations -import datetime as dt -import json -import shutil -import sqlite3 -from dataclasses import asdict, dataclass from pathlib import Path -from typing import Any import click -from ..utils import check_astrbot_root, get_astrbot_root - -SQLITE_KEY_CANDIDATES = ("key", "id", "name") -SQLITE_CONTENT_CANDIDATES = ("content", "value", "text", "memory") -SQLITE_CATEGORY_CANDIDATES = ("category", "kind", "type") -SQLITE_TS_CANDIDATES = ("updated_at", "created_at", "timestamp", "ts", "time") - - -@dataclass(slots=True) -class MemoryEntry: - key: str - content: str - category: str - timestamp: str | None - source: str - order: int - - -@dataclass(slots=True) -class MigrationReport: - source_root: str - source_workspace: str - target_dir: str | None - dry_run: bool - memory_entries_total: int - memory_entries_from_sqlite: int - memory_entries_from_markdown: int - workspace_files_total: int - workspace_bytes_total: int - config_found: bool - copied_workspace_files: int - copied_memory_entries: int - wrote_timeline: bool - wrote_config_toml: bool - - -def _pick_existing_column(columns: set[str], candidates: tuple[str, ...]) -> str | None: - for candidate in candidates: - if candidate in columns: - return candidate - return None - - -def _normalize_timestamp(raw: Any) -> str | None: - if raw is None: - return None - - if isinstance(raw, (int, float)): - ts = float(raw) - if ts > 1e12: - ts = ts / 1000.0 - try: - return dt.datetime.fromtimestamp(ts, tz=dt.timezone.utc).isoformat() - except Exception: - return str(raw) - - text = str(raw).strip() - if not text: - return None - - if text.isdigit(): - return _normalize_timestamp(int(text)) - - maybe_iso = text.replace("Z", "+00:00") - try: - parsed = dt.datetime.fromisoformat(maybe_iso) - if parsed.tzinfo is None: - parsed = parsed.replace(tzinfo=dt.timezone.utc) - return parsed.isoformat() - except Exception: - return text - - -def _normalize_key(raw: Any, fallback_idx: int) -> str: - text = str(raw).strip() if raw is not None else "" - if text: - return text - return f"openclaw_{fallback_idx}" - - -def _parse_structured_line(line: str) -> tuple[str, str] | None: - if not line.startswith("**"): - return None - rest = line[2:] - marker = "**:" - marker_idx = rest.find(marker) - if marker_idx <= 0: - return None - key = rest[:marker_idx].strip() - value = rest[marker_idx + len(marker) :].strip() - if not key or not value: - return None - return key, value - - -def _read_openclaw_sqlite_entries(db_path: Path) -> list[MemoryEntry]: - if not db_path.exists(): - return [] - - conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True) - try: - cursor = conn.cursor() - table_exists = cursor.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='memories' LIMIT 1" - ).fetchone() - if table_exists is None: - return [] - - columns = { - str(row[1]).strip().lower() - for row in cursor.execute("PRAGMA table_info(memories)").fetchall() - } - - key_col = _pick_existing_column(columns, SQLITE_KEY_CANDIDATES) or "rowid" - content_col = _pick_existing_column(columns, SQLITE_CONTENT_CANDIDATES) - if content_col is None: - raise click.ClickException( - f"OpenClaw sqlite exists at {db_path}, but no content-like column found" - ) - category_col = _pick_existing_column(columns, SQLITE_CATEGORY_CANDIDATES) - ts_col = _pick_existing_column(columns, SQLITE_TS_CANDIDATES) - - selected_cols = [key_col, content_col] - if category_col is not None: - selected_cols.append(category_col) - if ts_col is not None and ts_col not in selected_cols: - selected_cols.append(ts_col) - - sql = "SELECT " + ", ".join(selected_cols) + " FROM memories" - rows = cursor.execute(sql).fetchall() - - entries: list[MemoryEntry] = [] - for idx, row in enumerate(rows): - row_values = list(row) - key_raw = row_values[0] if row_values else None - content_raw = row_values[1] if len(row_values) > 1 else "" - category_raw = row_values[2] if category_col is not None and len(row_values) > 2 else "core" - ts_index = len(row_values) - 1 if ts_col is not None else -1 - ts_raw = row_values[ts_index] if ts_col is not None and row_values else None - - content = str(content_raw).strip() - if not content: - continue - - entries.append( - MemoryEntry( - key=_normalize_key(key_raw, idx), - content=content, - category=str(category_raw or "core").strip().lower() or "core", - timestamp=_normalize_timestamp(ts_raw), - source=f"sqlite:{db_path}", - order=idx, - ) - ) - - return entries - finally: - conn.close() - - -def _parse_markdown_file( - path: Path, default_category: str, stem: str, order_offset: int -) -> list[MemoryEntry]: - content = path.read_text(encoding="utf-8", errors="ignore") - mtime = _normalize_timestamp(path.stat().st_mtime) - entries: list[MemoryEntry] = [] - line_no = 0 - for raw_line in content.splitlines(): - line_no += 1 - stripped = raw_line.strip() - if not stripped or stripped.startswith("#"): - continue - - line = stripped[2:] if stripped.startswith("- ") else stripped - parsed = _parse_structured_line(line) - if parsed is not None: - key, text = parsed - key = _normalize_key(key, line_no) - body = text.strip() - else: - key = f"openclaw_{stem}_{line_no}" - body = line.strip() - - if not body: - continue - - entries.append( - MemoryEntry( - key=key, - content=body, - category=default_category, - timestamp=mtime, - source=f"markdown:{path}", - order=order_offset + len(entries), - ) - ) - return entries - - -def _read_openclaw_markdown_entries(workspace_dir: Path) -> list[MemoryEntry]: - entries: list[MemoryEntry] = [] - - core_path = workspace_dir / "MEMORY.md" - if core_path.exists(): - entries.extend( - _parse_markdown_file( - core_path, - default_category="core", - stem="core", - order_offset=len(entries), - ) - ) - - daily_dir = workspace_dir / "memory" - if daily_dir.exists(): - for md_path in sorted(daily_dir.glob("*.md")): - stem = md_path.stem or "daily" - entries.extend( - _parse_markdown_file( - md_path, - default_category="daily", - stem=stem, - order_offset=len(entries), - ) - ) - - return entries - - -def _dedup_entries(entries: list[MemoryEntry]) -> list[MemoryEntry]: - seen: set[str] = set() - deduped: list[MemoryEntry] = [] - for item in entries: - signature = "\x00".join( - [ - item.key.strip(), - item.content.strip(), - item.category.strip(), - item.timestamp or "", - ] - ) - if signature in seen: - continue - seen.add(signature) - deduped.append(item) - return deduped - - -def _find_source_workspace(source_root: Path) -> Path: - candidate = source_root / "workspace" - if candidate.exists() and candidate.is_dir(): - return candidate - return source_root - - -def _find_openclaw_config_json(source_root: Path, workspace_dir: Path) -> Path | None: - candidates = [ - source_root / "config.json", - source_root / "settings.json", - workspace_dir / "config.json", - workspace_dir / "settings.json", - ] - for candidate in candidates: - if candidate.exists() and candidate.is_file(): - return candidate - return None - - -def _collect_workspace_files(workspace_dir: Path) -> list[Path]: - files: list[Path] = [] - for path in workspace_dir.rglob("*"): - if path.is_file() and not path.is_symlink(): - files.append(path) - return sorted(files) - - -def _toml_escape(value: str) -> str: - escaped = value.replace("\\", "\\\\").replace('"', '\\"') - escaped = escaped.replace("\n", "\\n") - return f'"{escaped}"' - - -def _toml_literal(value: Any) -> str: - if value is None: - return '"__NULL__"' - if isinstance(value, bool): - return "true" if value else "false" - if isinstance(value, int): - return str(value) - if isinstance(value, float): - return repr(value) - if isinstance(value, str): - return _toml_escape(value) - if isinstance(value, list): - return "[" + ", ".join(_toml_literal(v) for v in value) + "]" - if isinstance(value, dict): - return "{ " + ", ".join(f"{k} = {_toml_literal(v)}" for k, v in value.items()) + " }" - return _toml_escape(str(value)) - - -def _json_to_toml(data: dict[str, Any]) -> str: - lines: list[str] = [] - - def emit_table(obj: dict[str, Any], path: list[str]) -> None: - scalar_items: list[tuple[str, Any]] = [] - nested_dicts: list[tuple[str, dict[str, Any]]] = [] - array_tables: list[tuple[str, list[dict[str, Any]]]] = [] - - for key, value in obj.items(): - if isinstance(value, dict): - nested_dicts.append((key, value)) - elif isinstance(value, list) and value and all( - isinstance(item, dict) for item in value - ): - array_tables.append((key, value)) - else: - scalar_items.append((key, value)) - - if path: - lines.append(f"[{'.'.join(path)}]") - for key, value in scalar_items: - lines.append(f"{key} = {_toml_literal(value)}") - if scalar_items and (nested_dicts or array_tables): - lines.append("") - - for idx, (key, value) in enumerate(nested_dicts): - emit_table(value, [*path, key]) - if idx != len(nested_dicts) - 1 or array_tables: - lines.append("") - - for t_idx, (key, items) in enumerate(array_tables): - for item in items: - lines.append(f"[[{'.'.join([*path, key])}]]") - for sub_key, sub_value in item.items(): - lines.append(f"{sub_key} = {_toml_literal(sub_value)}") - lines.append("") - if t_idx == len(array_tables) - 1 and lines and lines[-1] == "": - lines.pop() - - emit_table(data, []) - if not lines: - return "" - return "\n".join(lines).rstrip() + "\n" - - -def _write_jsonl(path: Path, entries: list[MemoryEntry]) -> None: - with path.open("w", encoding="utf-8") as fp: - for entry in entries: - fp.write( - json.dumps( - { - "key": entry.key, - "content": entry.content, - "category": entry.category, - "timestamp": entry.timestamp, - "source": entry.source, - }, - ensure_ascii=False, - ) - + "\n" - ) - - -def _write_timeline(path: Path, entries: list[MemoryEntry], source_root: Path) -> None: - ordered = sorted( - entries, - key=lambda e: ( - e.timestamp or "", - e.order, - ), - ) - - lines: list[str] = [] - lines.append("# OpenClaw Migration - Time Brief History") - lines.append("") - lines.append("> 时间简史(初步方案):按时间汇总可迁移记忆条目。") - lines.append("") - lines.append(f"- Generated at: {dt.datetime.now(dt.timezone.utc).isoformat()}") - lines.append(f"- Source: `{source_root}`") - lines.append(f"- Total entries: {len(ordered)}") - lines.append("") - lines.append("## Timeline") - lines.append("") - - for entry in ordered: - ts = entry.timestamp or "unknown" - snippet = entry.content.replace("\n", " ").strip() - if len(snippet) > 160: - snippet = snippet[:157] + "..." - lines.append(f"- [{ts}] ({entry.category}) `{entry.key}`: {snippet}") - - lines.append("") - path.write_text("\n".join(lines), encoding="utf-8") - - -def run_openclaw_migration( - *, - source_root: Path, - astrbot_root: Path, - dry_run: bool = False, - target_dir: Path | None = None, -) -> MigrationReport: - if not source_root.exists() or not source_root.is_dir(): - raise click.ClickException(f"OpenClaw source not found: {source_root}") - - if not check_astrbot_root(astrbot_root): - raise click.ClickException( - f"{astrbot_root} is not a valid AstrBot root. Run from initialized AstrBot root." - ) - - workspace_dir = _find_source_workspace(source_root) - sqlite_entries = _read_openclaw_sqlite_entries(workspace_dir / "memory" / "brain.db") - markdown_entries = _read_openclaw_markdown_entries(workspace_dir) - memory_entries = _dedup_entries([*sqlite_entries, *markdown_entries]) - - workspace_files = _collect_workspace_files(workspace_dir) - workspace_total_bytes = sum(path.stat().st_size for path in workspace_files) - - config_json_path = _find_openclaw_config_json(source_root, workspace_dir) - config_obj: dict[str, Any] | None = None - if config_json_path is not None: - config_obj = json.loads(config_json_path.read_text(encoding="utf-8")) - - resolved_target: Path | None = None - if target_dir is not None: - resolved_target = ( - target_dir if target_dir.is_absolute() else (astrbot_root / target_dir) - ) - elif not dry_run: - run_id = dt.datetime.now().strftime("%Y%m%d-%H%M%S") - resolved_target = ( - astrbot_root / "data" / "migrations" / "openclaw" / f"run-{run_id}" - ) - - copied_workspace_files = 0 - copied_memory_entries = 0 - wrote_timeline = False - wrote_config_toml = False - - if not dry_run and resolved_target is not None: - resolved_target.mkdir(parents=True, exist_ok=True) - workspace_target = resolved_target / "workspace" - workspace_target.mkdir(parents=True, exist_ok=True) - - for src_file in workspace_files: - rel_path = src_file.relative_to(workspace_dir) - dst_file = workspace_target / rel_path - dst_file.parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(src_file, dst_file) - copied_workspace_files += 1 - - if memory_entries: - _write_jsonl(resolved_target / "memory_entries.jsonl", memory_entries) - copied_memory_entries = len(memory_entries) - _write_timeline( - resolved_target / "time_brief_history.md", - memory_entries, - source_root, - ) - wrote_timeline = True - - if config_obj is not None: - (resolved_target / "config.original.json").write_text( - json.dumps(config_obj, ensure_ascii=False, indent=2), - encoding="utf-8", - ) - (resolved_target / "config.migrated.toml").write_text( - _json_to_toml(config_obj), - encoding="utf-8", - ) - wrote_config_toml = True - - summary = MigrationReport( - source_root=str(source_root), - source_workspace=str(workspace_dir), - target_dir=str(resolved_target), - dry_run=dry_run, - memory_entries_total=len(memory_entries), - memory_entries_from_sqlite=len(sqlite_entries), - memory_entries_from_markdown=len(markdown_entries), - workspace_files_total=len(workspace_files), - workspace_bytes_total=workspace_total_bytes, - config_found=config_obj is not None, - copied_workspace_files=copied_workspace_files, - copied_memory_entries=copied_memory_entries, - wrote_timeline=wrote_timeline, - wrote_config_toml=wrote_config_toml, - ) - (resolved_target / "migration_summary.json").write_text( - json.dumps(asdict(summary), ensure_ascii=False, indent=2), - encoding="utf-8", - ) - return summary - - return MigrationReport( - source_root=str(source_root), - source_workspace=str(workspace_dir), - target_dir=str(resolved_target) if resolved_target else None, - dry_run=dry_run, - memory_entries_total=len(memory_entries), - memory_entries_from_sqlite=len(sqlite_entries), - memory_entries_from_markdown=len(markdown_entries), - workspace_files_total=len(workspace_files), - workspace_bytes_total=workspace_total_bytes, - config_found=config_obj is not None, - copied_workspace_files=0, - copied_memory_entries=0, - wrote_timeline=False, - wrote_config_toml=False, - ) +from ..utils import get_astrbot_root +from ..utils.openclaw_migrate import ( + MemoryEntry, + MigrationReport, + _json_to_toml, + _read_openclaw_sqlite_entries, + run_openclaw_migration, +) @click.group(name="migrate") diff --git a/astrbot/cli/utils/__init__.py b/astrbot/cli/utils/__init__.py index 3830682f0d..f0f131feea 100644 --- a/astrbot/cli/utils/__init__.py +++ b/astrbot/cli/utils/__init__.py @@ -3,16 +3,24 @@ check_dashboard, get_astrbot_root, ) +from .openclaw_migrate import ( + MemoryEntry, + MigrationReport, + run_openclaw_migration, +) from .plugin import PluginStatus, build_plug_list, get_git_repo, manage_plugin from .version_comparator import VersionComparator __all__ = [ "PluginStatus", "VersionComparator", + "MemoryEntry", + "MigrationReport", "build_plug_list", "check_astrbot_root", "check_dashboard", "get_astrbot_root", "get_git_repo", "manage_plugin", + "run_openclaw_migration", ] diff --git a/astrbot/cli/utils/openclaw_migrate.py b/astrbot/cli/utils/openclaw_migrate.py new file mode 100644 index 0000000000..767456f9a3 --- /dev/null +++ b/astrbot/cli/utils/openclaw_migrate.py @@ -0,0 +1,586 @@ +from __future__ import annotations + +import datetime as dt +import json +import shutil +import sqlite3 +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Any + +import click + +from .basic import check_astrbot_root + +SQLITE_KEY_CANDIDATES = ("key", "id", "name") +SQLITE_CONTENT_CANDIDATES = ("content", "value", "text", "memory") +SQLITE_CATEGORY_CANDIDATES = ("category", "kind", "type") +SQLITE_TS_CANDIDATES = ("updated_at", "created_at", "timestamp", "ts", "time") + + +@dataclass(slots=True) +class MemoryEntry: + key: str + content: str + category: str + timestamp: str | None + source: str + order: int + + +@dataclass(slots=True) +class MigrationReport: + source_root: str + source_workspace: str + target_dir: str | None + dry_run: bool + memory_entries_total: int + memory_entries_from_sqlite: int + memory_entries_from_markdown: int + workspace_files_total: int + workspace_bytes_total: int + config_found: bool + copied_workspace_files: int + copied_memory_entries: int + wrote_timeline: bool + wrote_config_toml: bool + + +def _pick_existing_column(columns: set[str], candidates: tuple[str, ...]) -> str | None: + for candidate in candidates: + if candidate in columns: + return candidate + return None + + +def _normalize_timestamp(raw: Any) -> str | None: + if raw is None: + return None + + if isinstance(raw, (int, float)): + ts = float(raw) + if ts > 1e12: + ts /= 1000.0 + try: + return dt.datetime.fromtimestamp(ts, tz=dt.timezone.utc).isoformat() + except Exception: + return str(raw) + + text = str(raw).strip() + if not text: + return None + + if text.isdigit(): + ts = float(text) + if ts > 1e12: + ts /= 1000.0 + try: + return dt.datetime.fromtimestamp(ts, tz=dt.timezone.utc).isoformat() + except Exception: + return text + + maybe_iso = text.replace("Z", "+00:00") + try: + parsed = dt.datetime.fromisoformat(maybe_iso) + if parsed.tzinfo is None: + parsed = parsed.replace(tzinfo=dt.timezone.utc) + return parsed.isoformat() + except Exception: + return text + + +def _normalize_key(raw: Any, fallback_idx: int) -> str: + text = str(raw).strip() if raw is not None else "" + if text: + return text + return f"openclaw_{fallback_idx}" + + +def _parse_structured_line(line: str) -> tuple[str, str] | None: + if not line.startswith("**"): + return None + rest = line[2:] + marker = "**:" + marker_idx = rest.find(marker) + if marker_idx <= 0: + return None + key = rest[:marker_idx].strip() + value = rest[marker_idx + len(marker) :].strip() + if not key or not value: + return None + return key, value + + +def _read_openclaw_sqlite_entries(db_path: Path) -> list[MemoryEntry]: + if not db_path.exists(): + return [] + + conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True) + conn.row_factory = sqlite3.Row + try: + cursor = conn.cursor() + table_exists = cursor.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='memories' LIMIT 1" + ).fetchone() + if table_exists is None: + return [] + + columns = { + str(row[1]).strip().lower() + for row in cursor.execute("PRAGMA table_info(memories)").fetchall() + } + + key_col = _pick_existing_column(columns, SQLITE_KEY_CANDIDATES) or "rowid" + content_col = _pick_existing_column(columns, SQLITE_CONTENT_CANDIDATES) + if content_col is None: + raise click.ClickException( + f"OpenClaw sqlite exists at {db_path}, but no content-like column found" + ) + category_col = _pick_existing_column(columns, SQLITE_CATEGORY_CANDIDATES) + ts_col = _pick_existing_column(columns, SQLITE_TS_CANDIDATES) + + select_clauses = [ + f"{key_col} AS __key__", + f"{content_col} AS __content__", + ( + f"{category_col} AS __category__" + if category_col is not None + else "'core' AS __category__" + ), + f"{ts_col} AS __timestamp__" if ts_col is not None else "NULL AS __timestamp__", + ] + rows = cursor.execute( + "SELECT " + ", ".join(select_clauses) + " FROM memories" + ).fetchall() + + entries: list[MemoryEntry] = [] + for idx, row in enumerate(rows): + content = str(row["__content__"] or "").strip() + if not content: + continue + + entries.append( + MemoryEntry( + key=_normalize_key(row["__key__"], idx), + content=content, + category=str(row["__category__"] or "core").strip().lower() + or "core", + timestamp=_normalize_timestamp(row["__timestamp__"]), + source=f"sqlite:{db_path}", + order=idx, + ) + ) + + return entries + finally: + conn.close() + + +def _parse_markdown_file( + path: Path, default_category: str, stem: str, order_offset: int +) -> list[MemoryEntry]: + content = path.read_text(encoding="utf-8", errors="replace") + mtime = _normalize_timestamp(path.stat().st_mtime) + entries: list[MemoryEntry] = [] + line_no = 0 + for raw_line in content.splitlines(): + line_no += 1 + stripped = raw_line.strip() + if not stripped or stripped.startswith("#"): + continue + + line = stripped[2:] if stripped.startswith("- ") else stripped + parsed = _parse_structured_line(line) + if parsed is not None: + key, text = parsed + key = _normalize_key(key, line_no) + body = text.strip() + else: + key = f"openclaw_{stem}_{line_no}" + body = line.strip() + + if not body: + continue + + entries.append( + MemoryEntry( + key=key, + content=body, + category=default_category, + timestamp=mtime, + source=f"markdown:{path}", + order=order_offset + len(entries), + ) + ) + return entries + + +def _read_openclaw_markdown_entries(workspace_dir: Path) -> list[MemoryEntry]: + entries: list[MemoryEntry] = [] + + core_path = workspace_dir / "MEMORY.md" + if core_path.exists(): + entries.extend( + _parse_markdown_file( + core_path, + default_category="core", + stem="core", + order_offset=len(entries), + ) + ) + + daily_dir = workspace_dir / "memory" + if daily_dir.exists(): + for md_path in sorted(daily_dir.glob("*.md")): + stem = md_path.stem or "daily" + entries.extend( + _parse_markdown_file( + md_path, + default_category="daily", + stem=stem, + order_offset=len(entries), + ) + ) + + return entries + + +def _dedup_entries(entries: list[MemoryEntry]) -> list[MemoryEntry]: + seen_exact: set[str] = set() + seen_semantic: set[str] = set() + deduped: list[MemoryEntry] = [] + for item in entries: + exact_signature = "\x00".join( + [ + item.key.strip(), + item.content.strip(), + item.category.strip(), + item.timestamp or "", + ] + ) + semantic_signature = "\x00".join( + [ + item.content.strip(), + item.category.strip(), + ] + ) + if exact_signature in seen_exact or semantic_signature in seen_semantic: + continue + seen_exact.add(exact_signature) + seen_semantic.add(semantic_signature) + deduped.append(item) + return deduped + + +def _find_source_workspace(source_root: Path) -> Path: + candidate = source_root / "workspace" + if candidate.exists() and candidate.is_dir(): + return candidate + return source_root + + +def _find_openclaw_config_json(source_root: Path, workspace_dir: Path) -> Path | None: + candidates = [ + source_root / "config.json", + source_root / "settings.json", + workspace_dir / "config.json", + workspace_dir / "settings.json", + ] + for candidate in candidates: + if candidate.exists() and candidate.is_file(): + return candidate + return None + + +def _collect_workspace_files(workspace_dir: Path) -> list[Path]: + files: list[Path] = [] + for path in workspace_dir.rglob("*"): + if path.is_file() and not path.is_symlink(): + files.append(path) + return sorted(files) + + +def _toml_escape(value: str) -> str: + return value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n") + + +def _toml_quote(value: str) -> str: + return f'"{_toml_escape(value)}"' + + +def _toml_format_key(key: str) -> str: + return _toml_quote(key) + + +def _toml_literal(value: Any) -> str: + if value is None: + return '"__NULL__"' + if isinstance(value, bool): + return "true" if value else "false" + if isinstance(value, int): + return str(value) + if isinstance(value, float): + return repr(value) + if isinstance(value, str): + return _toml_quote(value) + if isinstance(value, list): + return "[" + ", ".join(_toml_literal(v) for v in value) + "]" + if isinstance(value, dict): + pairs = ", ".join( + f"{_toml_format_key(str(k))} = {_toml_literal(v)}" for k, v in value.items() + ) + return "{ " + pairs + " }" + return _toml_quote(str(value)) + + +def _format_toml_path(path: list[str]) -> str: + return ".".join(_toml_format_key(str(part)) for part in path) + + +def _json_to_toml(data: dict[str, Any]) -> str: + lines: list[str] = [] + + def emit_table(obj: dict[str, Any], path: list[str]) -> None: + scalar_items: list[tuple[str, Any]] = [] + nested_dicts: list[tuple[str, dict[str, Any]]] = [] + array_tables: list[tuple[str, list[dict[str, Any]]]] = [] + + for key, value in obj.items(): + if isinstance(value, dict): + nested_dicts.append((str(key), value)) + elif isinstance(value, list) and value and all( + isinstance(item, dict) for item in value + ): + array_tables.append((str(key), value)) + else: + scalar_items.append((str(key), value)) + + if path: + lines.append(f"[{_format_toml_path(path)}]") + for key, value in scalar_items: + lines.append(f"{_toml_format_key(key)} = {_toml_literal(value)}") + if scalar_items and (nested_dicts or array_tables): + lines.append("") + + for idx, (key, value) in enumerate(nested_dicts): + emit_table(value, [*path, key]) + if idx != len(nested_dicts) - 1 or array_tables: + lines.append("") + + for t_idx, (key, items) in enumerate(array_tables): + table_path = [*path, key] + for item in items: + lines.append(f"[[{_format_toml_path(table_path)}]]") + for sub_key, sub_value in item.items(): + lines.append( + f"{_toml_format_key(str(sub_key))} = {_toml_literal(sub_value)}" + ) + lines.append("") + if t_idx == len(array_tables) - 1 and lines and lines[-1] == "": + lines.pop() + + emit_table(data, []) + if not lines: + return "" + return "\n".join(lines).rstrip() + "\n" + + +def _write_jsonl(path: Path, entries: list[MemoryEntry]) -> None: + with path.open("w", encoding="utf-8") as fp: + for entry in entries: + fp.write( + json.dumps( + { + "key": entry.key, + "content": entry.content, + "category": entry.category, + "timestamp": entry.timestamp, + "source": entry.source, + }, + ensure_ascii=False, + ) + + "\n" + ) + + +def _write_timeline(path: Path, entries: list[MemoryEntry], source_root: Path) -> None: + ordered = sorted(entries, key=lambda e: (e.timestamp or "", e.order)) + + lines: list[str] = [] + lines.append("# OpenClaw Migration - Time Brief History") + lines.append("") + lines.append("> 时间简史(初步方案):按时间汇总可迁移记忆条目。") + lines.append("") + lines.append(f"- Generated at: {dt.datetime.now(dt.timezone.utc).isoformat()}") + lines.append(f"- Source: `{source_root}`") + lines.append(f"- Total entries: {len(ordered)}") + lines.append("") + lines.append("## Timeline") + lines.append("") + + for entry in ordered: + ts = entry.timestamp or "unknown" + snippet = entry.content.replace("\n", " ").strip() + if len(snippet) > 160: + snippet = snippet[:157] + "..." + lines.append(f"- [{ts}] ({entry.category}) `{entry.key}`: {snippet}") + + lines.append("") + path.write_text("\n".join(lines), encoding="utf-8") + + +def _load_json_or_raise(path: Path) -> dict[str, Any]: + try: + return json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise click.ClickException( + f"Failed to parse OpenClaw config JSON at {path}: {exc.msg} " + f"(line {exc.lineno}, column {exc.colno})" + ) from exc + + +def _build_report( + *, + source_root: Path, + workspace_dir: Path, + target_dir: Path | None, + dry_run: bool, + sqlite_entries: list[MemoryEntry], + markdown_entries: list[MemoryEntry], + memory_entries: list[MemoryEntry], + workspace_files: list[Path], + workspace_bytes_total: int, + config_found: bool, + copied_workspace_files: int, + copied_memory_entries: int, + wrote_timeline: bool, + wrote_config_toml: bool, +) -> MigrationReport: + return MigrationReport( + source_root=str(source_root), + source_workspace=str(workspace_dir), + target_dir=str(target_dir) if target_dir else None, + dry_run=dry_run, + memory_entries_total=len(memory_entries), + memory_entries_from_sqlite=len(sqlite_entries), + memory_entries_from_markdown=len(markdown_entries), + workspace_files_total=len(workspace_files), + workspace_bytes_total=workspace_bytes_total, + config_found=config_found, + copied_workspace_files=copied_workspace_files, + copied_memory_entries=copied_memory_entries, + wrote_timeline=wrote_timeline, + wrote_config_toml=wrote_config_toml, + ) + + +def run_openclaw_migration( + *, + source_root: Path, + astrbot_root: Path, + dry_run: bool = False, + target_dir: Path | None = None, +) -> MigrationReport: + if not source_root.exists() or not source_root.is_dir(): + raise click.ClickException(f"OpenClaw source not found: {source_root}") + + if not check_astrbot_root(astrbot_root): + raise click.ClickException( + f"{astrbot_root} is not a valid AstrBot root. Run from initialized AstrBot root." + ) + + workspace_dir = _find_source_workspace(source_root) + sqlite_entries = _read_openclaw_sqlite_entries(workspace_dir / "memory" / "brain.db") + markdown_entries = _read_openclaw_markdown_entries(workspace_dir) + memory_entries = _dedup_entries([*sqlite_entries, *markdown_entries]) + + workspace_files = _collect_workspace_files(workspace_dir) + workspace_total_bytes = sum(path.stat().st_size for path in workspace_files) + + config_json_path = _find_openclaw_config_json(source_root, workspace_dir) + config_obj: dict[str, Any] | None = None + if config_json_path is not None: + config_obj = _load_json_or_raise(config_json_path) + + resolved_target: Path | None = None + if target_dir is not None: + resolved_target = ( + target_dir if target_dir.is_absolute() else (astrbot_root / target_dir) + ) + elif not dry_run: + run_id = dt.datetime.now().strftime("%Y%m%d-%H%M%S") + resolved_target = ( + astrbot_root / "data" / "migrations" / "openclaw" / f"run-{run_id}" + ) + + copied_workspace_files = 0 + copied_memory_entries = 0 + wrote_timeline = False + wrote_config_toml = False + + if not dry_run and resolved_target is not None: + resolved_target.mkdir(parents=True, exist_ok=True) + workspace_target = resolved_target / "workspace" + workspace_target.mkdir(parents=True, exist_ok=True) + + for src_file in workspace_files: + rel_path = src_file.relative_to(workspace_dir) + dst_file = workspace_target / rel_path + dst_file.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src_file, dst_file) + copied_workspace_files += 1 + + if memory_entries: + _write_jsonl(resolved_target / "memory_entries.jsonl", memory_entries) + copied_memory_entries = len(memory_entries) + _write_timeline( + resolved_target / "time_brief_history.md", + memory_entries, + source_root, + ) + wrote_timeline = True + + if config_obj is not None: + (resolved_target / "config.original.json").write_text( + json.dumps(config_obj, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + (resolved_target / "config.migrated.toml").write_text( + _json_to_toml(config_obj), + encoding="utf-8", + ) + wrote_config_toml = True + + report = _build_report( + source_root=source_root, + workspace_dir=workspace_dir, + target_dir=resolved_target, + dry_run=dry_run, + sqlite_entries=sqlite_entries, + markdown_entries=markdown_entries, + memory_entries=memory_entries, + workspace_files=workspace_files, + workspace_bytes_total=workspace_total_bytes, + config_found=config_obj is not None, + copied_workspace_files=copied_workspace_files, + copied_memory_entries=copied_memory_entries, + wrote_timeline=wrote_timeline, + wrote_config_toml=wrote_config_toml, + ) + + if not dry_run and resolved_target is not None: + (resolved_target / "migration_summary.json").write_text( + json.dumps(asdict(report), ensure_ascii=False, indent=2), + encoding="utf-8", + ) + + return report + + +__all__ = [ + "MemoryEntry", + "MigrationReport", + "_json_to_toml", + "_read_openclaw_sqlite_entries", + "run_openclaw_migration", +] diff --git a/tests/unit/test_cli_openclaw_migrate.py b/tests/unit/test_cli_openclaw_migrate.py index 8c2ba1e642..f17c93dd0f 100644 --- a/tests/unit/test_cli_openclaw_migrate.py +++ b/tests/unit/test_cli_openclaw_migrate.py @@ -5,6 +5,7 @@ from pathlib import Path from astrbot.cli.commands.cmd_migrate import ( + _json_to_toml, _read_openclaw_sqlite_entries, run_openclaw_migration, ) @@ -58,6 +59,17 @@ def _prepare_openclaw_source(source_root: Path) -> None: ) +def _read_migrated_memory_entries(target_dir: Path) -> list[dict[str, str | None]]: + memory_jsonl = target_dir / "memory_entries.jsonl" + entries: list[dict[str, str | None]] = [] + for line in memory_jsonl.read_text(encoding="utf-8").splitlines(): + if not line.strip(): + continue + payload = json.loads(line) + entries.append(payload) + return entries + + def test_read_openclaw_sqlite_entries_supports_legacy_columns(tmp_path: Path) -> None: db_dir = tmp_path / "memory" db_dir.mkdir(parents=True) @@ -139,7 +151,112 @@ def test_run_openclaw_migration_writes_artifacts(tmp_path: Path) -> None: assert "时间简史" in timeline toml_text = (target / "config.migrated.toml").read_text(encoding="utf-8") - assert "model = " in toml_text - assert "[memory]" in toml_text - assert "[[skills]]" in toml_text + assert '"model" = ' in toml_text + assert '["memory"]' in toml_text + assert '[["skills"]]' in toml_text + + +def test_markdown_parsing_structured_and_plain_lines(tmp_path: Path) -> None: + source_root = tmp_path / ".openclaw" + source_root.mkdir(parents=True) + _prepare_openclaw_source(source_root) + + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + _prepare_astrbot_root(astrbot_root) + + report = run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=False, + target_dir=Path("data/migrations/openclaw/test-markdown"), + ) + assert report.target_dir is not None + entries = _read_migrated_memory_entries(Path(report.target_dir)) + + memory_md_entries = [ + entry + for entry in entries + if str(entry.get("source", "")).endswith("workspace/MEMORY.md") + ] + style_entries = [entry for entry in memory_md_entries if entry.get("key") == "style"] + assert len(style_entries) == 1 + assert style_entries[0].get("content") == "concise" + + plain_entries = [ + entry for entry in memory_md_entries if entry.get("content") == "keep logs" + ] + assert len(plain_entries) == 1 + assert str(plain_entries[0].get("key", "")).startswith("openclaw_core_") + + +def test_deduplication_between_sqlite_and_markdown_preserves_order( + tmp_path: Path, +) -> None: + source_root = tmp_path / ".openclaw" + source_root.mkdir(parents=True) + _prepare_openclaw_source(source_root) + + memory_md = source_root / "workspace" / "MEMORY.md" + memory_md.write_text( + memory_md.read_text(encoding="utf-8") + "- likes rust\n", + encoding="utf-8", + ) + + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + _prepare_astrbot_root(astrbot_root) + + report = run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=False, + target_dir=Path("data/migrations/openclaw/test-dedup"), + ) + assert report.target_dir is not None + entries = _read_migrated_memory_entries(Path(report.target_dir)) + contents = [str(entry.get("content", "")) for entry in entries] + + assert contents.count("likes rust") == 1 + assert contents.index("likes rust") < contents.index("keep logs") + + +def test_run_openclaw_migration_invalid_config_json_raises_click_exception( + tmp_path: Path, +) -> None: + source_root = tmp_path / ".openclaw" + source_root.mkdir(parents=True) + _prepare_openclaw_source(source_root) + (source_root / "config.json").write_text("{ invalid json", encoding="utf-8") + + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + _prepare_astrbot_root(astrbot_root) + + import click + import pytest + + with pytest.raises(click.ClickException) as exc_info: + run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=False, + target_dir=Path("data/migrations/openclaw/test-invalid-config"), + ) + + assert "Failed to parse OpenClaw config JSON" in str(exc_info.value) + + +def test_json_to_toml_quotes_special_keys() -> None: + payload = { + "normal key": "ok", + "nested.obj": {"x y": 1}, + "list": [{"dot.key": True}], + } + toml_text = _json_to_toml(payload) + assert '"normal key" = "ok"' in toml_text + assert '["nested.obj"]' in toml_text + assert '"x y" = 1' in toml_text + assert '[["list"]]' in toml_text + assert '"dot.key" = true' in toml_text From b4eba8cb74e82caca8b6c6ce4c2702d31265a591 Mon Sep 17 00:00:00 2001 From: Jacobinwwey Date: Sat, 21 Mar 2026 01:57:47 -0500 Subject: [PATCH 03/14] refactor(migrate): split toml utility and harden float/timestamp handling --- astrbot/cli/commands/cmd_migrate.py | 18 +-- astrbot/cli/utils/openclaw_migrate.py | 189 ++++++++---------------- astrbot/cli/utils/openclaw_toml.py | 101 +++++++++++++ tests/unit/test_cli_openclaw_migrate.py | 47 ++++-- 4 files changed, 197 insertions(+), 158 deletions(-) create mode 100644 astrbot/cli/utils/openclaw_toml.py diff --git a/astrbot/cli/commands/cmd_migrate.py b/astrbot/cli/commands/cmd_migrate.py index 4e74cf260f..63bd543a21 100644 --- a/astrbot/cli/commands/cmd_migrate.py +++ b/astrbot/cli/commands/cmd_migrate.py @@ -5,13 +5,7 @@ import click from ..utils import get_astrbot_root -from ..utils.openclaw_migrate import ( - MemoryEntry, - MigrationReport, - _json_to_toml, - _read_openclaw_sqlite_entries, - run_openclaw_migration, -) +from ..utils.openclaw_migrate import run_openclaw_migration @click.group(name="migrate") @@ -86,12 +80,4 @@ def migrate_openclaw( click.echo("Done.") -__all__ = [ - "MigrationReport", - "MemoryEntry", - "_json_to_toml", - "_read_openclaw_sqlite_entries", - "migrate", - "run_openclaw_migration", -] - +__all__ = ["migrate"] diff --git a/astrbot/cli/utils/openclaw_migrate.py b/astrbot/cli/utils/openclaw_migrate.py index 767456f9a3..8ca23654ad 100644 --- a/astrbot/cli/utils/openclaw_migrate.py +++ b/astrbot/cli/utils/openclaw_migrate.py @@ -11,6 +11,7 @@ import click from .basic import check_astrbot_root +from .openclaw_toml import json_to_toml SQLITE_KEY_CANDIDATES = ("key", "id", "name") SQLITE_CONTENT_CANDIDATES = ("content", "value", "text", "memory") @@ -53,31 +54,31 @@ def _pick_existing_column(columns: set[str], candidates: tuple[str, ...]) -> str return None +def _timestamp_from_epoch(raw: float | int | str) -> str | None: + ts = float(raw) + if ts > 1e12: + ts /= 1000.0 + try: + return dt.datetime.fromtimestamp(ts, tz=dt.timezone.utc).isoformat() + except Exception: + return None + + def _normalize_timestamp(raw: Any) -> str | None: if raw is None: return None if isinstance(raw, (int, float)): - ts = float(raw) - if ts > 1e12: - ts /= 1000.0 - try: - return dt.datetime.fromtimestamp(ts, tz=dt.timezone.utc).isoformat() - except Exception: - return str(raw) + normalized = _timestamp_from_epoch(raw) + return normalized if normalized is not None else str(raw) text = str(raw).strip() if not text: return None if text.isdigit(): - ts = float(text) - if ts > 1e12: - ts /= 1000.0 - try: - return dt.datetime.fromtimestamp(ts, tz=dt.timezone.utc).isoformat() - except Exception: - return text + normalized = _timestamp_from_epoch(text) + return normalized if normalized is not None else text maybe_iso = text.replace("Z", "+00:00") try: @@ -111,6 +112,25 @@ def _parse_structured_line(line: str) -> tuple[str, str] | None: return key, value +def _discover_memory_columns( + cursor: sqlite3.Cursor, db_path: Path +) -> tuple[str, str, str | None, str | None]: + columns = { + str(row[1]).strip().lower() + for row in cursor.execute("PRAGMA table_info(memories)").fetchall() + } + + key_col = _pick_existing_column(columns, SQLITE_KEY_CANDIDATES) or "rowid" + content_col = _pick_existing_column(columns, SQLITE_CONTENT_CANDIDATES) + if content_col is None: + raise click.ClickException( + f"OpenClaw sqlite exists at {db_path}, but no content-like column found" + ) + category_col = _pick_existing_column(columns, SQLITE_CATEGORY_CANDIDATES) + ts_col = _pick_existing_column(columns, SQLITE_TS_CANDIDATES) + return key_col, content_col, category_col, ts_col + + def _read_openclaw_sqlite_entries(db_path: Path) -> list[MemoryEntry]: if not db_path.exists(): return [] @@ -125,19 +145,9 @@ def _read_openclaw_sqlite_entries(db_path: Path) -> list[MemoryEntry]: if table_exists is None: return [] - columns = { - str(row[1]).strip().lower() - for row in cursor.execute("PRAGMA table_info(memories)").fetchall() - } - - key_col = _pick_existing_column(columns, SQLITE_KEY_CANDIDATES) or "rowid" - content_col = _pick_existing_column(columns, SQLITE_CONTENT_CANDIDATES) - if content_col is None: - raise click.ClickException( - f"OpenClaw sqlite exists at {db_path}, but no content-like column found" - ) - category_col = _pick_existing_column(columns, SQLITE_CATEGORY_CANDIDATES) - ts_col = _pick_existing_column(columns, SQLITE_TS_CANDIDATES) + key_col, content_col, category_col, ts_col = _discover_memory_columns( + cursor, db_path + ) select_clauses = [ f"{key_col} AS __key__", @@ -245,25 +255,26 @@ def _read_openclaw_markdown_entries(workspace_dir: Path) -> list[MemoryEntry]: return entries +def _entry_signature(entry: MemoryEntry, *, semantic: bool) -> str: + if semantic: + parts = [entry.content.strip(), entry.category.strip()] + else: + parts = [ + entry.key.strip(), + entry.content.strip(), + entry.category.strip(), + entry.timestamp or "", + ] + return "\x00".join(parts) + + def _dedup_entries(entries: list[MemoryEntry]) -> list[MemoryEntry]: seen_exact: set[str] = set() seen_semantic: set[str] = set() deduped: list[MemoryEntry] = [] for item in entries: - exact_signature = "\x00".join( - [ - item.key.strip(), - item.content.strip(), - item.category.strip(), - item.timestamp or "", - ] - ) - semantic_signature = "\x00".join( - [ - item.content.strip(), - item.category.strip(), - ] - ) + exact_signature = _entry_signature(item, semantic=False) + semantic_signature = _entry_signature(item, semantic=True) if exact_signature in seen_exact or semantic_signature in seen_semantic: continue seen_exact.add(exact_signature) @@ -300,91 +311,6 @@ def _collect_workspace_files(workspace_dir: Path) -> list[Path]: return sorted(files) -def _toml_escape(value: str) -> str: - return value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n") - - -def _toml_quote(value: str) -> str: - return f'"{_toml_escape(value)}"' - - -def _toml_format_key(key: str) -> str: - return _toml_quote(key) - - -def _toml_literal(value: Any) -> str: - if value is None: - return '"__NULL__"' - if isinstance(value, bool): - return "true" if value else "false" - if isinstance(value, int): - return str(value) - if isinstance(value, float): - return repr(value) - if isinstance(value, str): - return _toml_quote(value) - if isinstance(value, list): - return "[" + ", ".join(_toml_literal(v) for v in value) + "]" - if isinstance(value, dict): - pairs = ", ".join( - f"{_toml_format_key(str(k))} = {_toml_literal(v)}" for k, v in value.items() - ) - return "{ " + pairs + " }" - return _toml_quote(str(value)) - - -def _format_toml_path(path: list[str]) -> str: - return ".".join(_toml_format_key(str(part)) for part in path) - - -def _json_to_toml(data: dict[str, Any]) -> str: - lines: list[str] = [] - - def emit_table(obj: dict[str, Any], path: list[str]) -> None: - scalar_items: list[tuple[str, Any]] = [] - nested_dicts: list[tuple[str, dict[str, Any]]] = [] - array_tables: list[tuple[str, list[dict[str, Any]]]] = [] - - for key, value in obj.items(): - if isinstance(value, dict): - nested_dicts.append((str(key), value)) - elif isinstance(value, list) and value and all( - isinstance(item, dict) for item in value - ): - array_tables.append((str(key), value)) - else: - scalar_items.append((str(key), value)) - - if path: - lines.append(f"[{_format_toml_path(path)}]") - for key, value in scalar_items: - lines.append(f"{_toml_format_key(key)} = {_toml_literal(value)}") - if scalar_items and (nested_dicts or array_tables): - lines.append("") - - for idx, (key, value) in enumerate(nested_dicts): - emit_table(value, [*path, key]) - if idx != len(nested_dicts) - 1 or array_tables: - lines.append("") - - for t_idx, (key, items) in enumerate(array_tables): - table_path = [*path, key] - for item in items: - lines.append(f"[[{_format_toml_path(table_path)}]]") - for sub_key, sub_value in item.items(): - lines.append( - f"{_toml_format_key(str(sub_key))} = {_toml_literal(sub_value)}" - ) - lines.append("") - if t_idx == len(array_tables) - 1 and lines and lines[-1] == "": - lines.pop() - - emit_table(data, []) - if not lines: - return "" - return "\n".join(lines).rstrip() + "\n" - - def _write_jsonl(path: Path, entries: list[MemoryEntry]) -> None: with path.open("w", encoding="utf-8") as fp: for entry in entries: @@ -545,8 +471,15 @@ def run_openclaw_migration( json.dumps(config_obj, ensure_ascii=False, indent=2), encoding="utf-8", ) + try: + converted_toml = json_to_toml(config_obj) + except ValueError as exc: + source_hint = str(config_json_path) if config_json_path else "config JSON" + raise click.ClickException( + f"Failed to convert {source_hint} to TOML: {exc}" + ) from exc (resolved_target / "config.migrated.toml").write_text( - _json_to_toml(config_obj), + converted_toml, encoding="utf-8", ) wrote_config_toml = True @@ -580,7 +513,5 @@ def run_openclaw_migration( __all__ = [ "MemoryEntry", "MigrationReport", - "_json_to_toml", - "_read_openclaw_sqlite_entries", "run_openclaw_migration", ] diff --git a/astrbot/cli/utils/openclaw_toml.py b/astrbot/cli/utils/openclaw_toml.py new file mode 100644 index 0000000000..a185c4a4b4 --- /dev/null +++ b/astrbot/cli/utils/openclaw_toml.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import math +from typing import Any + +# TOML has no null literal. Keep this centralized so behavior is explicit and +# easy to adjust in future migrations. +NULL_SENTINEL = "__NULL__" + + +def _toml_escape(value: str) -> str: + return value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n") + + +def _toml_quote(value: str) -> str: + return f'"{_toml_escape(value)}"' + + +def _toml_format_key(key: str) -> str: + return _toml_quote(key) + + +def _format_toml_path(path: list[str]) -> str: + return ".".join(_toml_format_key(str(part)) for part in path) + + +def _toml_literal(value: Any) -> str: + if value is None: + return _toml_quote(NULL_SENTINEL) + if isinstance(value, bool): + return "true" if value else "false" + if isinstance(value, int): + return str(value) + if isinstance(value, float): + if not math.isfinite(value): + # TOML 1.0 does not allow NaN/Infinity. + raise ValueError(f"non-finite float value is not TOML-compatible: {value}") + return repr(value) + if isinstance(value, str): + return _toml_quote(value) + if isinstance(value, list): + return "[" + ", ".join(_toml_literal(v) for v in value) + "]" + if isinstance(value, dict): + pairs = ", ".join( + f"{_toml_format_key(str(k))} = {_toml_literal(v)}" for k, v in value.items() + ) + return "{ " + pairs + " }" + return _toml_quote(str(value)) + + +def json_to_toml(data: dict[str, Any]) -> str: + """Serialize a JSON-like dict to TOML text used by migration snapshots.""" + lines: list[str] = [] + + def emit_table(obj: dict[str, Any], path: list[str]) -> None: + scalar_items: list[tuple[str, Any]] = [] + nested_dicts: list[tuple[str, dict[str, Any]]] = [] + array_tables: list[tuple[str, list[dict[str, Any]]]] = [] + + for key, value in obj.items(): + if isinstance(value, dict): + nested_dicts.append((str(key), value)) + elif isinstance(value, list) and value and all( + isinstance(item, dict) for item in value + ): + array_tables.append((str(key), value)) + else: + scalar_items.append((str(key), value)) + + if path: + lines.append(f"[{_format_toml_path(path)}]") + for key, value in scalar_items: + lines.append(f"{_toml_format_key(key)} = {_toml_literal(value)}") + if scalar_items and (nested_dicts or array_tables): + lines.append("") + + for idx, (key, value) in enumerate(nested_dicts): + emit_table(value, [*path, key]) + if idx != len(nested_dicts) - 1 or array_tables: + lines.append("") + + for t_idx, (key, items) in enumerate(array_tables): + table_path = [*path, key] + for item in items: + lines.append(f"[[{_format_toml_path(table_path)}]]") + for sub_key, sub_value in item.items(): + lines.append( + f"{_toml_format_key(str(sub_key))} = {_toml_literal(sub_value)}" + ) + lines.append("") + if t_idx == len(array_tables) - 1 and lines and lines[-1] == "": + lines.pop() + + emit_table(data, []) + if not lines: + return "" + return "\n".join(lines).rstrip() + "\n" + + +__all__ = ["NULL_SENTINEL", "json_to_toml"] + diff --git a/tests/unit/test_cli_openclaw_migrate.py b/tests/unit/test_cli_openclaw_migrate.py index f17c93dd0f..5e33350909 100644 --- a/tests/unit/test_cli_openclaw_migrate.py +++ b/tests/unit/test_cli_openclaw_migrate.py @@ -4,11 +4,8 @@ import sqlite3 from pathlib import Path -from astrbot.cli.commands.cmd_migrate import ( - _json_to_toml, - _read_openclaw_sqlite_entries, - run_openclaw_migration, -) +from astrbot.cli.utils.openclaw_migrate import run_openclaw_migration +from astrbot.cli.utils.openclaw_toml import json_to_toml def _prepare_astrbot_root(root: Path) -> None: @@ -70,9 +67,14 @@ def _read_migrated_memory_entries(target_dir: Path) -> list[dict[str, str | None return entries -def test_read_openclaw_sqlite_entries_supports_legacy_columns(tmp_path: Path) -> None: - db_dir = tmp_path / "memory" +def test_migration_supports_legacy_sqlite_columns(tmp_path: Path) -> None: + source_root = tmp_path / ".openclaw" + workspace = source_root / "workspace" + db_dir = workspace / "memory" db_dir.mkdir(parents=True) + (workspace / "notes").mkdir(parents=True, exist_ok=True) + (workspace / "MEMORY.md").write_text("", encoding="utf-8") + db_path = db_dir / "brain.db" conn = sqlite3.connect(db_path) try: @@ -87,12 +89,24 @@ def test_read_openclaw_sqlite_entries_supports_legacy_columns(tmp_path: Path) -> finally: conn.close() - entries = _read_openclaw_sqlite_entries(db_path) + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + _prepare_astrbot_root(astrbot_root) + + report = run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=False, + target_dir=Path("data/migrations/openclaw/test-legacy-sqlite"), + ) + + assert report.target_dir is not None + entries = _read_migrated_memory_entries(Path(report.target_dir)) assert len(entries) == 1 - assert entries[0].key == "legacy_key" - assert entries[0].content == "legacy_value" - assert entries[0].category == "daily" - assert entries[0].timestamp is not None + assert entries[0].get("key") == "legacy_key" + assert entries[0].get("content") == "legacy_value" + assert entries[0].get("category") == "daily" + assert entries[0].get("timestamp") is not None def test_run_openclaw_migration_dry_run(tmp_path: Path) -> None: @@ -253,10 +267,17 @@ def test_json_to_toml_quotes_special_keys() -> None: "nested.obj": {"x y": 1}, "list": [{"dot.key": True}], } - toml_text = _json_to_toml(payload) + toml_text = json_to_toml(payload) assert '"normal key" = "ok"' in toml_text assert '["nested.obj"]' in toml_text assert '"x y" = 1' in toml_text assert '[["list"]]' in toml_text assert '"dot.key" = true' in toml_text + + +def test_json_to_toml_rejects_non_finite_float() -> None: + import pytest + + with pytest.raises(ValueError): + json_to_toml({"invalid": float("nan")}) From 3305a1ffa88eb94d6704cc17ea5d612b7a0f0ffe Mon Sep 17 00:00:00 2001 From: Jacobinwwey Date: Sat, 21 Mar 2026 02:04:19 -0500 Subject: [PATCH 04/14] refactor(migrate): improve determinism and robustness in openclaw import --- astrbot/cli/utils/openclaw_migrate.py | 95 ++++++++++++--------------- astrbot/cli/utils/openclaw_toml.py | 10 ++- 2 files changed, 50 insertions(+), 55 deletions(-) diff --git a/astrbot/cli/utils/openclaw_migrate.py b/astrbot/cli/utils/openclaw_migrate.py index 8ca23654ad..46e229008a 100644 --- a/astrbot/cli/utils/openclaw_migrate.py +++ b/astrbot/cli/utils/openclaw_migrate.py @@ -159,8 +159,13 @@ def _read_openclaw_sqlite_entries(db_path: Path) -> list[MemoryEntry]: ), f"{ts_col} AS __timestamp__" if ts_col is not None else "NULL AS __timestamp__", ] + order_by_clause = ( + " ORDER BY __timestamp__ ASC, __key__ ASC" + if ts_col is not None + else " ORDER BY __key__ ASC" + ) rows = cursor.execute( - "SELECT " + ", ".join(select_clauses) + " FROM memories" + "SELECT " + ", ".join(select_clauses) + " FROM memories" + order_by_clause ).fetchall() entries: list[MemoryEntry] = [] @@ -255,17 +260,24 @@ def _read_openclaw_markdown_entries(workspace_dir: Path) -> list[MemoryEntry]: return entries -def _entry_signature(entry: MemoryEntry, *, semantic: bool) -> str: - if semantic: - parts = [entry.content.strip(), entry.category.strip()] - else: - parts = [ +def _exact_signature(entry: MemoryEntry) -> str: + return "\x00".join( + [ entry.key.strip(), entry.content.strip(), entry.category.strip(), entry.timestamp or "", ] - return "\x00".join(parts) + ) + + +def _semantic_signature(entry: MemoryEntry) -> str: + return "\x00".join( + [ + entry.content.strip(), + entry.category.strip(), + ] + ) def _dedup_entries(entries: list[MemoryEntry]) -> list[MemoryEntry]: @@ -273,8 +285,8 @@ def _dedup_entries(entries: list[MemoryEntry]) -> list[MemoryEntry]: seen_semantic: set[str] = set() deduped: list[MemoryEntry] = [] for item in entries: - exact_signature = _entry_signature(item, semantic=False) - semantic_signature = _entry_signature(item, semantic=True) + exact_signature = _exact_signature(item) + semantic_signature = _semantic_signature(item) if exact_signature in seen_exact or semantic_signature in seen_semantic: continue seen_exact.add(exact_signature) @@ -311,6 +323,18 @@ def _collect_workspace_files(workspace_dir: Path) -> list[Path]: return sorted(files) +def _workspace_total_size(files: list[Path]) -> int: + total_bytes = 0 + for path in files: + try: + total_bytes += path.stat().st_size + except OSError: + # Best-effort accounting: files may disappear or become unreadable + # during migration scans. + continue + return total_bytes + + def _write_jsonl(path: Path, entries: list[MemoryEntry]) -> None: with path.open("w", encoding="utf-8") as fp: for entry in entries: @@ -365,41 +389,6 @@ def _load_json_or_raise(path: Path) -> dict[str, Any]: ) from exc -def _build_report( - *, - source_root: Path, - workspace_dir: Path, - target_dir: Path | None, - dry_run: bool, - sqlite_entries: list[MemoryEntry], - markdown_entries: list[MemoryEntry], - memory_entries: list[MemoryEntry], - workspace_files: list[Path], - workspace_bytes_total: int, - config_found: bool, - copied_workspace_files: int, - copied_memory_entries: int, - wrote_timeline: bool, - wrote_config_toml: bool, -) -> MigrationReport: - return MigrationReport( - source_root=str(source_root), - source_workspace=str(workspace_dir), - target_dir=str(target_dir) if target_dir else None, - dry_run=dry_run, - memory_entries_total=len(memory_entries), - memory_entries_from_sqlite=len(sqlite_entries), - memory_entries_from_markdown=len(markdown_entries), - workspace_files_total=len(workspace_files), - workspace_bytes_total=workspace_bytes_total, - config_found=config_found, - copied_workspace_files=copied_workspace_files, - copied_memory_entries=copied_memory_entries, - wrote_timeline=wrote_timeline, - wrote_config_toml=wrote_config_toml, - ) - - def run_openclaw_migration( *, source_root: Path, @@ -421,7 +410,7 @@ def run_openclaw_migration( memory_entries = _dedup_entries([*sqlite_entries, *markdown_entries]) workspace_files = _collect_workspace_files(workspace_dir) - workspace_total_bytes = sum(path.stat().st_size for path in workspace_files) + workspace_total_bytes = _workspace_total_size(workspace_files) config_json_path = _find_openclaw_config_json(source_root, workspace_dir) config_obj: dict[str, Any] | None = None @@ -484,15 +473,15 @@ def run_openclaw_migration( ) wrote_config_toml = True - report = _build_report( - source_root=source_root, - workspace_dir=workspace_dir, - target_dir=resolved_target, + report = MigrationReport( + source_root=str(source_root), + source_workspace=str(workspace_dir), + target_dir=str(resolved_target) if resolved_target else None, dry_run=dry_run, - sqlite_entries=sqlite_entries, - markdown_entries=markdown_entries, - memory_entries=memory_entries, - workspace_files=workspace_files, + memory_entries_total=len(memory_entries), + memory_entries_from_sqlite=len(sqlite_entries), + memory_entries_from_markdown=len(markdown_entries), + workspace_files_total=len(workspace_files), workspace_bytes_total=workspace_total_bytes, config_found=config_obj is not None, copied_workspace_files=copied_workspace_files, diff --git a/astrbot/cli/utils/openclaw_toml.py b/astrbot/cli/utils/openclaw_toml.py index a185c4a4b4..e52b881c7d 100644 --- a/astrbot/cli/utils/openclaw_toml.py +++ b/astrbot/cli/utils/openclaw_toml.py @@ -49,7 +49,14 @@ def _toml_literal(value: Any) -> str: def json_to_toml(data: dict[str, Any]) -> str: - """Serialize a JSON-like dict to TOML text used by migration snapshots.""" + """Serialize a JSON-like dict to TOML text used by migration snapshots. + + Notes: + - Empty lists are emitted as `key = []`. + - Only non-empty `list[dict]` values are emitted as array-of-tables. + For empty lists we intentionally preserve literal emptiness because the + element schema is unknown at serialization time. + """ lines: list[str] = [] def emit_table(obj: dict[str, Any], path: list[str]) -> None: @@ -98,4 +105,3 @@ def emit_table(obj: dict[str, Any], path: list[str]) -> None: __all__ = ["NULL_SENTINEL", "json_to_toml"] - From 6c29d4032b0c7f5421dca6eed80cbf5a891af516 Mon Sep 17 00:00:00 2001 From: Jacobinwwey Date: Sat, 21 Mar 2026 02:31:48 -0500 Subject: [PATCH 05/14] fix(cli): wrap openclaw sqlite read errors as click exceptions --- astrbot/cli/utils/openclaw_migrate.py | 15 +++++++---- tests/unit/test_cli_openclaw_migrate.py | 33 +++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/astrbot/cli/utils/openclaw_migrate.py b/astrbot/cli/utils/openclaw_migrate.py index 46e229008a..f9aaba3c5d 100644 --- a/astrbot/cli/utils/openclaw_migrate.py +++ b/astrbot/cli/utils/openclaw_migrate.py @@ -135,9 +135,10 @@ def _read_openclaw_sqlite_entries(db_path: Path) -> list[MemoryEntry]: if not db_path.exists(): return [] - conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True) - conn.row_factory = sqlite3.Row + conn: sqlite3.Connection | None = None try: + conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True) + conn.row_factory = sqlite3.Row cursor = conn.cursor() table_exists = cursor.execute( "SELECT name FROM sqlite_master WHERE type='table' AND name='memories' LIMIT 1" @@ -178,8 +179,7 @@ def _read_openclaw_sqlite_entries(db_path: Path) -> list[MemoryEntry]: MemoryEntry( key=_normalize_key(row["__key__"], idx), content=content, - category=str(row["__category__"] or "core").strip().lower() - or "core", + category=str(row["__category__"] or "core").strip().lower() or "core", timestamp=_normalize_timestamp(row["__timestamp__"]), source=f"sqlite:{db_path}", order=idx, @@ -187,8 +187,13 @@ def _read_openclaw_sqlite_entries(db_path: Path) -> list[MemoryEntry]: ) return entries + except sqlite3.Error as exc: + raise click.ClickException( + f"Failed to read OpenClaw sqlite at {db_path}: {exc}" + ) from exc finally: - conn.close() + if conn is not None: + conn.close() def _parse_markdown_file( diff --git a/tests/unit/test_cli_openclaw_migrate.py b/tests/unit/test_cli_openclaw_migrate.py index 5e33350909..4c0c23ff7c 100644 --- a/tests/unit/test_cli_openclaw_migrate.py +++ b/tests/unit/test_cli_openclaw_migrate.py @@ -261,6 +261,39 @@ def test_run_openclaw_migration_invalid_config_json_raises_click_exception( assert "Failed to parse OpenClaw config JSON" in str(exc_info.value) +def test_run_openclaw_migration_invalid_sqlite_raises_click_exception( + tmp_path: Path, +) -> None: + source_root = tmp_path / ".openclaw" + source_root.mkdir(parents=True) + workspace = source_root / "workspace" + (workspace / "memory").mkdir(parents=True, exist_ok=True) + (workspace / "notes").mkdir(parents=True, exist_ok=True) + (workspace / "MEMORY.md").write_text("", encoding="utf-8") + (workspace / "memory" / "brain.db").write_text( + "not a sqlite database", + encoding="utf-8", + ) + + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + _prepare_astrbot_root(astrbot_root) + + import click + import pytest + + with pytest.raises(click.ClickException) as exc_info: + run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=True, + ) + + err_text = str(exc_info.value) + assert "Failed to read OpenClaw sqlite at" in err_text + assert "brain.db" in err_text + + def test_json_to_toml_quotes_special_keys() -> None: payload = { "normal key": "ok", From c27096fde06323b09b24ff4df3d051897318c8fe Mon Sep 17 00:00:00 2001 From: Jacobinwwey Date: Sat, 21 Mar 2026 02:38:48 -0500 Subject: [PATCH 06/14] fix(migrate): harden sqlite key fallback and polish cli report --- astrbot/cli/commands/cmd_migrate.py | 2 +- astrbot/cli/utils/openclaw_migrate.py | 39 ++++++++++++++++---- tests/unit/test_cli_openclaw_migrate.py | 48 +++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 8 deletions(-) diff --git a/astrbot/cli/commands/cmd_migrate.py b/astrbot/cli/commands/cmd_migrate.py index 63bd543a21..05c6200d78 100644 --- a/astrbot/cli/commands/cmd_migrate.py +++ b/astrbot/cli/commands/cmd_migrate.py @@ -76,7 +76,7 @@ def migrate_openclaw( click.echo(f" Copied files: {report.copied_workspace_files}") click.echo(f" Imported memories: {report.copied_memory_entries}") click.echo(f" Timeline written: {report.wrote_timeline}") - click.echo(f" Config TOML written:{report.wrote_config_toml}") + click.echo(f" Config TOML written: {report.wrote_config_toml}") click.echo("Done.") diff --git a/astrbot/cli/utils/openclaw_migrate.py b/astrbot/cli/utils/openclaw_migrate.py index f9aaba3c5d..17ebdfa327 100644 --- a/astrbot/cli/utils/openclaw_migrate.py +++ b/astrbot/cli/utils/openclaw_migrate.py @@ -55,10 +55,10 @@ def _pick_existing_column(columns: set[str], candidates: tuple[str, ...]) -> str def _timestamp_from_epoch(raw: float | int | str) -> str | None: - ts = float(raw) - if ts > 1e12: - ts /= 1000.0 try: + ts = float(raw) + if ts > 1e12: + ts /= 1000.0 return dt.datetime.fromtimestamp(ts, tz=dt.timezone.utc).isoformat() except Exception: return None @@ -115,17 +115,42 @@ def _parse_structured_line(line: str) -> tuple[str, str] | None: def _discover_memory_columns( cursor: sqlite3.Cursor, db_path: Path ) -> tuple[str, str, str | None, str | None]: - columns = { + table_info_rows = cursor.execute("PRAGMA table_info(memories)").fetchall() + columns_in_order = [ str(row[1]).strip().lower() - for row in cursor.execute("PRAGMA table_info(memories)").fetchall() - } + for row in table_info_rows + if str(row[1]).strip() + ] + columns = set(columns_in_order) + + key_col = _pick_existing_column(columns, SQLITE_KEY_CANDIDATES) + if key_col is None: + pk_columns = sorted( + ( + (int(row[5]), str(row[1]).strip().lower()) + for row in table_info_rows + if int(row[5]) > 0 and str(row[1]).strip() + ), + key=lambda item: item[0], + ) + if pk_columns: + key_col = pk_columns[0][1] + if key_col is None: + try: + cursor.execute("SELECT rowid FROM memories LIMIT 1").fetchone() + key_col = "rowid" + except sqlite3.Error: + key_col = columns_in_order[0] if columns_in_order else None - key_col = _pick_existing_column(columns, SQLITE_KEY_CANDIDATES) or "rowid" content_col = _pick_existing_column(columns, SQLITE_CONTENT_CANDIDATES) if content_col is None: raise click.ClickException( f"OpenClaw sqlite exists at {db_path}, but no content-like column found" ) + if key_col is None: + raise click.ClickException( + f"OpenClaw sqlite exists at {db_path}, but no key-like or usable fallback column found" + ) category_col = _pick_existing_column(columns, SQLITE_CATEGORY_CANDIDATES) ts_col = _pick_existing_column(columns, SQLITE_TS_CANDIDATES) return key_col, content_col, category_col, ts_col diff --git a/tests/unit/test_cli_openclaw_migrate.py b/tests/unit/test_cli_openclaw_migrate.py index 4c0c23ff7c..dd99dafad2 100644 --- a/tests/unit/test_cli_openclaw_migrate.py +++ b/tests/unit/test_cli_openclaw_migrate.py @@ -109,6 +109,54 @@ def test_migration_supports_legacy_sqlite_columns(tmp_path: Path) -> None: assert entries[0].get("timestamp") is not None +def test_migration_handles_without_rowid_memories_table(tmp_path: Path) -> None: + source_root = tmp_path / ".openclaw" + workspace = source_root / "workspace" + db_dir = workspace / "memory" + db_dir.mkdir(parents=True) + (workspace / "notes").mkdir(parents=True, exist_ok=True) + (workspace / "MEMORY.md").write_text("", encoding="utf-8") + + db_path = db_dir / "brain.db" + conn = sqlite3.connect(db_path) + try: + conn.execute( + """ + CREATE TABLE memories ( + value TEXT NOT NULL, + type TEXT NOT NULL, + updated_at INTEGER, + PRIMARY KEY (value, type) + ) WITHOUT ROWID + """ + ) + conn.execute( + "INSERT INTO memories (value, type, updated_at) VALUES (?, ?, ?)", + ("without-rowid-content", "core", 1700000000), + ) + conn.commit() + finally: + conn.close() + + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + _prepare_astrbot_root(astrbot_root) + + report = run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=False, + target_dir=Path("data/migrations/openclaw/test-without-rowid"), + ) + + assert report.target_dir is not None + entries = _read_migrated_memory_entries(Path(report.target_dir)) + assert len(entries) == 1 + assert entries[0].get("content") == "without-rowid-content" + assert entries[0].get("category") == "core" + assert entries[0].get("key") == "without-rowid-content" + + def test_run_openclaw_migration_dry_run(tmp_path: Path) -> None: source_root = tmp_path / ".openclaw" source_root.mkdir(parents=True) From a6d49e83fdc68e97fc51f28c6d629a318eada2a1 Mon Sep 17 00:00:00 2001 From: Jacobinwwey Date: Sat, 21 Mar 2026 02:45:34 -0500 Subject: [PATCH 07/14] refactor(migrate): streamline orchestration and toml normalization --- astrbot/cli/utils/openclaw_migrate.py | 182 ++++++++++++++++-------- astrbot/cli/utils/openclaw_toml.py | 52 ++++--- docs/en/deploy/astrbot/cli.md | 2 +- tests/unit/test_cli_openclaw_migrate.py | 15 ++ 4 files changed, 173 insertions(+), 78 deletions(-) diff --git a/astrbot/cli/utils/openclaw_migrate.py b/astrbot/cli/utils/openclaw_migrate.py index 17ebdfa327..833e750e66 100644 --- a/astrbot/cli/utils/openclaw_migrate.py +++ b/astrbot/cli/utils/openclaw_migrate.py @@ -47,6 +47,21 @@ class MigrationReport: wrote_config_toml: bool +@dataclass(slots=True) +class MemoryCollection: + entries: list[MemoryEntry] + from_sqlite: int + from_markdown: int + + +@dataclass(slots=True) +class MigrationArtifacts: + copied_workspace_files: int + copied_memory_entries: int + wrote_timeline: bool + wrote_config_toml: bool + + def _pick_existing_column(columns: set[str], candidates: tuple[str, ...]) -> str | None: for candidate in candidates: if candidate in columns: @@ -419,6 +434,88 @@ def _load_json_or_raise(path: Path) -> dict[str, Any]: ) from exc +def _collect_memory_entries(workspace_dir: Path) -> MemoryCollection: + sqlite_entries = _read_openclaw_sqlite_entries(workspace_dir / "memory" / "brain.db") + markdown_entries = _read_openclaw_markdown_entries(workspace_dir) + memory_entries = _dedup_entries([*sqlite_entries, *markdown_entries]) + return MemoryCollection( + entries=memory_entries, + from_sqlite=len(sqlite_entries), + from_markdown=len(markdown_entries), + ) + + +def _resolve_target_dir( + astrbot_root: Path, target_dir: Path | None, dry_run: bool +) -> Path | None: + if target_dir is not None: + return target_dir if target_dir.is_absolute() else (astrbot_root / target_dir) + if dry_run: + return None + run_id = dt.datetime.now().strftime("%Y%m%d-%H%M%S") + return astrbot_root / "data" / "migrations" / "openclaw" / f"run-{run_id}" + + +def _write_migration_artifacts( + *, + workspace_dir: Path, + workspace_files: list[Path], + resolved_target: Path, + source_root: Path, + memory_entries: list[MemoryEntry], + config_obj: dict[str, Any] | None, + config_json_path: Path | None, +) -> MigrationArtifacts: + workspace_target = resolved_target / "workspace" + workspace_target.mkdir(parents=True, exist_ok=True) + + copied_workspace_files = 0 + for src_file in workspace_files: + rel_path = src_file.relative_to(workspace_dir) + dst_file = workspace_target / rel_path + dst_file.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src_file, dst_file) + copied_workspace_files += 1 + + copied_memory_entries = 0 + wrote_timeline = False + if memory_entries: + _write_jsonl(resolved_target / "memory_entries.jsonl", memory_entries) + copied_memory_entries = len(memory_entries) + _write_timeline( + resolved_target / "time_brief_history.md", + memory_entries, + source_root, + ) + wrote_timeline = True + + wrote_config_toml = False + if config_obj is not None: + (resolved_target / "config.original.json").write_text( + json.dumps(config_obj, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + try: + converted_toml = json_to_toml(config_obj) + except ValueError as exc: + source_hint = str(config_json_path) if config_json_path else "config JSON" + raise click.ClickException( + f"Failed to convert {source_hint} to TOML: {exc}" + ) from exc + (resolved_target / "config.migrated.toml").write_text( + converted_toml, + encoding="utf-8", + ) + wrote_config_toml = True + + return MigrationArtifacts( + copied_workspace_files=copied_workspace_files, + copied_memory_entries=copied_memory_entries, + wrote_timeline=wrote_timeline, + wrote_config_toml=wrote_config_toml, + ) + + def run_openclaw_migration( *, source_root: Path, @@ -435,9 +532,8 @@ def run_openclaw_migration( ) workspace_dir = _find_source_workspace(source_root) - sqlite_entries = _read_openclaw_sqlite_entries(workspace_dir / "memory" / "brain.db") - markdown_entries = _read_openclaw_markdown_entries(workspace_dir) - memory_entries = _dedup_entries([*sqlite_entries, *markdown_entries]) + memory_collection = _collect_memory_entries(workspace_dir) + memory_entries = memory_collection.entries workspace_files = _collect_workspace_files(workspace_dir) workspace_total_bytes = _workspace_total_size(workspace_files) @@ -447,61 +543,25 @@ def run_openclaw_migration( if config_json_path is not None: config_obj = _load_json_or_raise(config_json_path) - resolved_target: Path | None = None - if target_dir is not None: - resolved_target = ( - target_dir if target_dir.is_absolute() else (astrbot_root / target_dir) - ) - elif not dry_run: - run_id = dt.datetime.now().strftime("%Y%m%d-%H%M%S") - resolved_target = ( - astrbot_root / "data" / "migrations" / "openclaw" / f"run-{run_id}" - ) - - copied_workspace_files = 0 - copied_memory_entries = 0 - wrote_timeline = False - wrote_config_toml = False + resolved_target = _resolve_target_dir(astrbot_root, target_dir, dry_run) + artifacts = MigrationArtifacts( + copied_workspace_files=0, + copied_memory_entries=0, + wrote_timeline=False, + wrote_config_toml=False, + ) if not dry_run and resolved_target is not None: resolved_target.mkdir(parents=True, exist_ok=True) - workspace_target = resolved_target / "workspace" - workspace_target.mkdir(parents=True, exist_ok=True) - - for src_file in workspace_files: - rel_path = src_file.relative_to(workspace_dir) - dst_file = workspace_target / rel_path - dst_file.parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(src_file, dst_file) - copied_workspace_files += 1 - - if memory_entries: - _write_jsonl(resolved_target / "memory_entries.jsonl", memory_entries) - copied_memory_entries = len(memory_entries) - _write_timeline( - resolved_target / "time_brief_history.md", - memory_entries, - source_root, - ) - wrote_timeline = True - - if config_obj is not None: - (resolved_target / "config.original.json").write_text( - json.dumps(config_obj, ensure_ascii=False, indent=2), - encoding="utf-8", - ) - try: - converted_toml = json_to_toml(config_obj) - except ValueError as exc: - source_hint = str(config_json_path) if config_json_path else "config JSON" - raise click.ClickException( - f"Failed to convert {source_hint} to TOML: {exc}" - ) from exc - (resolved_target / "config.migrated.toml").write_text( - converted_toml, - encoding="utf-8", - ) - wrote_config_toml = True + artifacts = _write_migration_artifacts( + workspace_dir=workspace_dir, + workspace_files=workspace_files, + resolved_target=resolved_target, + source_root=source_root, + memory_entries=memory_entries, + config_obj=config_obj, + config_json_path=config_json_path, + ) report = MigrationReport( source_root=str(source_root), @@ -509,15 +569,15 @@ def run_openclaw_migration( target_dir=str(resolved_target) if resolved_target else None, dry_run=dry_run, memory_entries_total=len(memory_entries), - memory_entries_from_sqlite=len(sqlite_entries), - memory_entries_from_markdown=len(markdown_entries), + memory_entries_from_sqlite=memory_collection.from_sqlite, + memory_entries_from_markdown=memory_collection.from_markdown, workspace_files_total=len(workspace_files), workspace_bytes_total=workspace_total_bytes, config_found=config_obj is not None, - copied_workspace_files=copied_workspace_files, - copied_memory_entries=copied_memory_entries, - wrote_timeline=wrote_timeline, - wrote_config_toml=wrote_config_toml, + copied_workspace_files=artifacts.copied_workspace_files, + copied_memory_entries=artifacts.copied_memory_entries, + wrote_timeline=artifacts.wrote_timeline, + wrote_config_toml=artifacts.wrote_config_toml, ) if not dry_run and resolved_target is not None: diff --git a/astrbot/cli/utils/openclaw_toml.py b/astrbot/cli/utils/openclaw_toml.py index e52b881c7d..bb091dd7f5 100644 --- a/astrbot/cli/utils/openclaw_toml.py +++ b/astrbot/cli/utils/openclaw_toml.py @@ -24,9 +24,40 @@ def _format_toml_path(path: list[str]) -> str: return ".".join(_toml_format_key(str(part)) for part in path) +def _normalize_nulls(obj: Any) -> Any: + if obj is None: + return NULL_SENTINEL + if isinstance(obj, dict): + return {key: _normalize_nulls(value) for key, value in obj.items()} + if isinstance(obj, list): + return [_normalize_nulls(value) for value in obj] + return obj + + +def _classify_items( + obj: dict[str, Any], +) -> tuple[ + list[tuple[str, Any]], + list[tuple[str, dict[str, Any]]], + list[tuple[str, list[dict[str, Any]]]], +]: + scalar_items: list[tuple[str, Any]] = [] + nested_dicts: list[tuple[str, dict[str, Any]]] = [] + array_tables: list[tuple[str, list[dict[str, Any]]]] = [] + + for key, value in obj.items(): + key_text = str(key) + if isinstance(value, dict): + nested_dicts.append((key_text, value)) + elif isinstance(value, list) and value and all(isinstance(item, dict) for item in value): + array_tables.append((key_text, value)) + else: + scalar_items.append((key_text, value)) + + return scalar_items, nested_dicts, array_tables + + def _toml_literal(value: Any) -> str: - if value is None: - return _toml_quote(NULL_SENTINEL) if isinstance(value, bool): return "true" if value else "false" if isinstance(value, int): @@ -57,22 +88,11 @@ def json_to_toml(data: dict[str, Any]) -> str: For empty lists we intentionally preserve literal emptiness because the element schema is unknown at serialization time. """ + normalized_data = _normalize_nulls(data) lines: list[str] = [] def emit_table(obj: dict[str, Any], path: list[str]) -> None: - scalar_items: list[tuple[str, Any]] = [] - nested_dicts: list[tuple[str, dict[str, Any]]] = [] - array_tables: list[tuple[str, list[dict[str, Any]]]] = [] - - for key, value in obj.items(): - if isinstance(value, dict): - nested_dicts.append((str(key), value)) - elif isinstance(value, list) and value and all( - isinstance(item, dict) for item in value - ): - array_tables.append((str(key), value)) - else: - scalar_items.append((str(key), value)) + scalar_items, nested_dicts, array_tables = _classify_items(obj) if path: lines.append(f"[{_format_toml_path(path)}]") @@ -98,7 +118,7 @@ def emit_table(obj: dict[str, Any], path: list[str]) -> None: if t_idx == len(array_tables) - 1 and lines and lines[-1] == "": lines.pop() - emit_table(data, []) + emit_table(normalized_data, []) if not lines: return "" return "\n".join(lines).rstrip() + "\n" diff --git a/docs/en/deploy/astrbot/cli.md b/docs/en/deploy/astrbot/cli.md index 7ac10c2a21..ecac9ae82b 100644 --- a/docs/en/deploy/astrbot/cli.md +++ b/docs/en/deploy/astrbot/cli.md @@ -96,7 +96,7 @@ Next, you need to deploy any messaging platform to use AstrBot on that platform. If you previously used OpenClaw, AstrBot now provides a preliminary migration command that can import OpenClaw workspace snapshots into AstrBot migration artifacts (including memory entries, workspace files, config conversion, and a generated `time_brief_history.md`): ```bash -# Preview only (no write) +# Preview only (no writes to disk) astrbot migrate openclaw --dry-run # Execute migration diff --git a/tests/unit/test_cli_openclaw_migrate.py b/tests/unit/test_cli_openclaw_migrate.py index dd99dafad2..99686387c4 100644 --- a/tests/unit/test_cli_openclaw_migrate.py +++ b/tests/unit/test_cli_openclaw_migrate.py @@ -362,3 +362,18 @@ def test_json_to_toml_rejects_non_finite_float() -> None: with pytest.raises(ValueError): json_to_toml({"invalid": float("nan")}) + + +def test_json_to_toml_preserves_null_sentinel_behavior() -> None: + toml_text = json_to_toml( + { + "nullable": None, + "nested": {"inner": None}, + "list": [None, 1], + } + ) + + assert '"nullable" = "__NULL__"' in toml_text + assert '["nested"]' in toml_text + assert '"inner" = "__NULL__"' in toml_text + assert '"list" = ["__NULL__", 1]' in toml_text From 1f8a486ef6298d92549805cf3d3f4f733851ea5d Mon Sep 17 00:00:00 2001 From: Jacobinwwey Date: Sat, 21 Mar 2026 02:57:29 -0500 Subject: [PATCH 08/14] fix(migrate): normalize dry-run target and exclude nested output --- astrbot/cli/utils/openclaw_migrate.py | 31 ++++++++-- tests/unit/test_cli_openclaw_migrate.py | 77 +++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 6 deletions(-) diff --git a/astrbot/cli/utils/openclaw_migrate.py b/astrbot/cli/utils/openclaw_migrate.py index 833e750e66..925a5693e0 100644 --- a/astrbot/cli/utils/openclaw_migrate.py +++ b/astrbot/cli/utils/openclaw_migrate.py @@ -360,11 +360,21 @@ def _find_openclaw_config_json(source_root: Path, workspace_dir: Path) -> Path | return None -def _collect_workspace_files(workspace_dir: Path) -> list[Path]: +def _collect_workspace_files( + workspace_dir: Path, *, exclude_dir: Path | None = None +) -> list[Path]: files: list[Path] = [] + exclude_resolved = exclude_dir.resolve() if exclude_dir is not None else None for path in workspace_dir.rglob("*"): - if path.is_file() and not path.is_symlink(): - files.append(path) + if not path.is_file() or path.is_symlink(): + continue + if exclude_resolved is not None: + try: + path.resolve().relative_to(exclude_resolved) + continue + except (OSError, ValueError): + pass + files.append(path) return sorted(files) @@ -448,10 +458,10 @@ def _collect_memory_entries(workspace_dir: Path) -> MemoryCollection: def _resolve_target_dir( astrbot_root: Path, target_dir: Path | None, dry_run: bool ) -> Path | None: - if target_dir is not None: - return target_dir if target_dir.is_absolute() else (astrbot_root / target_dir) if dry_run: return None + if target_dir is not None: + return target_dir if target_dir.is_absolute() else (astrbot_root / target_dir) run_id = dt.datetime.now().strftime("%Y%m%d-%H%M%S") return astrbot_root / "data" / "migrations" / "openclaw" / f"run-{run_id}" @@ -535,7 +545,16 @@ def run_openclaw_migration( memory_collection = _collect_memory_entries(workspace_dir) memory_entries = memory_collection.entries - workspace_files = _collect_workspace_files(workspace_dir) + explicit_target_dir: Path | None = None + if target_dir is not None: + explicit_target_dir = ( + target_dir if target_dir.is_absolute() else (astrbot_root / target_dir) + ) + + workspace_files = _collect_workspace_files( + workspace_dir, + exclude_dir=explicit_target_dir, + ) workspace_total_bytes = _workspace_total_size(workspace_files) config_json_path = _find_openclaw_config_json(source_root, workspace_dir) diff --git a/tests/unit/test_cli_openclaw_migrate.py b/tests/unit/test_cli_openclaw_migrate.py index 99686387c4..2f3f571ad6 100644 --- a/tests/unit/test_cli_openclaw_migrate.py +++ b/tests/unit/test_cli_openclaw_migrate.py @@ -180,6 +180,30 @@ def test_run_openclaw_migration_dry_run(tmp_path: Path) -> None: assert not (astrbot_root / "data" / "migrations" / "openclaw").exists() +def test_run_openclaw_migration_dry_run_with_explicit_target_reports_none( + tmp_path: Path, +) -> None: + source_root = tmp_path / ".openclaw" + source_root.mkdir(parents=True) + _prepare_openclaw_source(source_root) + + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + _prepare_astrbot_root(astrbot_root) + + explicit_target = astrbot_root / "data" / "migrations" / "openclaw" / "dry-run-target" + report = run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=True, + target_dir=explicit_target, + ) + + assert report.dry_run is True + assert report.target_dir is None + assert not explicit_target.exists() + + def test_run_openclaw_migration_writes_artifacts(tmp_path: Path) -> None: source_root = tmp_path / ".openclaw" source_root.mkdir(parents=True) @@ -218,6 +242,59 @@ def test_run_openclaw_migration_writes_artifacts(tmp_path: Path) -> None: assert '[["skills"]]' in toml_text +def test_run_openclaw_migration_writes_to_default_timestamp_target(tmp_path: Path) -> None: + source_root = tmp_path / ".openclaw" + source_root.mkdir(parents=True) + _prepare_openclaw_source(source_root) + + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + _prepare_astrbot_root(astrbot_root) + + report = run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=False, + target_dir=None, + ) + + assert report.target_dir is not None + target = Path(report.target_dir) + assert target.exists() + expected_root = astrbot_root / "data" / "migrations" / "openclaw" + assert target.parent == expected_root + assert target.name.startswith("run-") + + +def test_run_openclaw_migration_excludes_target_inside_workspace(tmp_path: Path) -> None: + source_root = tmp_path / ".openclaw" + source_root.mkdir(parents=True) + _prepare_openclaw_source(source_root) + + workspace = source_root / "workspace" + target_inside_workspace = workspace / "snapshot-output" + target_inside_workspace.mkdir(parents=True, exist_ok=True) + (target_inside_workspace / "stale.txt").write_text("old artifact", encoding="utf-8") + + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + _prepare_astrbot_root(astrbot_root) + + report = run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=False, + target_dir=target_inside_workspace, + ) + + assert report.target_dir is not None + target = Path(report.target_dir) + assert target == target_inside_workspace + + # Files from the output directory itself must not be re-copied into snapshot workspace. + assert not (target / "workspace" / "snapshot-output" / "stale.txt").exists() + + def test_markdown_parsing_structured_and_plain_lines(tmp_path: Path) -> None: source_root = tmp_path / ".openclaw" source_root.mkdir(parents=True) From 4dd715480f9a92c321574c57eb73a28e0494b419 Mon Sep 17 00:00:00 2001 From: Jacobinwwey Date: Sat, 21 Mar 2026 03:06:01 -0500 Subject: [PATCH 09/14] fix(migrate): tighten cli target semantics and add cmd regression test --- astrbot/cli/commands/cmd_migrate.py | 2 +- astrbot/cli/utils/__init__.py | 8 ---- astrbot/cli/utils/openclaw_toml.py | 2 +- tests/unit/test_cli_cmd_migrate.py | 54 +++++++++++++++++++++++++ tests/unit/test_cli_openclaw_migrate.py | 6 +-- 5 files changed, 59 insertions(+), 13 deletions(-) create mode 100644 tests/unit/test_cli_cmd_migrate.py diff --git a/astrbot/cli/commands/cmd_migrate.py b/astrbot/cli/commands/cmd_migrate.py index 05c6200d78..ee854e6c55 100644 --- a/astrbot/cli/commands/cmd_migrate.py +++ b/astrbot/cli/commands/cmd_migrate.py @@ -24,7 +24,7 @@ def migrate() -> None: @click.option( "--target", "target_path", - type=click.Path(path_type=Path, file_okay=False, resolve_path=True), + type=click.Path(path_type=Path, file_okay=False, resolve_path=False), default=None, help=( "Custom output directory. If omitted, writes to " diff --git a/astrbot/cli/utils/__init__.py b/astrbot/cli/utils/__init__.py index f0f131feea..3830682f0d 100644 --- a/astrbot/cli/utils/__init__.py +++ b/astrbot/cli/utils/__init__.py @@ -3,24 +3,16 @@ check_dashboard, get_astrbot_root, ) -from .openclaw_migrate import ( - MemoryEntry, - MigrationReport, - run_openclaw_migration, -) from .plugin import PluginStatus, build_plug_list, get_git_repo, manage_plugin from .version_comparator import VersionComparator __all__ = [ "PluginStatus", "VersionComparator", - "MemoryEntry", - "MigrationReport", "build_plug_list", "check_astrbot_root", "check_dashboard", "get_astrbot_root", "get_git_repo", "manage_plugin", - "run_openclaw_migration", ] diff --git a/astrbot/cli/utils/openclaw_toml.py b/astrbot/cli/utils/openclaw_toml.py index bb091dd7f5..28a0238925 100644 --- a/astrbot/cli/utils/openclaw_toml.py +++ b/astrbot/cli/utils/openclaw_toml.py @@ -5,7 +5,7 @@ # TOML has no null literal. Keep this centralized so behavior is explicit and # easy to adjust in future migrations. -NULL_SENTINEL = "__NULL__" +NULL_SENTINEL = "__ASTRBOT_OPENCLAW_NULL_SENTINEL_V1__" def _toml_escape(value: str) -> str: diff --git a/tests/unit/test_cli_cmd_migrate.py b/tests/unit/test_cli_cmd_migrate.py new file mode 100644 index 0000000000..702da91102 --- /dev/null +++ b/tests/unit/test_cli_cmd_migrate.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from pathlib import Path + +from click.testing import CliRunner + +from astrbot.cli.commands import cmd_migrate +from astrbot.cli.utils.openclaw_migrate import MigrationReport + + +def test_migrate_openclaw_reports_config_toml_field_and_relative_target( + monkeypatch, + tmp_path: Path, +) -> None: + source_root = tmp_path / ".openclaw" + source_root.mkdir(parents=True) + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + + captured: dict[str, object] = {} + + def _fake_run_openclaw_migration(**kwargs: object) -> MigrationReport: + captured.update(kwargs) + return MigrationReport( + source_root=str(source_root), + source_workspace=str(source_root / "workspace"), + target_dir=str(astrbot_root / "data" / "migrations" / "openclaw" / "run-test"), + dry_run=False, + memory_entries_total=3, + memory_entries_from_sqlite=2, + memory_entries_from_markdown=1, + workspace_files_total=5, + workspace_bytes_total=1024, + config_found=True, + copied_workspace_files=5, + copied_memory_entries=3, + wrote_timeline=False, + wrote_config_toml=True, + ) + + monkeypatch.setattr(cmd_migrate, "get_astrbot_root", lambda: astrbot_root) + monkeypatch.setattr(cmd_migrate, "run_openclaw_migration", _fake_run_openclaw_migration) + + runner = CliRunner() + result = runner.invoke( + cmd_migrate.migrate, + ["openclaw", "--source", str(source_root), "--target", "data/migrations/custom"], + ) + + assert result.exit_code == 0, result.output + assert captured["target_dir"] == Path("data/migrations/custom") + assert "Timeline written: False" in result.output + assert "Config TOML written: True" in result.output + diff --git a/tests/unit/test_cli_openclaw_migrate.py b/tests/unit/test_cli_openclaw_migrate.py index 2f3f571ad6..273725d172 100644 --- a/tests/unit/test_cli_openclaw_migrate.py +++ b/tests/unit/test_cli_openclaw_migrate.py @@ -450,7 +450,7 @@ def test_json_to_toml_preserves_null_sentinel_behavior() -> None: } ) - assert '"nullable" = "__NULL__"' in toml_text + assert '"nullable" = "__ASTRBOT_OPENCLAW_NULL_SENTINEL_V1__"' in toml_text assert '["nested"]' in toml_text - assert '"inner" = "__NULL__"' in toml_text - assert '"list" = ["__NULL__", 1]' in toml_text + assert '"inner" = "__ASTRBOT_OPENCLAW_NULL_SENTINEL_V1__"' in toml_text + assert '"list" = ["__ASTRBOT_OPENCLAW_NULL_SENTINEL_V1__", 1]' in toml_text From 40bbb9f2ea86d46a5410795faf886834b3c4ae0f Mon Sep 17 00:00:00 2001 From: Jacobinwwey Date: Sat, 21 Mar 2026 03:14:17 -0500 Subject: [PATCH 10/14] refactor(toml): simplify quoting helpers with behavior parity --- astrbot/cli/utils/openclaw_toml.py | 21 ++++++--------------- tests/unit/test_cli_openclaw_migrate.py | 12 ++++++++++++ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/astrbot/cli/utils/openclaw_toml.py b/astrbot/cli/utils/openclaw_toml.py index 28a0238925..a583e1cec4 100644 --- a/astrbot/cli/utils/openclaw_toml.py +++ b/astrbot/cli/utils/openclaw_toml.py @@ -8,20 +8,13 @@ NULL_SENTINEL = "__ASTRBOT_OPENCLAW_NULL_SENTINEL_V1__" -def _toml_escape(value: str) -> str: - return value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n") - - def _toml_quote(value: str) -> str: - return f'"{_toml_escape(value)}"' - - -def _toml_format_key(key: str) -> str: - return _toml_quote(key) + escaped = value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n") + return f'"{escaped}"' def _format_toml_path(path: list[str]) -> str: - return ".".join(_toml_format_key(str(part)) for part in path) + return ".".join(_toml_quote(str(part)) for part in path) def _normalize_nulls(obj: Any) -> Any: @@ -73,7 +66,7 @@ def _toml_literal(value: Any) -> str: return "[" + ", ".join(_toml_literal(v) for v in value) + "]" if isinstance(value, dict): pairs = ", ".join( - f"{_toml_format_key(str(k))} = {_toml_literal(v)}" for k, v in value.items() + f"{_toml_quote(str(k))} = {_toml_literal(v)}" for k, v in value.items() ) return "{ " + pairs + " }" return _toml_quote(str(value)) @@ -97,7 +90,7 @@ def emit_table(obj: dict[str, Any], path: list[str]) -> None: if path: lines.append(f"[{_format_toml_path(path)}]") for key, value in scalar_items: - lines.append(f"{_toml_format_key(key)} = {_toml_literal(value)}") + lines.append(f"{_toml_quote(key)} = {_toml_literal(value)}") if scalar_items and (nested_dicts or array_tables): lines.append("") @@ -111,9 +104,7 @@ def emit_table(obj: dict[str, Any], path: list[str]) -> None: for item in items: lines.append(f"[[{_format_toml_path(table_path)}]]") for sub_key, sub_value in item.items(): - lines.append( - f"{_toml_format_key(str(sub_key))} = {_toml_literal(sub_value)}" - ) + lines.append(f"{_toml_quote(str(sub_key))} = {_toml_literal(sub_value)}") lines.append("") if t_idx == len(array_tables) - 1 and lines and lines[-1] == "": lines.pop() diff --git a/tests/unit/test_cli_openclaw_migrate.py b/tests/unit/test_cli_openclaw_migrate.py index 273725d172..b4306df319 100644 --- a/tests/unit/test_cli_openclaw_migrate.py +++ b/tests/unit/test_cli_openclaw_migrate.py @@ -454,3 +454,15 @@ def test_json_to_toml_preserves_null_sentinel_behavior() -> None: assert '["nested"]' in toml_text assert '"inner" = "__ASTRBOT_OPENCLAW_NULL_SENTINEL_V1__"' in toml_text assert '"list" = ["__ASTRBOT_OPENCLAW_NULL_SENTINEL_V1__", 1]' in toml_text + + +def test_json_to_toml_escapes_quotes_backslashes_and_newlines() -> None: + toml_text = json_to_toml( + { + 'k"ey': "line1\nline2", + "path": "C:\\tmp\\file.txt", + } + ) + + assert '"k\\"ey" = "line1\\nline2"' in toml_text + assert '"path" = "C:\\\\tmp\\\\file.txt"' in toml_text From 7391ba4d6ba58b1f542a0483cb8e2d2709ad3c20 Mon Sep 17 00:00:00 2001 From: Jacobinwwey Date: Sat, 21 Mar 2026 03:18:42 -0500 Subject: [PATCH 11/14] refactor(toml): handle null sentinel inline without pre-pass --- astrbot/cli/utils/openclaw_toml.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/astrbot/cli/utils/openclaw_toml.py b/astrbot/cli/utils/openclaw_toml.py index a583e1cec4..090c682fd5 100644 --- a/astrbot/cli/utils/openclaw_toml.py +++ b/astrbot/cli/utils/openclaw_toml.py @@ -17,16 +17,6 @@ def _format_toml_path(path: list[str]) -> str: return ".".join(_toml_quote(str(part)) for part in path) -def _normalize_nulls(obj: Any) -> Any: - if obj is None: - return NULL_SENTINEL - if isinstance(obj, dict): - return {key: _normalize_nulls(value) for key, value in obj.items()} - if isinstance(obj, list): - return [_normalize_nulls(value) for value in obj] - return obj - - def _classify_items( obj: dict[str, Any], ) -> tuple[ @@ -51,6 +41,9 @@ def _classify_items( def _toml_literal(value: Any) -> str: + if value is None: + # TOML has no null literal; preserve previous output contract. + return _toml_quote(NULL_SENTINEL) if isinstance(value, bool): return "true" if value else "false" if isinstance(value, int): @@ -81,7 +74,6 @@ def json_to_toml(data: dict[str, Any]) -> str: For empty lists we intentionally preserve literal emptiness because the element schema is unknown at serialization time. """ - normalized_data = _normalize_nulls(data) lines: list[str] = [] def emit_table(obj: dict[str, Any], path: list[str]) -> None: @@ -109,7 +101,7 @@ def emit_table(obj: dict[str, Any], path: list[str]) -> None: if t_idx == len(array_tables) - 1 and lines and lines[-1] == "": lines.pop() - emit_table(normalized_data, []) + emit_table(data, []) if not lines: return "" return "\n".join(lines).rstrip() + "\n" From 9b136d5cbfc5cdd51b62d17c0102a4fcabeadeb7 Mon Sep 17 00:00:00 2001 From: Jacobinwwey Date: Sat, 21 Mar 2026 03:27:23 -0500 Subject: [PATCH 12/14] refactor(migrate): modularize openclaw flow and harden dry-run scan --- astrbot/cli/commands/cmd_migrate.py | 2 + astrbot/cli/utils/openclaw_artifacts.py | 167 ++++++++ astrbot/cli/utils/openclaw_memory.py | 290 +++++++++++++ astrbot/cli/utils/openclaw_migrate.py | 540 ++---------------------- astrbot/cli/utils/openclaw_models.py | 34 ++ tests/unit/test_cli_cmd_migrate.py | 51 +++ tests/unit/test_cli_openclaw_migrate.py | 36 ++ 7 files changed, 620 insertions(+), 500 deletions(-) create mode 100644 astrbot/cli/utils/openclaw_artifacts.py create mode 100644 astrbot/cli/utils/openclaw_memory.py create mode 100644 astrbot/cli/utils/openclaw_models.py diff --git a/astrbot/cli/commands/cmd_migrate.py b/astrbot/cli/commands/cmd_migrate.py index ee854e6c55..1a77fb80bf 100644 --- a/astrbot/cli/commands/cmd_migrate.py +++ b/astrbot/cli/commands/cmd_migrate.py @@ -68,6 +68,8 @@ def migrate_openclaw( if dry_run: click.echo("") click.echo("Dry-run mode: no files were written.") + if target_path is not None: + click.echo("Note: --target is ignored when --dry-run is enabled.") click.echo("Run without --dry-run to perform migration.") return diff --git a/astrbot/cli/utils/openclaw_artifacts.py b/astrbot/cli/utils/openclaw_artifacts.py new file mode 100644 index 0000000000..884215a84a --- /dev/null +++ b/astrbot/cli/utils/openclaw_artifacts.py @@ -0,0 +1,167 @@ +from __future__ import annotations + +import datetime as dt +import json +import os +import shutil +from pathlib import Path +from typing import Any + +import click + +from .openclaw_models import MemoryEntry +from .openclaw_toml import json_to_toml + + +def _is_within(path: Path, parent: Path) -> bool: + try: + path.resolve().relative_to(parent.resolve()) + return True + except (OSError, ValueError): + return False + + +def collect_workspace_files( + workspace_dir: Path, *, exclude_dir: Path | None = None +) -> list[Path]: + files: list[Path] = [] + exclude_resolved = exclude_dir.resolve() if exclude_dir is not None else None + + for root, dirnames, filenames in os.walk( + workspace_dir, topdown=True, followlinks=False + ): + root_path = Path(root) + + pruned_dirs: list[str] = [] + for dirname in dirnames: + dir_path = root_path / dirname + if dir_path.is_symlink(): + continue + if exclude_resolved is not None and _is_within(dir_path, exclude_resolved): + continue + pruned_dirs.append(dirname) + dirnames[:] = pruned_dirs + + for filename in filenames: + path = root_path / filename + if path.is_symlink() or not path.is_file(): + continue + if exclude_resolved is not None and _is_within(path, exclude_resolved): + continue + files.append(path) + + return sorted(files) + + +def workspace_total_size(files: list[Path]) -> int: + total_bytes = 0 + for path in files: + try: + total_bytes += path.stat().st_size + except OSError: + # Best-effort accounting: files may disappear or become unreadable + # during migration scans. + continue + return total_bytes + + +def _write_jsonl(path: Path, entries: list[MemoryEntry]) -> None: + with path.open("w", encoding="utf-8") as fp: + for entry in entries: + fp.write( + json.dumps( + { + "key": entry.key, + "content": entry.content, + "category": entry.category, + "timestamp": entry.timestamp, + "source": entry.source, + }, + ensure_ascii=False, + ) + + "\n" + ) + + +def _write_timeline(path: Path, entries: list[MemoryEntry], source_root: Path) -> None: + ordered = sorted(entries, key=lambda e: (e.timestamp or "", e.order)) + + lines: list[str] = [] + lines.append("# OpenClaw Migration - Time Brief History") + lines.append("") + lines.append("> 时间简史(初步方案):按时间汇总可迁移记忆条目。") + lines.append("") + lines.append(f"- Generated at: {dt.datetime.now(dt.timezone.utc).isoformat()}") + lines.append(f"- Source: `{source_root}`") + lines.append(f"- Total entries: {len(ordered)}") + lines.append("") + lines.append("## Timeline") + lines.append("") + + for entry in ordered: + ts = entry.timestamp or "unknown" + snippet = entry.content.replace("\n", " ").strip() + if len(snippet) > 160: + snippet = snippet[:157] + "..." + lines.append(f"- [{ts}] ({entry.category}) `{entry.key}`: {snippet}") + + lines.append("") + path.write_text("\n".join(lines), encoding="utf-8") + + +def write_migration_artifacts( + *, + workspace_dir: Path, + workspace_files: list[Path], + resolved_target: Path, + source_root: Path, + memory_entries: list[MemoryEntry], + config_obj: dict[str, Any] | None, + config_json_path: Path | None, +) -> tuple[int, int, bool, bool]: + workspace_target = resolved_target / "workspace" + workspace_target.mkdir(parents=True, exist_ok=True) + + copied_workspace_files = 0 + for src_file in workspace_files: + rel_path = src_file.relative_to(workspace_dir) + dst_file = workspace_target / rel_path + dst_file.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src_file, dst_file) + copied_workspace_files += 1 + + copied_memory_entries = 0 + wrote_timeline = False + if memory_entries: + _write_jsonl(resolved_target / "memory_entries.jsonl", memory_entries) + copied_memory_entries = len(memory_entries) + _write_timeline( + resolved_target / "time_brief_history.md", + memory_entries, + source_root, + ) + wrote_timeline = True + + wrote_config_toml = False + if config_obj is not None: + (resolved_target / "config.original.json").write_text( + json.dumps(config_obj, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + try: + converted_toml = json_to_toml(config_obj) + except ValueError as exc: + source_hint = str(config_json_path) if config_json_path else "config JSON" + raise click.ClickException( + f"Failed to convert {source_hint} to TOML: {exc}" + ) from exc + (resolved_target / "config.migrated.toml").write_text( + converted_toml, + encoding="utf-8", + ) + wrote_config_toml = True + + return copied_workspace_files, copied_memory_entries, wrote_timeline, wrote_config_toml + + +__all__ = ["collect_workspace_files", "workspace_total_size", "write_migration_artifacts"] diff --git a/astrbot/cli/utils/openclaw_memory.py b/astrbot/cli/utils/openclaw_memory.py new file mode 100644 index 0000000000..efb71f1e78 --- /dev/null +++ b/astrbot/cli/utils/openclaw_memory.py @@ -0,0 +1,290 @@ +from __future__ import annotations + +import datetime as dt +import sqlite3 +from pathlib import Path +from typing import Any + +import click + +from .openclaw_models import MemoryEntry + +SQLITE_KEY_CANDIDATES = ("key", "id", "name") +SQLITE_CONTENT_CANDIDATES = ("content", "value", "text", "memory") +SQLITE_CATEGORY_CANDIDATES = ("category", "kind", "type") +SQLITE_TS_CANDIDATES = ("updated_at", "created_at", "timestamp", "ts", "time") + + +def _pick_existing_column(columns: set[str], candidates: tuple[str, ...]) -> str | None: + for candidate in candidates: + if candidate in columns: + return candidate + return None + + +def _timestamp_from_epoch(raw: float | int | str) -> str | None: + try: + ts = float(raw) + if ts > 1e12: + ts /= 1000.0 + return dt.datetime.fromtimestamp(ts, tz=dt.timezone.utc).isoformat() + except Exception: + return None + + +def _normalize_timestamp(raw: Any) -> str | None: + if raw is None: + return None + + if isinstance(raw, (int, float)): + normalized = _timestamp_from_epoch(raw) + return normalized if normalized is not None else str(raw) + + text = str(raw).strip() + if not text: + return None + + if text.isdigit(): + normalized = _timestamp_from_epoch(text) + return normalized if normalized is not None else text + + maybe_iso = text.replace("Z", "+00:00") + try: + parsed = dt.datetime.fromisoformat(maybe_iso) + if parsed.tzinfo is None: + parsed = parsed.replace(tzinfo=dt.timezone.utc) + return parsed.isoformat() + except Exception: + return text + + +def _normalize_key(raw: Any, fallback_idx: int) -> str: + text = str(raw).strip() if raw is not None else "" + if text: + return text + return f"openclaw_{fallback_idx}" + + +def _parse_structured_line(line: str) -> tuple[str, str] | None: + if not line.startswith("**"): + return None + rest = line[2:] + marker = "**:" + marker_idx = rest.find(marker) + if marker_idx <= 0: + return None + key = rest[:marker_idx].strip() + value = rest[marker_idx + len(marker) :].strip() + if not key or not value: + return None + return key, value + + +def _discover_memory_columns( + cursor: sqlite3.Cursor, db_path: Path +) -> tuple[str, str, str | None, str | None]: + table_info_rows = cursor.execute("PRAGMA table_info(memories)").fetchall() + columns_in_order = [ + str(row[1]).strip().lower() + for row in table_info_rows + if str(row[1]).strip() + ] + columns = set(columns_in_order) + + key_col = _pick_existing_column(columns, SQLITE_KEY_CANDIDATES) + if key_col is None: + pk_columns = sorted( + ( + (int(row[5]), str(row[1]).strip().lower()) + for row in table_info_rows + if int(row[5]) > 0 and str(row[1]).strip() + ), + key=lambda item: item[0], + ) + if pk_columns: + key_col = pk_columns[0][1] + if key_col is None: + try: + cursor.execute("SELECT rowid FROM memories LIMIT 1").fetchone() + key_col = "rowid" + except sqlite3.Error: + key_col = columns_in_order[0] if columns_in_order else None + + content_col = _pick_existing_column(columns, SQLITE_CONTENT_CANDIDATES) + if content_col is None: + raise click.ClickException( + f"OpenClaw sqlite exists at {db_path}, but no content-like column found" + ) + if key_col is None: + raise click.ClickException( + f"OpenClaw sqlite exists at {db_path}, but no key-like or usable fallback column found" + ) + category_col = _pick_existing_column(columns, SQLITE_CATEGORY_CANDIDATES) + ts_col = _pick_existing_column(columns, SQLITE_TS_CANDIDATES) + return key_col, content_col, category_col, ts_col + + +def _read_openclaw_sqlite_entries(db_path: Path) -> list[MemoryEntry]: + if not db_path.exists(): + return [] + + conn: sqlite3.Connection | None = None + try: + conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True) + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + table_exists = cursor.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='memories' LIMIT 1" + ).fetchone() + if table_exists is None: + return [] + + key_col, content_col, category_col, ts_col = _discover_memory_columns( + cursor, db_path + ) + + select_clauses = [ + f"{key_col} AS __key__", + f"{content_col} AS __content__", + ( + f"{category_col} AS __category__" + if category_col is not None + else "'core' AS __category__" + ), + f"{ts_col} AS __timestamp__" if ts_col is not None else "NULL AS __timestamp__", + ] + order_by_clause = ( + " ORDER BY __timestamp__ ASC, __key__ ASC" + if ts_col is not None + else " ORDER BY __key__ ASC" + ) + rows = cursor.execute( + "SELECT " + ", ".join(select_clauses) + " FROM memories" + order_by_clause + ).fetchall() + + entries: list[MemoryEntry] = [] + for idx, row in enumerate(rows): + content = str(row["__content__"] or "").strip() + if not content: + continue + + entries.append( + MemoryEntry( + key=_normalize_key(row["__key__"], idx), + content=content, + category=str(row["__category__"] or "core").strip().lower() or "core", + timestamp=_normalize_timestamp(row["__timestamp__"]), + source=f"sqlite:{db_path}", + order=idx, + ) + ) + + return entries + except sqlite3.Error as exc: + raise click.ClickException( + f"Failed to read OpenClaw sqlite at {db_path}: {exc}" + ) from exc + finally: + if conn is not None: + conn.close() + + +def _parse_markdown_file( + path: Path, default_category: str, stem: str, order_offset: int +) -> list[MemoryEntry]: + content = path.read_text(encoding="utf-8", errors="replace") + mtime = _normalize_timestamp(path.stat().st_mtime) + entries: list[MemoryEntry] = [] + line_no = 0 + for raw_line in content.splitlines(): + line_no += 1 + stripped = raw_line.strip() + if not stripped or stripped.startswith("#"): + continue + + line = stripped[2:] if stripped.startswith("- ") else stripped + parsed = _parse_structured_line(line) + if parsed is not None: + key, text = parsed + key = _normalize_key(key, line_no) + body = text.strip() + else: + key = f"openclaw_{stem}_{line_no}" + body = line.strip() + + if not body: + continue + + entries.append( + MemoryEntry( + key=key, + content=body, + category=default_category, + timestamp=mtime, + source=f"markdown:{path}", + order=order_offset + len(entries), + ) + ) + return entries + + +def _read_openclaw_markdown_entries(workspace_dir: Path) -> list[MemoryEntry]: + entries: list[MemoryEntry] = [] + + core_path = workspace_dir / "MEMORY.md" + if core_path.exists(): + entries.extend( + _parse_markdown_file( + core_path, + default_category="core", + stem="core", + order_offset=len(entries), + ) + ) + + daily_dir = workspace_dir / "memory" + if daily_dir.exists(): + for md_path in sorted(daily_dir.glob("*.md")): + stem = md_path.stem or "daily" + entries.extend( + _parse_markdown_file( + md_path, + default_category="daily", + stem=stem, + order_offset=len(entries), + ) + ) + + return entries + + +def _dedup_entries(entries: list[MemoryEntry]) -> list[MemoryEntry]: + seen_exact: set[tuple[str, str, str, str]] = set() + seen_semantic: set[tuple[str, str]] = set() + deduped: list[MemoryEntry] = [] + + for item in entries: + exact_key = ( + item.key.strip(), + item.content.strip(), + item.category.strip(), + item.timestamp or "", + ) + semantic_key = (item.content.strip(), item.category.strip()) + if exact_key in seen_exact or semantic_key in seen_semantic: + continue + seen_exact.add(exact_key) + seen_semantic.add(semantic_key) + deduped.append(item) + + return deduped + + +def collect_memory_entries(workspace_dir: Path) -> tuple[list[MemoryEntry], int, int]: + sqlite_entries = _read_openclaw_sqlite_entries(workspace_dir / "memory" / "brain.db") + markdown_entries = _read_openclaw_markdown_entries(workspace_dir) + memory_entries = _dedup_entries([*sqlite_entries, *markdown_entries]) + return memory_entries, len(sqlite_entries), len(markdown_entries) + + +__all__ = ["collect_memory_entries"] diff --git a/astrbot/cli/utils/openclaw_migrate.py b/astrbot/cli/utils/openclaw_migrate.py index 925a5693e0..bb9231cc99 100644 --- a/astrbot/cli/utils/openclaw_migrate.py +++ b/astrbot/cli/utils/openclaw_migrate.py @@ -2,342 +2,20 @@ import datetime as dt import json -import shutil -import sqlite3 -from dataclasses import asdict, dataclass +from dataclasses import asdict from pathlib import Path from typing import Any import click from .basic import check_astrbot_root -from .openclaw_toml import json_to_toml - -SQLITE_KEY_CANDIDATES = ("key", "id", "name") -SQLITE_CONTENT_CANDIDATES = ("content", "value", "text", "memory") -SQLITE_CATEGORY_CANDIDATES = ("category", "kind", "type") -SQLITE_TS_CANDIDATES = ("updated_at", "created_at", "timestamp", "ts", "time") - - -@dataclass(slots=True) -class MemoryEntry: - key: str - content: str - category: str - timestamp: str | None - source: str - order: int - - -@dataclass(slots=True) -class MigrationReport: - source_root: str - source_workspace: str - target_dir: str | None - dry_run: bool - memory_entries_total: int - memory_entries_from_sqlite: int - memory_entries_from_markdown: int - workspace_files_total: int - workspace_bytes_total: int - config_found: bool - copied_workspace_files: int - copied_memory_entries: int - wrote_timeline: bool - wrote_config_toml: bool - - -@dataclass(slots=True) -class MemoryCollection: - entries: list[MemoryEntry] - from_sqlite: int - from_markdown: int - - -@dataclass(slots=True) -class MigrationArtifacts: - copied_workspace_files: int - copied_memory_entries: int - wrote_timeline: bool - wrote_config_toml: bool - - -def _pick_existing_column(columns: set[str], candidates: tuple[str, ...]) -> str | None: - for candidate in candidates: - if candidate in columns: - return candidate - return None - - -def _timestamp_from_epoch(raw: float | int | str) -> str | None: - try: - ts = float(raw) - if ts > 1e12: - ts /= 1000.0 - return dt.datetime.fromtimestamp(ts, tz=dt.timezone.utc).isoformat() - except Exception: - return None - - -def _normalize_timestamp(raw: Any) -> str | None: - if raw is None: - return None - - if isinstance(raw, (int, float)): - normalized = _timestamp_from_epoch(raw) - return normalized if normalized is not None else str(raw) - - text = str(raw).strip() - if not text: - return None - - if text.isdigit(): - normalized = _timestamp_from_epoch(text) - return normalized if normalized is not None else text - - maybe_iso = text.replace("Z", "+00:00") - try: - parsed = dt.datetime.fromisoformat(maybe_iso) - if parsed.tzinfo is None: - parsed = parsed.replace(tzinfo=dt.timezone.utc) - return parsed.isoformat() - except Exception: - return text - - -def _normalize_key(raw: Any, fallback_idx: int) -> str: - text = str(raw).strip() if raw is not None else "" - if text: - return text - return f"openclaw_{fallback_idx}" - - -def _parse_structured_line(line: str) -> tuple[str, str] | None: - if not line.startswith("**"): - return None - rest = line[2:] - marker = "**:" - marker_idx = rest.find(marker) - if marker_idx <= 0: - return None - key = rest[:marker_idx].strip() - value = rest[marker_idx + len(marker) :].strip() - if not key or not value: - return None - return key, value - - -def _discover_memory_columns( - cursor: sqlite3.Cursor, db_path: Path -) -> tuple[str, str, str | None, str | None]: - table_info_rows = cursor.execute("PRAGMA table_info(memories)").fetchall() - columns_in_order = [ - str(row[1]).strip().lower() - for row in table_info_rows - if str(row[1]).strip() - ] - columns = set(columns_in_order) - - key_col = _pick_existing_column(columns, SQLITE_KEY_CANDIDATES) - if key_col is None: - pk_columns = sorted( - ( - (int(row[5]), str(row[1]).strip().lower()) - for row in table_info_rows - if int(row[5]) > 0 and str(row[1]).strip() - ), - key=lambda item: item[0], - ) - if pk_columns: - key_col = pk_columns[0][1] - if key_col is None: - try: - cursor.execute("SELECT rowid FROM memories LIMIT 1").fetchone() - key_col = "rowid" - except sqlite3.Error: - key_col = columns_in_order[0] if columns_in_order else None - - content_col = _pick_existing_column(columns, SQLITE_CONTENT_CANDIDATES) - if content_col is None: - raise click.ClickException( - f"OpenClaw sqlite exists at {db_path}, but no content-like column found" - ) - if key_col is None: - raise click.ClickException( - f"OpenClaw sqlite exists at {db_path}, but no key-like or usable fallback column found" - ) - category_col = _pick_existing_column(columns, SQLITE_CATEGORY_CANDIDATES) - ts_col = _pick_existing_column(columns, SQLITE_TS_CANDIDATES) - return key_col, content_col, category_col, ts_col - - -def _read_openclaw_sqlite_entries(db_path: Path) -> list[MemoryEntry]: - if not db_path.exists(): - return [] - - conn: sqlite3.Connection | None = None - try: - conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True) - conn.row_factory = sqlite3.Row - cursor = conn.cursor() - table_exists = cursor.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='memories' LIMIT 1" - ).fetchone() - if table_exists is None: - return [] - - key_col, content_col, category_col, ts_col = _discover_memory_columns( - cursor, db_path - ) - - select_clauses = [ - f"{key_col} AS __key__", - f"{content_col} AS __content__", - ( - f"{category_col} AS __category__" - if category_col is not None - else "'core' AS __category__" - ), - f"{ts_col} AS __timestamp__" if ts_col is not None else "NULL AS __timestamp__", - ] - order_by_clause = ( - " ORDER BY __timestamp__ ASC, __key__ ASC" - if ts_col is not None - else " ORDER BY __key__ ASC" - ) - rows = cursor.execute( - "SELECT " + ", ".join(select_clauses) + " FROM memories" + order_by_clause - ).fetchall() - - entries: list[MemoryEntry] = [] - for idx, row in enumerate(rows): - content = str(row["__content__"] or "").strip() - if not content: - continue - - entries.append( - MemoryEntry( - key=_normalize_key(row["__key__"], idx), - content=content, - category=str(row["__category__"] or "core").strip().lower() or "core", - timestamp=_normalize_timestamp(row["__timestamp__"]), - source=f"sqlite:{db_path}", - order=idx, - ) - ) - - return entries - except sqlite3.Error as exc: - raise click.ClickException( - f"Failed to read OpenClaw sqlite at {db_path}: {exc}" - ) from exc - finally: - if conn is not None: - conn.close() - - -def _parse_markdown_file( - path: Path, default_category: str, stem: str, order_offset: int -) -> list[MemoryEntry]: - content = path.read_text(encoding="utf-8", errors="replace") - mtime = _normalize_timestamp(path.stat().st_mtime) - entries: list[MemoryEntry] = [] - line_no = 0 - for raw_line in content.splitlines(): - line_no += 1 - stripped = raw_line.strip() - if not stripped or stripped.startswith("#"): - continue - - line = stripped[2:] if stripped.startswith("- ") else stripped - parsed = _parse_structured_line(line) - if parsed is not None: - key, text = parsed - key = _normalize_key(key, line_no) - body = text.strip() - else: - key = f"openclaw_{stem}_{line_no}" - body = line.strip() - - if not body: - continue - - entries.append( - MemoryEntry( - key=key, - content=body, - category=default_category, - timestamp=mtime, - source=f"markdown:{path}", - order=order_offset + len(entries), - ) - ) - return entries - - -def _read_openclaw_markdown_entries(workspace_dir: Path) -> list[MemoryEntry]: - entries: list[MemoryEntry] = [] - - core_path = workspace_dir / "MEMORY.md" - if core_path.exists(): - entries.extend( - _parse_markdown_file( - core_path, - default_category="core", - stem="core", - order_offset=len(entries), - ) - ) - - daily_dir = workspace_dir / "memory" - if daily_dir.exists(): - for md_path in sorted(daily_dir.glob("*.md")): - stem = md_path.stem or "daily" - entries.extend( - _parse_markdown_file( - md_path, - default_category="daily", - stem=stem, - order_offset=len(entries), - ) - ) - - return entries - - -def _exact_signature(entry: MemoryEntry) -> str: - return "\x00".join( - [ - entry.key.strip(), - entry.content.strip(), - entry.category.strip(), - entry.timestamp or "", - ] - ) - - -def _semantic_signature(entry: MemoryEntry) -> str: - return "\x00".join( - [ - entry.content.strip(), - entry.category.strip(), - ] - ) - - -def _dedup_entries(entries: list[MemoryEntry]) -> list[MemoryEntry]: - seen_exact: set[str] = set() - seen_semantic: set[str] = set() - deduped: list[MemoryEntry] = [] - for item in entries: - exact_signature = _exact_signature(item) - semantic_signature = _semantic_signature(item) - if exact_signature in seen_exact or semantic_signature in seen_semantic: - continue - seen_exact.add(exact_signature) - seen_semantic.add(semantic_signature) - deduped.append(item) - return deduped +from .openclaw_artifacts import ( + collect_workspace_files, + workspace_total_size, + write_migration_artifacts, +) +from .openclaw_memory import collect_memory_entries +from .openclaw_models import MemoryEntry, MigrationReport def _find_source_workspace(source_root: Path) -> Path: @@ -360,80 +38,6 @@ def _find_openclaw_config_json(source_root: Path, workspace_dir: Path) -> Path | return None -def _collect_workspace_files( - workspace_dir: Path, *, exclude_dir: Path | None = None -) -> list[Path]: - files: list[Path] = [] - exclude_resolved = exclude_dir.resolve() if exclude_dir is not None else None - for path in workspace_dir.rglob("*"): - if not path.is_file() or path.is_symlink(): - continue - if exclude_resolved is not None: - try: - path.resolve().relative_to(exclude_resolved) - continue - except (OSError, ValueError): - pass - files.append(path) - return sorted(files) - - -def _workspace_total_size(files: list[Path]) -> int: - total_bytes = 0 - for path in files: - try: - total_bytes += path.stat().st_size - except OSError: - # Best-effort accounting: files may disappear or become unreadable - # during migration scans. - continue - return total_bytes - - -def _write_jsonl(path: Path, entries: list[MemoryEntry]) -> None: - with path.open("w", encoding="utf-8") as fp: - for entry in entries: - fp.write( - json.dumps( - { - "key": entry.key, - "content": entry.content, - "category": entry.category, - "timestamp": entry.timestamp, - "source": entry.source, - }, - ensure_ascii=False, - ) - + "\n" - ) - - -def _write_timeline(path: Path, entries: list[MemoryEntry], source_root: Path) -> None: - ordered = sorted(entries, key=lambda e: (e.timestamp or "", e.order)) - - lines: list[str] = [] - lines.append("# OpenClaw Migration - Time Brief History") - lines.append("") - lines.append("> 时间简史(初步方案):按时间汇总可迁移记忆条目。") - lines.append("") - lines.append(f"- Generated at: {dt.datetime.now(dt.timezone.utc).isoformat()}") - lines.append(f"- Source: `{source_root}`") - lines.append(f"- Total entries: {len(ordered)}") - lines.append("") - lines.append("## Timeline") - lines.append("") - - for entry in ordered: - ts = entry.timestamp or "unknown" - snippet = entry.content.replace("\n", " ").strip() - if len(snippet) > 160: - snippet = snippet[:157] + "..." - lines.append(f"- [{ts}] ({entry.category}) `{entry.key}`: {snippet}") - - lines.append("") - path.write_text("\n".join(lines), encoding="utf-8") - - def _load_json_or_raise(path: Path) -> dict[str, Any]: try: return json.loads(path.read_text(encoding="utf-8")) @@ -444,88 +48,26 @@ def _load_json_or_raise(path: Path) -> dict[str, Any]: ) from exc -def _collect_memory_entries(workspace_dir: Path) -> MemoryCollection: - sqlite_entries = _read_openclaw_sqlite_entries(workspace_dir / "memory" / "brain.db") - markdown_entries = _read_openclaw_markdown_entries(workspace_dir) - memory_entries = _dedup_entries([*sqlite_entries, *markdown_entries]) - return MemoryCollection( - entries=memory_entries, - from_sqlite=len(sqlite_entries), - from_markdown=len(markdown_entries), - ) +def _resolve_explicit_target_dir( + astrbot_root: Path, target_dir: Path | None +) -> Path | None: + if target_dir is None: + return None + return target_dir if target_dir.is_absolute() else (astrbot_root / target_dir) -def _resolve_target_dir( +def _resolve_output_target_dir( astrbot_root: Path, target_dir: Path | None, dry_run: bool ) -> Path | None: if dry_run: return None - if target_dir is not None: - return target_dir if target_dir.is_absolute() else (astrbot_root / target_dir) + explicit_target = _resolve_explicit_target_dir(astrbot_root, target_dir) + if explicit_target is not None: + return explicit_target run_id = dt.datetime.now().strftime("%Y%m%d-%H%M%S") return astrbot_root / "data" / "migrations" / "openclaw" / f"run-{run_id}" -def _write_migration_artifacts( - *, - workspace_dir: Path, - workspace_files: list[Path], - resolved_target: Path, - source_root: Path, - memory_entries: list[MemoryEntry], - config_obj: dict[str, Any] | None, - config_json_path: Path | None, -) -> MigrationArtifacts: - workspace_target = resolved_target / "workspace" - workspace_target.mkdir(parents=True, exist_ok=True) - - copied_workspace_files = 0 - for src_file in workspace_files: - rel_path = src_file.relative_to(workspace_dir) - dst_file = workspace_target / rel_path - dst_file.parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(src_file, dst_file) - copied_workspace_files += 1 - - copied_memory_entries = 0 - wrote_timeline = False - if memory_entries: - _write_jsonl(resolved_target / "memory_entries.jsonl", memory_entries) - copied_memory_entries = len(memory_entries) - _write_timeline( - resolved_target / "time_brief_history.md", - memory_entries, - source_root, - ) - wrote_timeline = True - - wrote_config_toml = False - if config_obj is not None: - (resolved_target / "config.original.json").write_text( - json.dumps(config_obj, ensure_ascii=False, indent=2), - encoding="utf-8", - ) - try: - converted_toml = json_to_toml(config_obj) - except ValueError as exc: - source_hint = str(config_json_path) if config_json_path else "config JSON" - raise click.ClickException( - f"Failed to convert {source_hint} to TOML: {exc}" - ) from exc - (resolved_target / "config.migrated.toml").write_text( - converted_toml, - encoding="utf-8", - ) - wrote_config_toml = True - - return MigrationArtifacts( - copied_workspace_files=copied_workspace_files, - copied_memory_entries=copied_memory_entries, - wrote_timeline=wrote_timeline, - wrote_config_toml=wrote_config_toml, - ) - - def run_openclaw_migration( *, source_root: Path, @@ -542,37 +84,35 @@ def run_openclaw_migration( ) workspace_dir = _find_source_workspace(source_root) - memory_collection = _collect_memory_entries(workspace_dir) - memory_entries = memory_collection.entries - - explicit_target_dir: Path | None = None - if target_dir is not None: - explicit_target_dir = ( - target_dir if target_dir.is_absolute() else (astrbot_root / target_dir) - ) + memory_entries, from_sqlite, from_markdown = collect_memory_entries(workspace_dir) - workspace_files = _collect_workspace_files( + explicit_target_dir = _resolve_explicit_target_dir(astrbot_root, target_dir) + workspace_files = collect_workspace_files( workspace_dir, exclude_dir=explicit_target_dir, ) - workspace_total_bytes = _workspace_total_size(workspace_files) + workspace_total_bytes = workspace_total_size(workspace_files) config_json_path = _find_openclaw_config_json(source_root, workspace_dir) config_obj: dict[str, Any] | None = None if config_json_path is not None: config_obj = _load_json_or_raise(config_json_path) - resolved_target = _resolve_target_dir(astrbot_root, target_dir, dry_run) - artifacts = MigrationArtifacts( - copied_workspace_files=0, - copied_memory_entries=0, - wrote_timeline=False, - wrote_config_toml=False, - ) + resolved_target = _resolve_output_target_dir(astrbot_root, target_dir, dry_run) + + copied_workspace_files = 0 + copied_memory_entries = 0 + wrote_timeline = False + wrote_config_toml = False if not dry_run and resolved_target is not None: resolved_target.mkdir(parents=True, exist_ok=True) - artifacts = _write_migration_artifacts( + ( + copied_workspace_files, + copied_memory_entries, + wrote_timeline, + wrote_config_toml, + ) = write_migration_artifacts( workspace_dir=workspace_dir, workspace_files=workspace_files, resolved_target=resolved_target, @@ -588,15 +128,15 @@ def run_openclaw_migration( target_dir=str(resolved_target) if resolved_target else None, dry_run=dry_run, memory_entries_total=len(memory_entries), - memory_entries_from_sqlite=memory_collection.from_sqlite, - memory_entries_from_markdown=memory_collection.from_markdown, + memory_entries_from_sqlite=from_sqlite, + memory_entries_from_markdown=from_markdown, workspace_files_total=len(workspace_files), workspace_bytes_total=workspace_total_bytes, config_found=config_obj is not None, - copied_workspace_files=artifacts.copied_workspace_files, - copied_memory_entries=artifacts.copied_memory_entries, - wrote_timeline=artifacts.wrote_timeline, - wrote_config_toml=artifacts.wrote_config_toml, + copied_workspace_files=copied_workspace_files, + copied_memory_entries=copied_memory_entries, + wrote_timeline=wrote_timeline, + wrote_config_toml=wrote_config_toml, ) if not dry_run and resolved_target is not None: diff --git a/astrbot/cli/utils/openclaw_models.py b/astrbot/cli/utils/openclaw_models.py new file mode 100644 index 0000000000..3503b8c1e6 --- /dev/null +++ b/astrbot/cli/utils/openclaw_models.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(slots=True) +class MemoryEntry: + key: str + content: str + category: str + timestamp: str | None + source: str + order: int + + +@dataclass(slots=True) +class MigrationReport: + source_root: str + source_workspace: str + target_dir: str | None + dry_run: bool + memory_entries_total: int + memory_entries_from_sqlite: int + memory_entries_from_markdown: int + workspace_files_total: int + workspace_bytes_total: int + config_found: bool + copied_workspace_files: int + copied_memory_entries: int + wrote_timeline: bool + wrote_config_toml: bool + + +__all__ = ["MemoryEntry", "MigrationReport"] diff --git a/tests/unit/test_cli_cmd_migrate.py b/tests/unit/test_cli_cmd_migrate.py index 702da91102..27bf35caa9 100644 --- a/tests/unit/test_cli_cmd_migrate.py +++ b/tests/unit/test_cli_cmd_migrate.py @@ -52,3 +52,54 @@ def _fake_run_openclaw_migration(**kwargs: object) -> MigrationReport: assert "Timeline written: False" in result.output assert "Config TOML written: True" in result.output + +def test_migrate_openclaw_dry_run_explicit_target_prints_ignore_note( + monkeypatch, + tmp_path: Path, +) -> None: + source_root = tmp_path / ".openclaw" + source_root.mkdir(parents=True) + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + + captured: dict[str, object] = {} + + def _fake_run_openclaw_migration(**kwargs: object) -> MigrationReport: + captured.update(kwargs) + return MigrationReport( + source_root=str(source_root), + source_workspace=str(source_root / "workspace"), + target_dir=None, + dry_run=True, + memory_entries_total=0, + memory_entries_from_sqlite=0, + memory_entries_from_markdown=0, + workspace_files_total=0, + workspace_bytes_total=0, + config_found=False, + copied_workspace_files=0, + copied_memory_entries=0, + wrote_timeline=False, + wrote_config_toml=False, + ) + + monkeypatch.setattr(cmd_migrate, "get_astrbot_root", lambda: astrbot_root) + monkeypatch.setattr(cmd_migrate, "run_openclaw_migration", _fake_run_openclaw_migration) + + runner = CliRunner() + result = runner.invoke( + cmd_migrate.migrate, + [ + "openclaw", + "--source", + str(source_root), + "--dry-run", + "--target", + "data/migrations/custom", + ], + ) + + assert result.exit_code == 0, result.output + assert captured["target_dir"] == Path("data/migrations/custom") + assert "Dry-run mode: no files were written." in result.output + assert "Note: --target is ignored when --dry-run is enabled." in result.output diff --git a/tests/unit/test_cli_openclaw_migrate.py b/tests/unit/test_cli_openclaw_migrate.py index b4306df319..0cea371abe 100644 --- a/tests/unit/test_cli_openclaw_migrate.py +++ b/tests/unit/test_cli_openclaw_migrate.py @@ -4,6 +4,8 @@ import sqlite3 from pathlib import Path +import pytest + from astrbot.cli.utils.openclaw_migrate import run_openclaw_migration from astrbot.cli.utils.openclaw_toml import json_to_toml @@ -295,6 +297,40 @@ def test_run_openclaw_migration_excludes_target_inside_workspace(tmp_path: Path) assert not (target / "workspace" / "snapshot-output" / "stale.txt").exists() +def test_run_openclaw_migration_does_not_follow_symlinked_workspace_dirs( + tmp_path: Path, +) -> None: + source_root = tmp_path / ".openclaw" + source_root.mkdir(parents=True) + _prepare_openclaw_source(source_root) + + workspace = source_root / "workspace" + external_dir = tmp_path / "external-data" + external_dir.mkdir(parents=True, exist_ok=True) + (external_dir / "outside.txt").write_text("outside", encoding="utf-8") + + symlink_dir = workspace / "symlinked-outside" + try: + symlink_dir.symlink_to(external_dir, target_is_directory=True) + except (NotImplementedError, OSError) as exc: + pytest.skip(f"symlink unsupported in test environment: {exc}") + + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + _prepare_astrbot_root(astrbot_root) + + report = run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=False, + target_dir=Path("data/migrations/openclaw/test-symlink-scan"), + ) + + assert report.target_dir is not None + target = Path(report.target_dir) + assert not (target / "workspace" / "symlinked-outside" / "outside.txt").exists() + + def test_markdown_parsing_structured_and_plain_lines(tmp_path: Path) -> None: source_root = tmp_path / ".openclaw" source_root.mkdir(parents=True) From 6cd86dec3d2e225124a09da6f4a9bada4713dd79 Mon Sep 17 00:00:00 2001 From: Jacobinwwey Date: Sat, 21 Mar 2026 03:32:26 -0500 Subject: [PATCH 13/14] fix(migrate): harden sqlite uri path handling and timeline markdown escaping --- astrbot/cli/utils/openclaw_artifacts.py | 4 +- astrbot/cli/utils/openclaw_memory.py | 4 +- tests/unit/test_cli_openclaw_migrate.py | 49 +++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/astrbot/cli/utils/openclaw_artifacts.py b/astrbot/cli/utils/openclaw_artifacts.py index 884215a84a..a094de3f1f 100644 --- a/astrbot/cli/utils/openclaw_artifacts.py +++ b/astrbot/cli/utils/openclaw_artifacts.py @@ -103,7 +103,9 @@ def _write_timeline(path: Path, entries: list[MemoryEntry], source_root: Path) - snippet = entry.content.replace("\n", " ").strip() if len(snippet) > 160: snippet = snippet[:157] + "..." - lines.append(f"- [{ts}] ({entry.category}) `{entry.key}`: {snippet}") + safe_key = (entry.key or "").replace("`", "\\`") + safe_snippet = snippet.replace("`", "\\`") + lines.append(f"- [{ts}] ({entry.category}) `{safe_key}`: {safe_snippet}") lines.append("") path.write_text("\n".join(lines), encoding="utf-8") diff --git a/astrbot/cli/utils/openclaw_memory.py b/astrbot/cli/utils/openclaw_memory.py index efb71f1e78..a8d334367e 100644 --- a/astrbot/cli/utils/openclaw_memory.py +++ b/astrbot/cli/utils/openclaw_memory.py @@ -4,6 +4,7 @@ import sqlite3 from pathlib import Path from typing import Any +from urllib.parse import quote import click @@ -130,7 +131,8 @@ def _read_openclaw_sqlite_entries(db_path: Path) -> list[MemoryEntry]: conn: sqlite3.Connection | None = None try: - conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True) + db_uri = f"file:{quote(str(db_path.resolve()), safe='/:')}?mode=ro" + conn = sqlite3.connect(db_uri, uri=True) conn.row_factory = sqlite3.Row cursor = conn.cursor() table_exists = cursor.execute( diff --git a/tests/unit/test_cli_openclaw_migrate.py b/tests/unit/test_cli_openclaw_migrate.py index 0cea371abe..fc17173a08 100644 --- a/tests/unit/test_cli_openclaw_migrate.py +++ b/tests/unit/test_cli_openclaw_migrate.py @@ -455,6 +455,55 @@ def test_run_openclaw_migration_invalid_sqlite_raises_click_exception( assert "brain.db" in err_text +def test_run_openclaw_migration_supports_sqlite_path_with_spaces(tmp_path: Path) -> None: + source_root = tmp_path / ".openclaw with spaces" + source_root.mkdir(parents=True) + _prepare_openclaw_source(source_root) + + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + _prepare_astrbot_root(astrbot_root) + + report = run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=True, + ) + + assert report.memory_entries_from_sqlite >= 1 + + +def test_run_openclaw_migration_timeline_escapes_backticks(tmp_path: Path) -> None: + source_root = tmp_path / ".openclaw" + source_root.mkdir(parents=True) + _prepare_openclaw_source(source_root) + + memory_md = source_root / "workspace" / "MEMORY.md" + memory_md.write_text( + memory_md.read_text(encoding="utf-8") + + "- **key`tick**: content has `tick` too\n", + encoding="utf-8", + ) + + astrbot_root = tmp_path / "astrbot" + astrbot_root.mkdir(parents=True) + _prepare_astrbot_root(astrbot_root) + + report = run_openclaw_migration( + source_root=source_root, + astrbot_root=astrbot_root, + dry_run=False, + target_dir=Path("data/migrations/openclaw/test-timeline-backticks"), + ) + + assert report.target_dir is not None + timeline = (Path(report.target_dir) / "time_brief_history.md").read_text( + encoding="utf-8" + ) + assert "`key\\`tick`" in timeline + assert "content has \\`tick\\` too" in timeline + + def test_json_to_toml_quotes_special_keys() -> None: payload = { "normal key": "ok", From 63a4aeafd080a6b325e03d4c66aa5230b4c6efd2 Mon Sep 17 00:00:00 2001 From: Jacobinwwey Date: Sat, 21 Mar 2026 03:38:45 -0500 Subject: [PATCH 14/14] fix(migrate): build sqlite readonly uri via pathlib as_uri --- astrbot/cli/utils/openclaw_memory.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/astrbot/cli/utils/openclaw_memory.py b/astrbot/cli/utils/openclaw_memory.py index a8d334367e..fd440d80eb 100644 --- a/astrbot/cli/utils/openclaw_memory.py +++ b/astrbot/cli/utils/openclaw_memory.py @@ -4,7 +4,6 @@ import sqlite3 from pathlib import Path from typing import Any -from urllib.parse import quote import click @@ -131,7 +130,7 @@ def _read_openclaw_sqlite_entries(db_path: Path) -> list[MemoryEntry]: conn: sqlite3.Connection | None = None try: - db_uri = f"file:{quote(str(db_path.resolve()), safe='/:')}?mode=ro" + db_uri = f"{db_path.resolve().as_uri()}?mode=ro" conn = sqlite3.connect(db_uri, uri=True) conn.row_factory = sqlite3.Row cursor = conn.cursor()