Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,6 @@ Thumbs.db
# Build output
out/


# scratch files used by AI review triage
.tmp-*.json
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

新增的 .tmp-*.json 文件可能会导致敏感数据泄露,建议在版本控制中忽略此类临时文件。同时,确保这些临时文件不会被意外上传到生产环境中。请考虑使用 .gitignore 文件来排除这些临时文件。

296 changes: 296 additions & 0 deletions src/chat/archive-export.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,296 @@
// Export a chat session to a Markdown file under the workspace.
//
// Issue #165: the right-click "📦 Archive" action used to be a soft hide
// (toggle `archived` flag). Users expected real archiving — a Markdown
// snapshot they can grep, commit, or share. This module renders the
// session record to Markdown and writes it under
// `<workspace>/.deepcopilot/archives/yyyyMMdd-HHmmss-<title>.md`.
//
// Edge cases handled:
// - No workspace open → fall back to vscode.window.showSaveDialog.
// - Multi-root workspace → showWorkspaceFolderPick to choose target.
// - Path traversal → resolved path must stay under chosen root
// (defence in depth even though titles are
// already sanitised).
// - Name collision → append "-1", "-2", … suffix.
'use strict';

const vscode = require('vscode');
const path = require('path');
const fs = require('fs/promises');
const { t } = require('../utils/i18n');

// Archive directory, relative to the chosen workspace root. It is joined onto
// the root with `path.join` in `exportSessionToMarkdown`.
const ARCHIVE_SUBDIR = '.deepcopilot/archives';

/**
 * Turn an arbitrary session title into a short, filesystem-safe filename
 * fragment.
 *
 * Replaced with `_`:
 *   - `\ / : * ? " < > |` — reserved on Windows.
 *   - `\u0000-\u001f` and `\u007f` — control codes (NUL, newlines, tabs,
 *     ESC, DEL, …), which corrupt filenames and can be abused for terminal
 *     injection when the path is later printed to a log.
 *   - a leading run of dots, so the result is never a hidden file (`.foo`)
 *     or a relative-path escape (`..`).
 * Whitespace runs collapse to a single space, and trailing dots/spaces are
 * removed (see the Windows note inside the function).
 *
 * The result is capped at 60 UTF-16 code units. The cut never lands in the
 * middle of a surrogate pair, so a title containing emoji cannot leave a
 * lone surrogate in the filename.
 *
 * @param {*} raw - Title to sanitise; coerced with `String()`. Falsy input
 *   yields `'untitled'`.
 * @returns {string} A non-empty filename fragment.
 */
function _safeTitle(raw) {
  const source = String(raw || '').trim();
  if (!source) return 'untitled';
  const cleaned = source
    .replace(/[\\/:*?"<>|\u0000-\u001f\u007f]/g, '_')
    .replace(/^\.+/, '_')
    .replace(/\s+/g, ' ')
    .trim()
    // Windows: Win32 APIs strip/normalise trailing spaces and dots from
    // path components, which turns "foo ." / "foo " into "foo" silently
    // — or rejects the write outright. Strip them ourselves so the
    // on-disk name matches what we report back to the user and the
    // collision counter in _writeUnique can't be defeated.
    .replace(/[. ]+$/, '');
  let clipped = (cleaned || 'untitled').slice(0, 60);
  // `slice` counts UTF-16 code units; if the cut separated a surrogate pair,
  // drop the dangling high surrogate instead of emitting invalid text.
  if (/[\ud800-\udbff]$/.test(clipped)) clipped = clipped.slice(0, -1);
  // Truncation may have exposed a new trailing dot/space; strip again.
  return clipped.replace(/[. ]+$/, '') || 'untitled';
}

/** "20260526-143012" — local time, fixed-width, sortable. */
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

在这一段中,增加了对尾部空格和点的处理,这有助于避免在文件系统中出现不一致的文件名。然而,最后的 replace(/[. ]+$/, '') 可能会导致返回的字符串为空,因此在 slice(0, 60) 后面需要确保返回值不为空。建议在返回前添加一个检查,确保返回的字符串至少有一个字符,避免潜在的空指针异常。

/**
 * Format a date as `yyyyMMdd-HHmmss` using the local time zone.
 *
 * @param {Date} [d] - Moment to format; defaults to the current time.
 * @returns {string} e.g. `"20260526-143012"`.
 */
function _timestamp(d = new Date()) {
  const two = (value) => `${value}`.padStart(2, '0');
  const datePart = [d.getFullYear(), two(d.getMonth() + 1), two(d.getDate())].join('');
  const timePart = [d.getHours(), d.getMinutes(), d.getSeconds()].map(two).join('');
  return `${datePart}-${timePart}`;
}

/**
 * Render YAML frontmatter from primitive key/value pairs.
 *
 * Entries whose value is `null`, `undefined` or `''` are omitted. Finite
 * numbers are written bare; every other value is stringified and emitted as
 * a double-quoted scalar.
 *
 * @param {Object} meta - Key/value pairs; keys are written verbatim.
 * @returns {string} The `---` delimited block followed by a newline.
 */
function _frontmatter(meta) {
  const lines = ['---'];
  for (const [k, v] of Object.entries(meta || {})) {
    if (v == null || v === '') continue;
    // Always quote string values: bare YAML scalars like `true`,
    // `2026-05-26`, `null`, `123` would be coerced to bool/date/null/
    // number by any YAML parser, silently corrupting the exported
    // metadata if a session title or model name happens to match one
    // of those forms. Numbers stay bare because their identity is
    // preserved either way and bare numerics read more naturally.
    if (typeof v === 'number' && Number.isFinite(v)) {
      lines.push(`${k}: ${v}`);
    } else {
      // JSON.stringify escapes C0 controls, quotes and backslashes but
      // leaves DEL, the C1 range and U+2028/U+2029 raw. YAML asks for
      // non-printables to be escaped on output, and YAML 1.1 parsers treat
      // NEL/LS/PS as line breaks, so write those as `\uXXXX` escapes.
      const quoted = JSON.stringify(String(v)).replace(
        /[\u007f-\u009f\u2028\u2029]/g,
        (ch) => `\\u${ch.charCodeAt(0).toString(16).padStart(4, '0')}`,
      );
      lines.push(`${k}: ${quoted}`);
    }
  }
  lines.push('---', '');
  return lines.join('\n');
}

/**
 * Wrap reasoning/thoughts in a collapsible `<details>` block.
 *
 * @param {*} thoughts - Reasoning text. Non-string values are coerced with
 *   `String()` rather than crashing on `.trim()`.
 * @returns {string} The block followed by a trailing newline, or `''` when
 *   there is nothing to show (falsy or whitespace-only input).
 */
function _renderThoughts(thoughts) {
  if (!thoughts) return '';
  const body = String(thoughts).trim();
  // Whitespace-only reasoning would otherwise render an empty collapsible.
  if (!body) return '';
  return [
    '<details>',
    `<summary>${t('archiveThoughtsLabel')}</summary>`,
    '',
    body,
    '',
    '</details>',
    '',
  ].join('\n');
}

/**
 * Flatten a session title to a single line so it can be placed in a
 * Markdown `# ...` heading. Control characters (including newlines, tabs
 * and DEL) become spaces, whitespace runs collapse to one space, and the
 * result is trimmed. A title containing "\n" would otherwise split the
 * heading and break the document structure.
 *
 * @param {*} raw - Title; falsy values are treated as the empty string.
 * @returns {string} The single-line title, possibly empty.
 */
function _safeHeadingTitle(raw) {
  const text = String(raw || '');
  const withoutControls = text.replace(/[\u0000-\u001f\u007f]+/g, ' ');
  const singleSpaced = withoutControls.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}

/**
 * Convert a stored timestamp to an ISO-8601 string.
 * Returns `''` for a missing or unparseable value instead of letting
 * `Date.prototype.toISOString` throw a RangeError.
 */
function _isoOrEmpty(value) {
  if (!value) return '';
  const parsed = new Date(value);
  return Number.isNaN(parsed.getTime()) ? '' : parsed.toISOString();
}

/**
 * Render a session record to a Markdown string.
 * The record shape mirrors what SessionStore.append() persists:
 *   { id, title, createdAt, updatedAt, model, mode, ws, msgCount,
 *     messages: [{ role: 'user'|'assistant', text, thoughts? }, ...] }
 *
 * @param {Object} session - Session record as described above.
 * @returns {string} YAML frontmatter, a `# title` heading and one `###`
 *   section per message, ending in exactly one newline.
 */
function renderSessionMarkdown(session) {
  // A malformed createdAt/updatedAt must not abort the whole export; such
  // values come back as '' and are then omitted by `_frontmatter`.
  const created = _isoOrEmpty(session.createdAt);
  const updated = _isoOrEmpty(session.updatedAt);
  const archived = new Date().toISOString();

  // `provider` is not persisted on the session record (only `model`/`mode`
  // are), so we read the live setting at archive time. Token totals come
  // from `session.totals`. Both are best-effort: missing values are omitted
  // by `_frontmatter` rather than rendered as empty strings.
  let provider = '';
  try {
    provider = vscode.workspace.getConfiguration('deepseekAgent').get('provider') || '';
  } catch { /* tests / no vscode runtime */ }
  const totals = session.totals || {};
  const messages = Array.isArray(session.messages) ? session.messages : [];

  const head = _frontmatter({
    sessionId: session.id || '',
    title: session.title || '',
    createdAt: created,
    updatedAt: updated,
    archivedAt: archived,
    provider,
    model: session.model || '',
    mode: session.mode || '',
    // Fall back to the normalised array so a non-array `messages` value
    // cannot leak an undefined length into the metadata.
    messageCount: session.msgCount || messages.length,
    promptTokens: Number(totals.prompt_tokens) || 0,
    completionTokens: Number(totals.completion_tokens) || 0,
    totalTokens: Number(totals.total_tokens) || 0,
    workspace: session.ws || '',
  });

  const heading = _safeHeadingTitle(session.title) || t('sessionUntitled');
  const parts = [head, `# ${heading}`, ''];
  for (const m of messages) {
    if (!m) continue;
    if (m.role === 'user') {
      parts.push(`### 🧑 ${t('archiveRoleUser')}`, '', String(m.text || '').trim(), '');
    } else if (m.role === 'assistant') {
      parts.push(`### 🤖 ${t('archiveRoleAssistant')}`, '');
      const thoughts = _renderThoughts(m.thoughts);
      if (thoughts) parts.push(thoughts);
      const body = String(m.text || '').trim();
      if (body) parts.push(body, '');
    } else {
      // Defensive: render unknown roles so nothing is silently lost. The
      // role goes through the same single-line sanitiser as the title so a
      // stray newline cannot split the heading.
      const roleLabel = _safeHeadingTitle(m.role) || 'message';
      parts.push(`### ${roleLabel}`, '', String(m.text || '').trim(), '');
    }
  }

  // Compose the document. We intentionally do NOT run a global
  // `\n{3,}` collapse here — that would mutate verbatim user/assistant
  // text and break formatting inside fenced code blocks. Instead, each
  // section pushes its own controlled trailing blank line.
  return parts.join('\n').trimEnd() + '\n';
}

/**
 * Sentinel returned by `_pickWorkspaceRoot` when the user explicitly
 * dismissed the multi-root workspace folder picker. It is a Symbol so it can
 * never be confused with a real path string or with `null`.
 *
 * We MUST distinguish this from the "no workspace open" case (which returns
 * `null`): on cancel the archive is aborted cleanly, instead of silently
 * falling back to a save dialog (which would happily let the user save
 * outside any workspace).
 */
const PICK_CANCELLED = Symbol('pick-cancelled');

/**
 * Decide which workspace folder receives the archive.
 *   - no folders  → `null` (caller falls back to a save dialog).
 *   - one folder  → that folder's fsPath.
 *   - two or more → the folder chosen in the workspace-folder picker, or
 *                   `PICK_CANCELLED` when the picker is dismissed.
 *
 * @param {string} _sessionWs - The workspace the session was created in.
 *   Intentionally unused. It was historically used to skip the picker when
 *   it matched a folder, but `session.ws` is always derived from
 *   `workspaceFolders[0]` (see `ChatProvider._currentWs()`), so that shortcut
 *   pinned the archive to folder[0] and silently bypassed the picker. The
 *   picker is now always shown when there are 2+ folders, which avoids
 *   surprising writes into the wrong project.
 * @returns {Promise<string|null|symbol>} See the cases above.
 */
async function _pickWorkspaceRoot(_sessionWs) {
  const roots = vscode.workspace.workspaceFolders || [];
  switch (roots.length) {
    case 0:
      return null;
    case 1:
      return roots[0].uri.fsPath;
    default: {
      const choice = await vscode.window.showWorkspaceFolderPick({
        placeHolder: t('archivePickWorkspace'),
      });
      if (!choice) return PICK_CANCELLED;
      return choice.uri.fsPath;
    }
  }
}

/**
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

在返回值中引入了 PICK_CANCELLED 符号,虽然可以帮助区分用户取消选择的情况,但需要确保调用此函数的地方都能正确处理这一返回值,避免出现未处理的异常或逻辑错误。建议在相关调用处添加相应的处理逻辑。

* Reserve a non-colliding path AND write content atomically through an
* exclusive handle. `fs.open(..., 'wx')` closes the TOCTOU window that an
* `fs.access` pre-check would leave open (two concurrent archive clicks in
* the same second could otherwise pick the same name).
*
* Writing through the exclusive handle — rather than reserving an empty
* placeholder and then re-opening with `fs.writeFile` — prevents zero-byte
* residue when the write itself fails (disk full / permission revoked
* mid-write). On error we close the handle and `unlink` the placeholder so
* subsequent archives don't skip the now-orphaned name.
*/
/**
 * Write `content` to the first free name among `stem.md`, `stem-1.md`, …,
 * `stem-999.md` inside `dir`, falling back to `stem-<epoch ms>.md`.
 *
 * @param {string} dir - Directory to write into (must already exist).
 * @param {string} baseName - Desired file name; a trailing `.md` is optional.
 * @param {string} content - Text to write as UTF-8.
 * @returns {Promise<string>} Path of the file that was created.
 * @throws Any filesystem error other than an `EEXIST` hit inside the
 *   numbered loop; a failed write never leaves a partial file behind.
 */
async function _writeUnique(dir, baseName, content) {
  const ext = '.md';
  const stem = baseName.replace(/\.md$/i, '');
  for (let i = 0; i < 1000; i++) {
    const candidate = path.join(dir, i === 0 ? stem + ext : `${stem}-${i}${ext}`);
    try {
      await _writeExclusive(candidate, content);
      return candidate;
    } catch (err) {
      // Name already taken: try the next suffix. Anything else is fatal.
      if (err && err.code === 'EEXIST') continue;
      throw err;
    }
  }
  // Extremely unlikely (1000 same-second collisions); bail out with a
  // timestamped name. This goes through the same exclusive-create + cleanup
  // path as the loop, so it can neither overwrite an existing file nor leave
  // a partial one behind.
  const fallback = path.join(dir, `${stem}-${Date.now()}${ext}`);
  await _writeExclusive(fallback, content);
  return fallback;
}

/**
 * Create `file` exclusively (`'wx'`) and write `content` through that same
 * handle. Rejects with `EEXIST` when the file already exists. If the write
 * or close fails, the handle is closed and the file removed (both
 * best-effort) before the original error is rethrown.
 */
async function _writeExclusive(file, content) {
  const handle = await fs.open(file, 'wx');
  try {
    await handle.writeFile(content, 'utf8');
    await handle.close();
  } catch (writeErr) {
    // Cleanup is best-effort; the write error is the one worth reporting.
    try { await handle.close(); } catch { /* ignore */ }
    try { await fs.unlink(file); } catch { /* ignore */ }
    throw writeErr;
  }
}

/**
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

在 _writeUnique 函数中,虽然处理了写入失败的情况,但在 handle.writeFile(content, 'utf8'); 这一行,handle 可能会是未定义的,建议在使用前确保 handle 已成功打开。此外,fs.unlink(candidate) 的调用可能会因为文件不存在而抛出异常,建议在调用时加上错误处理。

* Resolve the destination path, then write the markdown.
* Returns the absolute path written, or `null` if:
* - the user cancelled the multi-root workspace folder picker, OR
* - the user cancelled the save dialog in the no-workspace fallback.
* Throws on filesystem errors so the caller can surface a friendly message.
*/
/**
 * Export `session` as a Markdown file.
 *
 * @param {Object} session - Session record; `title` and `ws` are read here,
 *   the rest is consumed by `renderSessionMarkdown`.
 * @returns {Promise<string|null>} Path of the file written, or `null` when
 *   the user cancelled the workspace-folder picker or the save dialog.
 * @throws {Error} When the resolved target would fall outside the archive
 *   directory, or on any filesystem error.
 */
async function exportSessionToMarkdown(session) {
  const md = renderSessionMarkdown(session);
  const fileName = `${_timestamp()}-${_safeTitle(session.title)}.md`;

  const root = await _pickWorkspaceRoot(session.ws);
  if (root === PICK_CANCELLED) return null; // user dismissed the picker
  if (root) {
    const archiveDir = path.join(root, ARCHIVE_SUBDIR);
    // Defence in depth: even though fileName is sanitised, verify the
    // resolved path stays inside the archive directory before writing. The
    // check is anchored at `archiveDir` (the directory `_writeUnique` joins
    // onto) and tests for a real `..` path segment, so a component that
    // merely starts with two dots is not mistaken for an escape.
    const resolved = path.resolve(archiveDir, fileName);
    const rel = path.relative(archiveDir, resolved);
    const escapes =
      rel === '' ||
      rel === '..' ||
      rel.startsWith(`..${path.sep}`) ||
      path.isAbsolute(rel); // e.g. a different drive on Windows
    if (escapes) {
      // i18n'd, user-facing — see archiveErrEscape in src/utils/i18n.js.
      throw new Error(t('archiveErrEscape'));
    }
    await fs.mkdir(archiveDir, { recursive: true });
    return await _writeUnique(archiveDir, fileName, md);
  }

  // No workspace open — ask the user where to put it. `Uri.file()` requires
  // an absolute path: passing a bare filename resolves to a confusing
  // location (drive root on Windows, `/` on POSIX). Anchor the default at
  // the user's home so the dialog opens somewhere predictable.
  const os = require('os');
  const uri = await vscode.window.showSaveDialog({
    saveLabel: t('archiveSaveLabel'),
    filters: { Markdown: ['md'] },
    defaultUri: vscode.Uri.file(path.join(os.homedir(), fileName)),
  });
  if (!uri) return null;
  await fs.writeFile(uri.fsPath, md, 'utf8');
  return uri.fsPath;
}

// Public API: the exporter entry point plus the renderer. The
// underscore-prefixed helpers in this module are not exported.
module.exports = { exportSessionToMarkdown, renderSessionMarkdown };
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. 安全性: 在 _uniquePath 函数中,虽然使用了 fs.access 来检查文件是否存在,但没有考虑到并发写入的情况,可能会导致竞态条件。建议在生成文件名时加锁或使用其他机制确保唯一性。

  2. 异常处理: 在 exportSessionToMarkdown 函数中,虽然有抛出错误的处理,但在调用 fs.mkdir 和 fs.writeFile 时,如果发生异常,应该有更详细的错误处理机制,以便于调试和用户友好提示。

  3. 代码风格: 代码整体风格较为一致,但在某些地方(如 _safeTitle 函数)可以考虑增加注释以提高可读性,尤其是正则表达式的部分。

  4. 性能: 在 _uniquePath 函数中,循环最多会执行 1000 次,这可能会影响性能,尤其是在文件系统较慢的情况下。建议考虑更高效的文件名生成策略。

  5. 可维护性: 函数 _renderThoughts 中对 thoughts 的处理较为简单,建议增加对输入的验证,以防止潜在的 XSS 攻击。

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

在 exportSessionToMarkdown 函数中,_writeUnique 函数的调用后没有检查返回值是否有效,建议在写入文件后检查返回值,确保文件成功写入。同时,await fs.writeFile(uri.fsPath, md, 'utf8'); 这一行没有处理可能的异常,建议加上异常处理逻辑。

Loading
Loading