From 38e75fca0fe4c31ed4b3110020d689dcb1e2fd19 Mon Sep 17 00:00:00 2001 From: Manzoor Wani Date: Tue, 5 May 2026 10:44:30 +0530 Subject: [PATCH 1/6] fix(arborist): clean up orphan top-level symlinks in linked strategy --- workspaces/arborist/lib/arborist/reify.js | 107 ++++++++++++++++++++-- workspaces/arborist/test/isolated-mode.js | 76 +++++++++++++++ 2 files changed, 175 insertions(+), 8 deletions(-) diff --git a/workspaces/arborist/lib/arborist/reify.js b/workspaces/arborist/lib/arborist/reify.js index da5f4a73bc79f..84769714e2b85 100644 --- a/workspaces/arborist/lib/arborist/reify.js +++ b/workspaces/arborist/lib/arborist/reify.js @@ -1281,35 +1281,126 @@ module.exports = cls => class Reifier extends cls { // After a linked install, scan node_modules/.store/ and remove any directories that are not referenced by the current ideal tree. // Store entries become orphaned when dependencies are updated or removed, because the diff never sees the old store keys. + // Then sweep the top-level node_modules/ for orphaned symlinks (e.g. an uninstalled dep whose store entry was just removed) so we don't leave dangling links. async #cleanOrphanedStoreEntries () { - const storeDir = resolve(this.path, 'node_modules', '.store') + const nmDir = resolve(this.path, 'node_modules') + const storeDir = resolve(nmDir, '.store') + let entries try { entries = await readdir(storeDir) } catch { - return + entries = null } - // Collect valid store keys from the isolated ideal tree (location: node_modules/.store/{key}/node_modules/{pkg}) + // Collect valid store keys and valid top-level links per node_modules directory. + // Store entries have location node_modules/.store/{key}/node_modules/{pkg}. + // Top-level links have location {prefix}/node_modules/{pkg} or {prefix}/node_modules/@scope/{pkg}, where {prefix} is empty for the root project and the workspace's localLocation for workspace deps. const validKeys = new Set() + const nmDirs = new Map() + const storeMarker = `${sep}.store${sep}` for (const child of this.idealTree.children.values()) { if (child.isInStore) { const key = child.location.split(sep)[2] validKeys.add(key) + continue + } + if (!child.isLink) { + continue + } + const nmIdx = child.location.lastIndexOf(`node_modules${sep}`) + if (nmIdx === -1 || child.location.includes(storeMarker)) { + continue + } + const prefix = child.location.slice(0, nmIdx) + const dir = resolve(this.path, prefix, 'node_modules') + const rest = child.location.slice(nmIdx + `node_modules${sep}`.length) + let entry + if (rest.startsWith('@')) { + const [scope, name] = rest.split(sep) + entry = `${scope}${sep}${name}` + } else { + entry = rest.split(sep)[0] + } + let set = nmDirs.get(dir) + if (!set) { + set = new Set() + nmDirs.set(dir, set) + } + set.add(entry) + } + // Always sweep the root node_modules even if no top-level links remain (e.g. last dep was just uninstalled) + if (!nmDirs.has(nmDir)) { + nmDirs.set(nmDir, new Set()) + } + + if (entries) { + const orphaned = entries.filter(e => !validKeys.has(e)) + if (orphaned.length) { + log.silly('reify', 'cleaning orphaned store entries', orphaned) + await promiseAllRejectLate( + orphaned.map(e => + rm(resolve(storeDir, e), { recursive: true, force: true }) + .catch(/* istanbul ignore next -- rm with force rarely fails */ + er => log.warn('cleanup', `Failed to remove orphaned store entry ${e}`, er)) + ) + ) + } + } + + for (const [dir, valid] of nmDirs) { + await this.#cleanOrphanedTopLevelLinks(dir, valid) + } + } + + /* Remove node_modules/ entries that aren't represented in the ideal tree. + * Run for the project root and each workspace's node_modules. + * The linked diff path can't see these because #buildLinkedActualForDiff derives the actual tree from the ideal, so removed deps are never compared. + * Only symlinks are removed; real directories are left alone since they were created by another tool and aren't ours to delete. + */ + async #cleanOrphanedTopLevelLinks (nmDir, validTopLevel) { + let dirents + try { + dirents = await readdir(nmDir, { withFileTypes: true }) + } catch { + return + } + + const orphaned = [] + for (const ent of dirents) { + // skip npm-managed entries (.bin, .store, .package-lock.json, etc) + if (ent.name.startsWith('.')) { + continue + } + if (ent.name.startsWith('@')) { + let scoped + try { + scoped = await readdir(resolve(nmDir, ent.name), { withFileTypes: true }) + } catch { + /* istanbul ignore next -- readdir of an entry we just listed should not fail */ + continue + } + for (const pkgEnt of scoped) { + const key = `${ent.name}${sep}${pkgEnt.name}` + if (!validTopLevel.has(key) && pkgEnt.isSymbolicLink()) { + orphaned.push(key) + } + } + } else if (!validTopLevel.has(ent.name) && ent.isSymbolicLink()) { + orphaned.push(ent.name) } } - const orphaned = entries.filter(e => !validKeys.has(e)) if (!orphaned.length) { return } - log.silly('reify', 'cleaning orphaned store entries', orphaned) + log.silly('reify', 'cleaning orphaned top-level links', orphaned) await promiseAllRejectLate( - orphaned.map(e => - rm(resolve(storeDir, e), { recursive: true, force: true }) + orphaned.map(name => + rm(resolve(nmDir, name), { recursive: true, force: true }) .catch(/* istanbul ignore next -- rm with force rarely fails */ - er => log.warn('cleanup', `Failed to remove orphaned store entry ${e}`, er)) + er => log.warn('cleanup', `Failed to remove orphaned link ${name}`, er)) ) ) } diff --git a/workspaces/arborist/test/isolated-mode.js b/workspaces/arborist/test/isolated-mode.js index fac6f6beee935..4d704130c6394 100644 --- a/workspaces/arborist/test/isolated-mode.js +++ b/workspaces/arborist/test/isolated-mode.js @@ -1794,6 +1794,82 @@ tap.test('orphaned store entries are cleaned up on dependency removal', async t const entriesAfterRemoval = fs.readdirSync(storeDir) t.equal(entriesAfterRemoval.length, 0, 'all store entries are removed when dependencies are removed') + + /* https://github.com/npm/cli/issues/9308 — the top-level node_modules symlink for the removed dep was left behind, dangling into the just-cleaned store. */ + t.notOk(fs.existsSync(path.join(dir, 'node_modules', 'which')), + 'top-level symlink for removed dependency is also cleaned up') +}) + +tap.test('orphaned link inside workspace node_modules is cleaned up on dependency removal', async t => { + const graph = { + registry: [ + { name: 'abbrev', version: '4.0.0' }, + ], + root: { + name: 'root', + version: '1.0.0', + }, + workspaces: [ + { name: 'a', version: '1.0.0', dependencies: { abbrev: '^4.0.0' } }, + ], + } + const { dir, registry } = await getRepo(graph) + const cache = fs.mkdtempSync(`${getTempDir()}/test-`) + + const arb1 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb1.reify({ installStrategy: 'linked' }) + + const wsLink = path.join(dir, 'packages', 'a', 'node_modules', 'abbrev') + t.ok(fs.existsSync(wsLink), 'abbrev is linked into workspace node_modules') + + // Drop abbrev from the workspace package.json + const wsPkgPath = path.join(dir, 'packages', 'a', 'package.json') + const wsPkg = JSON.parse(fs.readFileSync(wsPkgPath, 'utf8')) + delete wsPkg.dependencies + fs.writeFileSync(wsPkgPath, JSON.stringify(wsPkg)) + + const arb2 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb2.reify({ installStrategy: 'linked' }) + + t.notOk(fs.existsSync(wsLink), 'abbrev symlink target no longer resolves') + t.notOk(fs.lstatSync(wsLink, { throwIfNoEntry: false }), + 'abbrev symlink itself is removed from workspace node_modules') +}) + +tap.test('orphaned scoped top-level link is cleaned up when only one of two scoped deps is removed', async t => { + const graph = { + registry: [ + { name: '@scope/a', version: '1.0.0' }, + { name: '@scope/b', version: '1.0.0' }, + ], + root: { + name: 'myproject', + version: '1.0.0', + dependencies: { '@scope/a': '1.0.0', '@scope/b': '1.0.0' }, + }, + } + const { dir, registry } = await getRepo(graph) + const cache = fs.mkdtempSync(`${getTempDir()}/test-`) + + const arb1 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb1.reify({ installStrategy: 'linked' }) + + t.ok(fs.existsSync(path.join(dir, 'node_modules', '@scope', 'a')), '@scope/a installed') + t.ok(fs.existsSync(path.join(dir, 'node_modules', '@scope', 'b')), '@scope/b installed') + + // Drop @scope/a from package.json + const pkgPath = path.join(dir, 'package.json') + const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8')) + delete pkg.dependencies['@scope/a'] + fs.writeFileSync(pkgPath, JSON.stringify(pkg)) + + const arb2 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb2.reify({ installStrategy: 'linked' }) + + t.notOk(fs.existsSync(path.join(dir, 'node_modules', '@scope', 'a')), + '@scope/a top-level symlink is removed') + t.ok(fs.existsSync(path.join(dir, 'node_modules', '@scope', 'b')), + '@scope/b top-level symlink is preserved') }) tap.test('store symlinks are updated when hash changes after adding a dep', async t => { From b66d5ea25114d9ec5432fec7683040d4ff80a920 Mon Sep 17 00:00:00 2001 From: Manzoor Wani Date: Tue, 5 May 2026 10:55:29 +0530 Subject: [PATCH 2/6] fix(arborist): also sweep declared-workspace node_modules for orphan links --- workspaces/arborist/lib/arborist/reify.js | 30 +++++- workspaces/arborist/test/isolated-mode.js | 120 ++++++++++++++++++++++ 2 files changed, 145 insertions(+), 5 deletions(-) diff --git a/workspaces/arborist/lib/arborist/reify.js b/workspaces/arborist/lib/arborist/reify.js index 84769714e2b85..f793933f2d1f6 100644 --- a/workspaces/arborist/lib/arborist/reify.js +++ b/workspaces/arborist/lib/arborist/reify.js @@ -11,7 +11,7 @@ const { depth: dfwalk } = require('treeverse') const { dirname, resolve, relative, join, sep } = require('node:path') const { log, time } = require('proc-log') const { existsSync } = require('node:fs') -const { lstat, mkdir, readdir, rm, symlink } = require('node:fs/promises') +const { lstat, mkdir, readdir, readlink, rm, symlink } = require('node:fs/promises') const { moveFile } = require('@npmcli/fs') const { subset, intersects } = require('semver') const { walkUp } = require('walk-up-path') @@ -1329,10 +1329,17 @@ module.exports = cls => class Reifier extends cls { } set.add(entry) } - // Always sweep the root node_modules even if no top-level links remain (e.g. last dep was just uninstalled) + // Always sweep the root node_modules and every workspace's node_modules even if no top-level links remain (e.g. last dep was just uninstalled). + // Without this, a workspace whose only remaining symlinks were removed would never be revisited and would leak orphan links. if (!nmDirs.has(nmDir)) { nmDirs.set(nmDir, new Set()) } + for (const ws of this.idealTree.fsChildren) { + const wsNmDir = resolve(ws.path, 'node_modules') + if (!nmDirs.has(wsNmDir)) { + nmDirs.set(wsNmDir, new Set()) + } + } if (entries) { const orphaned = entries.filter(e => !validKeys.has(e)) @@ -1356,9 +1363,11 @@ module.exports = cls => class Reifier extends cls { /* Remove node_modules/ entries that aren't represented in the ideal tree. * Run for the project root and each workspace's node_modules. * The linked diff path can't see these because #buildLinkedActualForDiff derives the actual tree from the ideal, so removed deps are never compared. - * Only symlinks are removed; real directories are left alone since they were created by another tool and aren't ours to delete. + * Only symlinks whose target resolves inside the project root are removed — that covers store links (node_modules/.store/...) and workspace self-links (e.g. node_modules/ -> ../packages/) that npm itself created. + * Symlinks pointing outside the project (e.g. `npm link foo` without --save targeting the global prefix, or hand-made `ln -s` to an external path) and real directories are preserved. */ async #cleanOrphanedTopLevelLinks (nmDir, validTopLevel) { + const projectPrefix = resolve(this.path) + sep let dirents try { dirents = await readdir(nmDir, { withFileTypes: true }) @@ -1366,6 +1375,17 @@ module.exports = cls => class Reifier extends cls { return } + const isOurOrphan = async (linkPath) => { + let target + try { + target = await readlink(linkPath) + } catch { + /* istanbul ignore next -- readlink of an entry we just listed as a symlink should not fail */ + return false + } + return resolve(dirname(linkPath), target).startsWith(projectPrefix) + } + const orphaned = [] for (const ent of dirents) { // skip npm-managed entries (.bin, .store, .package-lock.json, etc) @@ -1382,11 +1402,11 @@ module.exports = cls => class Reifier extends cls { } for (const pkgEnt of scoped) { const key = `${ent.name}${sep}${pkgEnt.name}` - if (!validTopLevel.has(key) && pkgEnt.isSymbolicLink()) { + if (!validTopLevel.has(key) && pkgEnt.isSymbolicLink() && await isOurOrphan(resolve(nmDir, key))) { orphaned.push(key) } } - } else if (!validTopLevel.has(ent.name) && ent.isSymbolicLink()) { + } else if (!validTopLevel.has(ent.name) && ent.isSymbolicLink() && await isOurOrphan(resolve(nmDir, ent.name))) { orphaned.push(ent.name) } } diff --git a/workspaces/arborist/test/isolated-mode.js b/workspaces/arborist/test/isolated-mode.js index 4d704130c6394..89939c6e68c85 100644 --- a/workspaces/arborist/test/isolated-mode.js +++ b/workspaces/arborist/test/isolated-mode.js @@ -1836,6 +1836,126 @@ tap.test('orphaned link inside workspace node_modules is cleaned up on dependenc 'abbrev symlink itself is removed from workspace node_modules') }) +tap.test('orphaned link in declared-workspace node_modules is cleaned up when last dep is removed', async t => { + // Reproduces the case where the workspace is also a root dependency, so its self-link sits at the ROOT node_modules and the workspace's own node_modules has no surviving links after removing its only dep. + // Without explicitly seeding each workspace's node_modules into the sweep map, that directory would never be visited and the orphan symlink would remain. + const graph = { + registry: [ + { name: 'abbrev', version: '4.0.0' }, + ], + root: { + name: 'root', + version: '1.0.0', + dependencies: { a: '*' }, + }, + workspaces: [ + { name: 'a', version: '1.0.0', dependencies: { abbrev: '^4.0.0' } }, + ], + } + const { dir, registry } = await getRepo(graph) + const cache = fs.mkdtempSync(`${getTempDir()}/test-`) + + const arb1 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb1.reify({ installStrategy: 'linked' }) + + const wsLink = path.join(dir, 'packages', 'a', 'node_modules', 'abbrev') + t.ok(fs.lstatSync(wsLink, { throwIfNoEntry: false }), 'abbrev is linked into workspace node_modules') + + const wsPkgPath = path.join(dir, 'packages', 'a', 'package.json') + const wsPkg = JSON.parse(fs.readFileSync(wsPkgPath, 'utf8')) + delete wsPkg.dependencies + fs.writeFileSync(wsPkgPath, JSON.stringify(wsPkg)) + + const arb2 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb2.reify({ installStrategy: 'linked' }) + + t.notOk(fs.lstatSync(wsLink, { throwIfNoEntry: false }), + 'abbrev symlink is removed even though the workspace itself is the only declared root dep') +}) + +tap.test('orphaned workspace self-link in root node_modules is cleaned up when workspace is undeclared', async t => { + // When root declares a workspace as a dependency, the workspace gets a self-link at root node_modules (e.g. node_modules/a -> ../packages/a). + // If the workspace is later removed from root's dependencies, that self-link must be cleaned up. + // It is a symlink npm itself created, but its target lives outside .store/, so the sweep must accept any orphan whose target resolves inside the project root. + const graph = { + registry: [], + root: { + name: 'root', + version: '1.0.0', + dependencies: { a: '*', b: '*' }, + }, + workspaces: [ + { name: 'a', version: '1.0.0' }, + { name: 'b', version: '1.0.0' }, + ], + } + const { dir, registry } = await getRepo(graph) + const cache = fs.mkdtempSync(`${getTempDir()}/test-`) + + const arb1 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb1.reify({ installStrategy: 'linked' }) + + t.ok(fs.lstatSync(path.join(dir, 'node_modules', 'a'), { throwIfNoEntry: false }), 'a self-link present') + t.ok(fs.lstatSync(path.join(dir, 'node_modules', 'b'), { throwIfNoEntry: false }), 'b self-link present') + + // Drop workspace a from both root deps and the workspaces glob + const pkgPath = path.join(dir, 'package.json') + const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8')) + pkg.workspaces = ['packages/b'] + pkg.dependencies = { b: '*' } + fs.writeFileSync(pkgPath, JSON.stringify(pkg)) + + const arb2 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb2.reify({ installStrategy: 'linked' }) + + t.notOk(fs.lstatSync(path.join(dir, 'node_modules', 'a'), { throwIfNoEntry: false }), + 'orphan workspace self-link is removed') + t.ok(fs.lstatSync(path.join(dir, 'node_modules', 'b'), { throwIfNoEntry: false }), + 'still-declared workspace self-link is preserved') +}) + +tap.test('unmanaged symlinks (e.g. npm link) in node_modules are preserved across reify', async t => { + /* The orphan sweep should only touch links the linked strategy itself created (those resolving into the project's node_modules/.store/). + * A symlink pointing outside .store/ — e.g. one created by `npm link foo` without --save or by hand — must be left alone. + */ + const graph = { + registry: [ + { name: 'abbrev', version: '4.0.0' }, + ], + root: { + name: 'myproject', + version: '1.0.0', + dependencies: { abbrev: '^4.0.0' }, + }, + } + const { dir, registry } = await getRepo(graph) + const cache = fs.mkdtempSync(`${getTempDir()}/test-`) + + const arb1 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb1.reify({ installStrategy: 'linked' }) + + // Drop a hand-made symlink to a target outside the project's .store/ + const externalDir = fs.mkdtempSync(`${getTempDir()}/external-`) + fs.writeFileSync(path.join(externalDir, 'package.json'), + JSON.stringify({ name: 'external-pkg', version: '1.0.0' })) + const externalLink = path.join(dir, 'node_modules', 'external-pkg') + fs.symlinkSync(externalDir, externalLink) + + // Remove abbrev so the sweep runs and would otherwise consider external-pkg orphaned + const pkgPath = path.join(dir, 'package.json') + const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8')) + delete pkg.dependencies + fs.writeFileSync(pkgPath, JSON.stringify(pkg)) + + const arb2 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb2.reify({ installStrategy: 'linked' }) + + t.notOk(fs.existsSync(path.join(dir, 'node_modules', 'abbrev')), + 'orphan link into our .store/ is removed') + t.ok(fs.lstatSync(externalLink, { throwIfNoEntry: false }), + 'unmanaged symlink pointing outside .store/ is preserved') +}) + tap.test('orphaned scoped top-level link is cleaned up when only one of two scoped deps is removed', async t => { const graph = { registry: [ From 54c2ea8a64b7623aa5a400a19d936f87933e9ea7 Mon Sep 17 00:00:00 2001 From: Manzoor Wani Date: Tue, 5 May 2026 11:25:17 +0530 Subject: [PATCH 3/6] fix(arborist): skip orphan sweep on dry-run, lockfile-only, and filtered linked installs --- workspaces/arborist/lib/arborist/reify.js | 6 +- workspaces/arborist/test/isolated-mode.js | 86 +++++++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/workspaces/arborist/lib/arborist/reify.js b/workspaces/arborist/lib/arborist/reify.js index f793933f2d1f6..9d0c398165dd5 100644 --- a/workspaces/arborist/lib/arborist/reify.js +++ b/workspaces/arborist/lib/arborist/reify.js @@ -124,7 +124,11 @@ module.exports = cls => class Reifier extends cls { await this[_diffTrees]() await this.#reifyPackages() if (linked) { - await this.#cleanOrphanedStoreEntries() + // The sweep mutates node_modules on disk, so skip it for dry runs and lockfile-only installs (those modes also short-circuit #reifyPackages). + // Skip when a filter is active too: filtered installs intentionally leave out-of-scope workspaces alone, and the ideal tree we'd sweep against does not represent their on-disk state. + if (!this.options.dryRun && !this.options.packageLockOnly && !this.diff?.filterSet?.size) { + await this.#cleanOrphanedStoreEntries() + } // swap back in the idealTree // so that the lockfile is preserved this.idealTree = oldTree diff --git a/workspaces/arborist/test/isolated-mode.js b/workspaces/arborist/test/isolated-mode.js index 89939c6e68c85..b2a4df2787b21 100644 --- a/workspaces/arborist/test/isolated-mode.js +++ b/workspaces/arborist/test/isolated-mode.js @@ -1873,6 +1873,92 @@ tap.test('orphaned link in declared-workspace node_modules is cleaned up when la 'abbrev symlink is removed even though the workspace itself is the only declared root dep') }) +tap.test('orphan sweep is skipped on dryRun and packageLockOnly linked installs', async t => { + // The sweep mutates node_modules; dry-run and package-lock-only installs must not touch the filesystem. + const graph = { + registry: [ + { name: 'abbrev', version: '4.0.0' }, + ], + root: { + name: 'myproject', + version: '1.0.0', + dependencies: { abbrev: '^4.0.0' }, + }, + } + const { dir, registry } = await getRepo(graph) + const cache = fs.mkdtempSync(`${getTempDir()}/test-`) + + const arb1 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb1.reify({ installStrategy: 'linked' }) + const linkPath = path.join(dir, 'node_modules', 'abbrev') + t.ok(fs.lstatSync(linkPath, { throwIfNoEntry: false }), 'abbrev link present after first install') + + // Drop the dep, then run dryRun and packageLockOnly — neither should remove the orphan + const pkgPath = path.join(dir, 'package.json') + const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8')) + delete pkg.dependencies + fs.writeFileSync(pkgPath, JSON.stringify(pkg)) + + const arbDry = new Arborist({ path: dir, registry, packumentCache: new Map(), cache, dryRun: true }) + await arbDry.reify({ installStrategy: 'linked', dryRun: true }) + t.ok(fs.lstatSync(linkPath, { throwIfNoEntry: false }), + 'dryRun does not remove orphan symlink') + + const arbLockOnly = new Arborist({ path: dir, registry, packumentCache: new Map(), cache, packageLockOnly: true }) + await arbLockOnly.reify({ installStrategy: 'linked', packageLockOnly: true }) + t.ok(fs.lstatSync(linkPath, { throwIfNoEntry: false }), + 'packageLockOnly does not remove orphan symlink') + + // A real install does perform the cleanup + const arbReal = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arbReal.reify({ installStrategy: 'linked' }) + t.notOk(fs.lstatSync(linkPath, { throwIfNoEntry: false }), + 'real install cleans up the orphan symlink') +}) + +tap.test('orphan sweep skips out-of-filter workspaces during workspace-filtered linked install', async t => { + // Filtered installs intentionally leave out-of-scope workspaces alone. + // The sweep would otherwise delete a stray-but-untouched symlink from a workspace that wasn't part of the filter. + const graph = { + registry: [ + { name: 'abbrev', version: '4.0.0' }, + ], + root: { + name: 'myroot', version: '1.0.0', + }, + workspaces: [ + { name: 'ws-a', version: '1.0.0', dependencies: { abbrev: '4.0.0' } }, + { name: 'ws-b', version: '1.0.0', dependencies: { abbrev: '4.0.0' } }, + ], + } + + const { dir, registry } = await getRepo(graph) + const cache = fs.mkdtempSync(`${getTempDir()}/test-`) + + const arb1 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb1.reify({ installStrategy: 'linked' }) + + // Plant a stale orphan link inside ws-b's node_modules to simulate an out-of-filter workspace having an entry the sweep would otherwise remove. + const stalePath = path.join(dir, 'packages', 'ws-b', 'node_modules', 'stale-pkg') + fs.symlinkSync('../../../node_modules/.store/nonexistent/node_modules/stale-pkg', stalePath) + + const arb2 = new Arborist({ + path: dir, + registry, + packumentCache: new Map(), + cache, + workspaces: ['ws-a'], + }) + await arb2.reify({ + installStrategy: 'linked', + workspaces: ['ws-a'], + }) + + t.ok(arb2.diff.filterSet.size > 0, 'filterSet is populated for ws-a-only install') + t.ok(fs.lstatSync(stalePath, { throwIfNoEntry: false }), + 'stale link in out-of-filter workspace is left untouched during filtered install') +}) + tap.test('orphaned workspace self-link in root node_modules is cleaned up when workspace is undeclared', async t => { // When root declares a workspace as a dependency, the workspace gets a self-link at root node_modules (e.g. node_modules/a -> ../packages/a). // If the workspace is later removed from root's dependencies, that self-link must be cleaned up. From e0028af132cf913c262fa30721f59c5b83bc4299 Mon Sep 17 00:00:00 2001 From: Manzoor Wani Date: Tue, 5 May 2026 11:27:16 +0530 Subject: [PATCH 4/6] Fix comments --- workspaces/arborist/lib/arborist/reify.js | 11 +++++------ workspaces/arborist/test/isolated-mode.js | 7 +++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/workspaces/arborist/lib/arborist/reify.js b/workspaces/arborist/lib/arborist/reify.js index 9d0c398165dd5..aadde9e3c8278 100644 --- a/workspaces/arborist/lib/arborist/reify.js +++ b/workspaces/arborist/lib/arborist/reify.js @@ -1364,12 +1364,11 @@ module.exports = cls => class Reifier extends cls { } } - /* Remove node_modules/ entries that aren't represented in the ideal tree. - * Run for the project root and each workspace's node_modules. - * The linked diff path can't see these because #buildLinkedActualForDiff derives the actual tree from the ideal, so removed deps are never compared. - * Only symlinks whose target resolves inside the project root are removed — that covers store links (node_modules/.store/...) and workspace self-links (e.g. node_modules/ -> ../packages/) that npm itself created. - * Symlinks pointing outside the project (e.g. `npm link foo` without --save targeting the global prefix, or hand-made `ln -s` to an external path) and real directories are preserved. - */ + // Remove node_modules/ entries that aren't represented in the ideal tree. + // Run for the project root and each workspace's node_modules. + // The linked diff path can't see these because #buildLinkedActualForDiff derives the actual tree from the ideal, so removed deps are never compared. + // Only symlinks whose target resolves inside the project root are removed — that covers store links (node_modules/.store/...) and workspace self-links (e.g. node_modules/ -> ../packages/) that npm itself created. + // Symlinks pointing outside the project (e.g. `npm link foo` without --save targeting the global prefix, or hand-made `ln -s` to an external path) and real directories are preserved. async #cleanOrphanedTopLevelLinks (nmDir, validTopLevel) { const projectPrefix = resolve(this.path) + sep let dirents diff --git a/workspaces/arborist/test/isolated-mode.js b/workspaces/arborist/test/isolated-mode.js index b2a4df2787b21..3484f5e9ee7ae 100644 --- a/workspaces/arborist/test/isolated-mode.js +++ b/workspaces/arborist/test/isolated-mode.js @@ -1795,7 +1795,7 @@ tap.test('orphaned store entries are cleaned up on dependency removal', async t t.equal(entriesAfterRemoval.length, 0, 'all store entries are removed when dependencies are removed') - /* https://github.com/npm/cli/issues/9308 — the top-level node_modules symlink for the removed dep was left behind, dangling into the just-cleaned store. */ + // https://github.com/npm/cli/issues/9308 — the top-level node_modules symlink for the removed dep was left behind, dangling into the just-cleaned store. t.notOk(fs.existsSync(path.join(dir, 'node_modules', 'which')), 'top-level symlink for removed dependency is also cleaned up') }) @@ -2001,9 +2001,8 @@ tap.test('orphaned workspace self-link in root node_modules is cleaned up when w }) tap.test('unmanaged symlinks (e.g. npm link) in node_modules are preserved across reify', async t => { - /* The orphan sweep should only touch links the linked strategy itself created (those resolving into the project's node_modules/.store/). - * A symlink pointing outside .store/ — e.g. one created by `npm link foo` without --save or by hand — must be left alone. - */ + // The orphan sweep should only touch links the linked strategy itself created (those resolving into the project's node_modules/.store/). + // A symlink pointing outside .store/ — e.g. one created by `npm link foo` without --save or by hand — must be left alone. const graph = { registry: [ { name: 'abbrev', version: '4.0.0' }, From b21776240fe6cfaf212b94e404825f35ae4af854 Mon Sep 17 00:00:00 2001 From: Manzoor Wani Date: Tue, 5 May 2026 11:31:14 +0530 Subject: [PATCH 5/6] fix(arborist): scope orphan sweep to filtered workspaces and normalize location separators --- workspaces/arborist/lib/arborist/reify.js | 64 ++++++++++++++++------- workspaces/arborist/test/isolated-mode.js | 45 ++++++++++++---- 2 files changed, 80 insertions(+), 29 deletions(-) diff --git a/workspaces/arborist/lib/arborist/reify.js b/workspaces/arborist/lib/arborist/reify.js index aadde9e3c8278..36a0d02d2067b 100644 --- a/workspaces/arborist/lib/arborist/reify.js +++ b/workspaces/arborist/lib/arborist/reify.js @@ -125,8 +125,8 @@ module.exports = cls => class Reifier extends cls { await this.#reifyPackages() if (linked) { // The sweep mutates node_modules on disk, so skip it for dry runs and lockfile-only installs (those modes also short-circuit #reifyPackages). - // Skip when a filter is active too: filtered installs intentionally leave out-of-scope workspaces alone, and the ideal tree we'd sweep against does not represent their on-disk state. - if (!this.options.dryRun && !this.options.packageLockOnly && !this.diff?.filterSet?.size) { + // The sweep itself scopes to in-filter workspaces when a filter is active, so it's safe to run for filtered installs too. + if (!this.options.dryRun && !this.options.packageLockOnly) { await this.#cleanOrphanedStoreEntries() } // swap back in the idealTree @@ -1300,31 +1300,34 @@ module.exports = cls => class Reifier extends cls { // Collect valid store keys and valid top-level links per node_modules directory. // Store entries have location node_modules/.store/{key}/node_modules/{pkg}. // Top-level links have location {prefix}/node_modules/{pkg} or {prefix}/node_modules/@scope/{pkg}, where {prefix} is empty for the root project and the workspace's localLocation for workspace deps. + // Locations are normalized to forward slashes here because IsolatedNode/IsolatedLink locations are built with path.join, which uses backslashes on Windows. const validKeys = new Set() const nmDirs = new Map() - const storeMarker = `${sep}.store${sep}` + const NM_PREFIX = 'node_modules/' + const STORE_MARKER = '/.store/' for (const child of this.idealTree.children.values()) { + const loc = child.location.replace(/\\/g, '/') if (child.isInStore) { - const key = child.location.split(sep)[2] + const key = loc.split('/')[2] validKeys.add(key) continue } if (!child.isLink) { continue } - const nmIdx = child.location.lastIndexOf(`node_modules${sep}`) - if (nmIdx === -1 || child.location.includes(storeMarker)) { + const nmIdx = loc.lastIndexOf(NM_PREFIX) + if (nmIdx === -1 || loc.includes(STORE_MARKER)) { continue } - const prefix = child.location.slice(0, nmIdx) + const prefix = loc.slice(0, nmIdx) const dir = resolve(this.path, prefix, 'node_modules') - const rest = child.location.slice(nmIdx + `node_modules${sep}`.length) + const rest = loc.slice(nmIdx + NM_PREFIX.length) let entry if (rest.startsWith('@')) { - const [scope, name] = rest.split(sep) + const [scope, name] = rest.split('/') entry = `${scope}${sep}${name}` } else { - entry = rest.split(sep)[0] + entry = rest.split('/')[0] } let set = nmDirs.get(dir) if (!set) { @@ -1333,15 +1336,38 @@ module.exports = cls => class Reifier extends cls { } set.add(entry) } - // Always sweep the root node_modules and every workspace's node_modules even if no top-level links remain (e.g. last dep was just uninstalled). - // Without this, a workspace whose only remaining symlinks were removed would never be revisited and would leak orphan links. - if (!nmDirs.has(nmDir)) { - nmDirs.set(nmDir, new Set()) - } - for (const ws of this.idealTree.fsChildren) { - const wsNmDir = resolve(ws.path, 'node_modules') - if (!nmDirs.has(wsNmDir)) { - nmDirs.set(wsNmDir, new Set()) + + // Determine which node_modules directories to sweep. + // For an unfiltered install, sweep the project root and every workspace's node_modules even if no top-level links remain (e.g. last dep was just uninstalled). + // For a filtered install (npm install -w ), restrict the sweep to the in-scope workspaces so out-of-scope workspaces and the project root are left untouched, mirroring what the diff would do. + const filteredNames = this.options.workspaces + const isFiltered = Array.isArray(filteredNames) && filteredNames.length > 0 + if (isFiltered) { + const allowedDirs = new Set() + for (const ws of this.idealTree.fsChildren) { + if (filteredNames.includes(ws.packageName) || filteredNames.includes(ws.name)) { + allowedDirs.add(resolve(ws.path, 'node_modules')) + } + } + for (const dir of [...nmDirs.keys()]) { + if (!allowedDirs.has(dir)) { + nmDirs.delete(dir) + } + } + for (const dir of allowedDirs) { + if (!nmDirs.has(dir)) { + nmDirs.set(dir, new Set()) + } + } + } else { + if (!nmDirs.has(nmDir)) { + nmDirs.set(nmDir, new Set()) + } + for (const ws of this.idealTree.fsChildren) { + const wsNmDir = resolve(ws.path, 'node_modules') + if (!nmDirs.has(wsNmDir)) { + nmDirs.set(wsNmDir, new Set()) + } } } diff --git a/workspaces/arborist/test/isolated-mode.js b/workspaces/arborist/test/isolated-mode.js index 3484f5e9ee7ae..56da4331f090e 100644 --- a/workspaces/arborist/test/isolated-mode.js +++ b/workspaces/arborist/test/isolated-mode.js @@ -1916,19 +1916,26 @@ tap.test('orphan sweep is skipped on dryRun and packageLockOnly linked installs' 'real install cleans up the orphan symlink') }) -tap.test('orphan sweep skips out-of-filter workspaces during workspace-filtered linked install', async t => { - // Filtered installs intentionally leave out-of-scope workspaces alone. - // The sweep would otherwise delete a stray-but-untouched symlink from a workspace that wasn't part of the filter. +tap.test('orphan sweep is scoped to in-filter workspaces during workspace-filtered linked install', async t => { + // Filtered installs should clean up dependencies removed from the targeted workspace, but leave out-of-scope workspaces alone. + // ws-a is the in-filter workspace: it keeps one dep (which) and drops one (abbrev) so both the surviving-link and orphan-sweep paths are exercised inside an in-scope workspace. + // ws-c is also in the filter but starts with one dep (abbrev) and drops it entirely, exercising the case where the in-filter workspace's node_modules dir is not populated by any surviving links. + // ws-b is out of filter and gets a stale link planted to verify it is preserved. const graph = { registry: [ { name: 'abbrev', version: '4.0.0' }, + { name: 'which', version: '4.0.0' }, ], root: { - name: 'myroot', version: '1.0.0', + name: 'myroot', + version: '1.0.0', + // ws-c is declared as a root dep so its self-link lives at root node_modules — that means ws-c's own node_modules has no self-link, and dropping its only dep leaves the dir empty. + dependencies: { 'ws-c': '*' }, }, workspaces: [ - { name: 'ws-a', version: '1.0.0', dependencies: { abbrev: '4.0.0' } }, + { name: 'ws-a', version: '1.0.0', dependencies: { abbrev: '4.0.0', which: '4.0.0' } }, { name: 'ws-b', version: '1.0.0', dependencies: { abbrev: '4.0.0' } }, + { name: 'ws-c', version: '1.0.0', dependencies: { abbrev: '4.0.0' } }, ], } @@ -1938,25 +1945,43 @@ tap.test('orphan sweep skips out-of-filter workspaces during workspace-filtered const arb1 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) await arb1.reify({ installStrategy: 'linked' }) - // Plant a stale orphan link inside ws-b's node_modules to simulate an out-of-filter workspace having an entry the sweep would otherwise remove. + // Plant a stale orphan link inside ws-b (out-of-filter): the sweep must NOT touch it. const stalePath = path.join(dir, 'packages', 'ws-b', 'node_modules', 'stale-pkg') fs.symlinkSync('../../../node_modules/.store/nonexistent/node_modules/stale-pkg', stalePath) + // Drop abbrev from ws-a's package.json so that ws-a/node_modules/abbrev becomes orphan; which stays as a surviving link. + const wsAPkgPath = path.join(dir, 'packages', 'ws-a', 'package.json') + const wsAPkg = JSON.parse(fs.readFileSync(wsAPkgPath, 'utf8')) + wsAPkg.dependencies = { which: '4.0.0' } + fs.writeFileSync(wsAPkgPath, JSON.stringify(wsAPkg)) + + // Drop all deps from ws-c so its node_modules has no surviving links — exercises the in-filter empty-dir seeding path. + const wsCPkgPath = path.join(dir, 'packages', 'ws-c', 'package.json') + const wsCPkg = JSON.parse(fs.readFileSync(wsCPkgPath, 'utf8')) + delete wsCPkg.dependencies + fs.writeFileSync(wsCPkgPath, JSON.stringify(wsCPkg)) + const arb2 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache, - workspaces: ['ws-a'], + workspaces: ['ws-a', 'ws-c'], }) await arb2.reify({ installStrategy: 'linked', - workspaces: ['ws-a'], + workspaces: ['ws-a', 'ws-c'], }) - t.ok(arb2.diff.filterSet.size > 0, 'filterSet is populated for ws-a-only install') + t.ok(arb2.diff.filterSet.size > 0, 'filterSet is populated for filtered install') + t.notOk(fs.lstatSync(path.join(dir, 'packages', 'ws-a', 'node_modules', 'abbrev'), { throwIfNoEntry: false }), + 'orphan link in in-filter workspace with surviving deps is removed') + t.ok(fs.lstatSync(path.join(dir, 'packages', 'ws-a', 'node_modules', 'which'), { throwIfNoEntry: false }), + 'surviving link in in-filter workspace is preserved') + t.notOk(fs.lstatSync(path.join(dir, 'packages', 'ws-c', 'node_modules', 'abbrev'), { throwIfNoEntry: false }), + 'orphan link in in-filter workspace with no surviving deps is removed') t.ok(fs.lstatSync(stalePath, { throwIfNoEntry: false }), - 'stale link in out-of-filter workspace is left untouched during filtered install') + 'stale link in out-of-filter workspace is preserved') }) tap.test('orphaned workspace self-link in root node_modules is cleaned up when workspace is undeclared', async t => { From 81e4d5be602dc41c8e7782f0fdae28b7c7401fd8 Mon Sep 17 00:00:00 2001 From: Manzoor Wani Date: Wed, 6 May 2026 09:31:21 +0530 Subject: [PATCH 6/6] fix(arborist): sweep root node_modules with --include-workspace-root and document intra-project symlink trade-off --- workspaces/arborist/lib/arborist/reify.js | 6 +- workspaces/arborist/test/isolated-mode.js | 85 +++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/workspaces/arborist/lib/arborist/reify.js b/workspaces/arborist/lib/arborist/reify.js index 36a0d02d2067b..2bd64212df043 100644 --- a/workspaces/arborist/lib/arborist/reify.js +++ b/workspaces/arborist/lib/arborist/reify.js @@ -1339,7 +1339,8 @@ module.exports = cls => class Reifier extends cls { // Determine which node_modules directories to sweep. // For an unfiltered install, sweep the project root and every workspace's node_modules even if no top-level links remain (e.g. last dep was just uninstalled). - // For a filtered install (npm install -w ), restrict the sweep to the in-scope workspaces so out-of-scope workspaces and the project root are left untouched, mirroring what the diff would do. + // For a filtered install (npm install -w ), restrict the sweep to the in-scope workspaces so out-of-scope workspaces are left untouched, mirroring what the diff would do. + // When --include-workspace-root is set, the filter scope pulls in root deps too, so the root node_modules is included in the sweep. const filteredNames = this.options.workspaces const isFiltered = Array.isArray(filteredNames) && filteredNames.length > 0 if (isFiltered) { @@ -1349,6 +1350,9 @@ module.exports = cls => class Reifier extends cls { allowedDirs.add(resolve(ws.path, 'node_modules')) } } + if (this.options.includeWorkspaceRoot) { + allowedDirs.add(nmDir) + } for (const dir of [...nmDirs.keys()]) { if (!allowedDirs.has(dir)) { nmDirs.delete(dir) diff --git a/workspaces/arborist/test/isolated-mode.js b/workspaces/arborist/test/isolated-mode.js index 56da4331f090e..86bb2f0ea23ed 100644 --- a/workspaces/arborist/test/isolated-mode.js +++ b/workspaces/arborist/test/isolated-mode.js @@ -1984,6 +1984,91 @@ tap.test('orphan sweep is scoped to in-filter workspaces during workspace-filter 'stale link in out-of-filter workspace is preserved') }) +tap.test('orphan sweep includes root node_modules when --include-workspace-root is set', async t => { + // With --include-workspace-root, the filter scope pulls root deps in too, so dropped root deps must be cleaned up alongside the in-filter workspaces. + const graph = { + registry: [ + { name: 'abbrev', version: '4.0.0' }, + { name: 'which', version: '4.0.0' }, + ], + root: { + name: 'myroot', + version: '1.0.0', + dependencies: { which: '4.0.0' }, + }, + workspaces: [ + { name: 'ws-a', version: '1.0.0', dependencies: { abbrev: '4.0.0' } }, + ], + } + + const { dir, registry } = await getRepo(graph) + const cache = fs.mkdtempSync(`${getTempDir()}/test-`) + + const arb1 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb1.reify({ installStrategy: 'linked' }) + t.ok(fs.lstatSync(path.join(dir, 'node_modules', 'which'), { throwIfNoEntry: false }), 'which is installed at root') + + // Drop the root dep so node_modules/which becomes orphan. + const pkgPath = path.join(dir, 'package.json') + const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8')) + delete pkg.dependencies + fs.writeFileSync(pkgPath, JSON.stringify(pkg)) + + const arb2 = new Arborist({ + path: dir, + registry, + packumentCache: new Map(), + cache, + workspaces: ['ws-a'], + includeWorkspaceRoot: true, + }) + await arb2.reify({ + installStrategy: 'linked', + workspaces: ['ws-a'], + includeWorkspaceRoot: true, + }) + + t.notOk(fs.lstatSync(path.join(dir, 'node_modules', 'which'), { throwIfNoEntry: false }), + 'orphan root dep is removed when --include-workspace-root scope covers it') +}) + +tap.test('hand-made symlink inside the project root is intentionally swept by linked install', async t => { + // Documents an explicit trade-off: a hand-made symlink whose target lives inside the project (e.g. node_modules/local-tool -> ../tools/local-tool) is indistinguishable from a workspace self-link or store link by target alone. + // The linked sweep treats it as orphaned and removes it on the next reify, matching how the default install strategy already behaves with intra-project symlinks. + // External targets (outside the project root) remain preserved — see the sibling test 'unmanaged symlinks (e.g. npm link) in node_modules are preserved across reify'. + const graph = { + registry: [ + { name: 'abbrev', version: '4.0.0' }, + ], + root: { + name: 'myproject', + version: '1.0.0', + dependencies: { abbrev: '^4.0.0' }, + }, + } + const { dir, registry } = await getRepo(graph) + const cache = fs.mkdtempSync(`${getTempDir()}/test-`) + + const arb1 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb1.reify({ installStrategy: 'linked' }) + + // Create a target folder inside the project root and link to it from node_modules. + const localToolDir = path.join(dir, 'tools', 'local-tool') + fs.mkdirSync(localToolDir, { recursive: true }) + fs.writeFileSync(path.join(localToolDir, 'package.json'), + JSON.stringify({ name: 'local-tool', version: '0.0.0' })) + const intraLink = path.join(dir, 'node_modules', 'local-tool') + fs.symlinkSync(path.join('..', 'tools', 'local-tool'), intraLink) + + const arb2 = new Arborist({ path: dir, registry, packumentCache: new Map(), cache }) + await arb2.reify({ installStrategy: 'linked' }) + + t.notOk(fs.lstatSync(intraLink, { throwIfNoEntry: false }), + 'intra-project hand-made symlink is removed by the sweep (intentional trade-off)') + t.ok(fs.existsSync(localToolDir), + 'the target directory itself is left intact — only the symlink is removed') +}) + tap.test('orphaned workspace self-link in root node_modules is cleaned up when workspace is undeclared', async t => { // When root declares a workspace as a dependency, the workspace gets a self-link at root node_modules (e.g. node_modules/a -> ../packages/a). // If the workspace is later removed from root's dependencies, that self-link must be cleaned up.