Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .changeset/fix-crawler-probe-all-registered-agents.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
---

Fix registry crawler skipping non-sales registered agents for health/capability snapshots. The periodic crawl now re-fetches all registered agents on every tick instead of capturing only sales agents at startup, so signals/buying/creative agents get probed without a server restart.
16 changes: 8 additions & 8 deletions server/src/crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,14 +162,14 @@ export class CrawlerService {
}
}

startPeriodicCrawl(agents: Agent[], intervalMinutes: number = 60) {
// Initial crawl
this.crawlAllAgents(agents);

// Periodic crawl
this.intervalId = setInterval(() => {
this.crawlAllAgents(agents);
}, intervalMinutes * 60 * 1000);
/**
 * Start the periodic registry crawl.
 *
 * Re-fetches the agent list via `getAgents` on every tick so agents
 * registered after startup are picked up without a restart, then hands
 * the list to `crawlAllAgents`. Runs once immediately, then every
 * `intervalMinutes` minutes.
 *
 * @param getAgents        Async supplier of the current agent list,
 *                         invoked fresh on each tick.
 * @param intervalMinutes  Crawl period in minutes (default 60).
 */
startPeriodicCrawl(getAgents: () => Promise<Agent[]>, intervalMinutes: number = 60) {
  const run = () =>
    getAgents()
      .then(agents => this.crawlAllAgents(agents))
      // Swallow-and-log so one failed tick never kills the schedule.
      .catch(err => log.error({ err }, 'Periodic crawl failed'));

  // Guard against a double start: clear any previously scheduled timer
  // so repeated calls do not leak an orphaned interval that keeps
  // crawling on the old cadence.
  if (this.intervalId) {
    clearInterval(this.intervalId);
  }

  run();
  this.intervalId = setInterval(run, intervalMinutes * 60 * 1000);

  log.info({ intervalMinutes }, 'Periodic crawl started');
}
Expand Down
36 changes: 20 additions & 16 deletions server/src/http.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2078,12 +2078,15 @@ export class HTTPServer {
// outbound traffic to every registered agent. Per-agent refresh is
// available to owners at POST /api/registry/agents/:encodedUrl/refresh.
this.app.post("/api/crawler/run", requireAuth, requireAdmin, async (req, res) => {
// Crawler iterates sales agents — they're the ones with publisher
// authorizations and list_authorized_properties responses to walk.
// Pre-#3540 this filter was inverted (matched 'buying' against the
// accidentally-aligned classification); see #3774 for the sweep
// that closed the remaining gaps.
const agents = await this.agentService.listAgents("sales");
// Full-registry crawl: all registered agents. Sales agents drive the
// publisher adagents.json walk; all agent types get health + capability
// snapshots via refreshAgentSnapshots. Mirrors the periodic-crawl scope
// added in #4213 so a manual admin run and the scheduled run behave
// identically. `viewerHasApiAccess` defaults to false — members_only
// agents are excluded from both paths intentionally (periodic crawl
// probes the public-facing registry surface; refreshSingleAgent covers
// owner-triggered probes for members_only agents).
const agents = await this.agentService.listAgents();
const result = await this.crawler.crawlAllAgents(agents);
res.json(result);
});
Expand Down Expand Up @@ -9031,16 +9034,17 @@ ${p.category ? `<category>${p.category}</category>\n` : ''}<url>${publishedUrl}<
logger.info({ isWorker }, 'Process role resolved');

if (isWorker) {
// Start periodic property crawler for sales agents — they're the
// ones with publisher authorizations and list_authorized_properties
// responses to walk. Pre-#3540 this filtered on 'buying' (inverted-
// but-aligned with the classification bug); see #3774 for the
// sweep that closed remaining gaps.
const salesAgents = await this.agentService.listAgents("sales");
if (salesAgents.length > 0) {
logger.debug({ salesAgentCount: salesAgents.length }, 'Starting property crawler');
this.crawler.startPeriodicCrawl(salesAgents, 360); // Crawl every 6 hours
}
// Start periodic registry crawler for all registered agents. Re-fetches
// the agent list on every tick so newly registered agents are picked up
// without a restart. Sales agents drive publisher adagents.json
// discovery; signals/buying/creative agents still need health +
// capability snapshots on the same cycle. `viewerHasApiAccess` defaults
// to false — members_only agents are intentionally excluded from the
// periodic crawl (the public-facing registry surface is the target);
// owner-triggered probes for members_only agents go through
// POST /api/registry/agents/:encodedUrl/refresh. Fixes #4213.
logger.debug('Starting registry crawler');
this.crawler.startPeriodicCrawl(() => this.agentService.listAgents(), 360); // Crawl every 6 hours

// Crawl catalog domains for adagents.json (demand-driven queue)
this.crawler.startPeriodicCatalogCrawl(30); // Process queue every 30 minutes
Expand Down
Loading