From 841f3a81afbe35d2873a5ec820394121d98887f8 Mon Sep 17 00:00:00 2001 From: Sascha Greuel Date: Tue, 10 Mar 2026 15:00:35 +0100 Subject: [PATCH] Added new spiders - Meta-WebIndexer - Meta-ExternalAds - Meta-ExternalAgent - Meta-ExternalFetcher - OAI-SearchBot - ChatGPT-User - Perplexity-User - Bytespider - PlagAwareBot - DuckAssistBot - CCBot - DataForSeoBot - Gemini-Deep-Research - quillbot PerplexityBot doesn't appear in https://assets.woltlab.com/spiderlist/typhoon/list.xml (used by RefreshSearchRobotsCronjob prior to 6.1), and it has been missing from SpiderCollecting since 6.1. --- spiderList/spiderList.xml | 55 +++++++++++++++++++ .../event/spider/SpiderCollecting.class.php | 51 +++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/spiderList/spiderList.xml b/spiderList/spiderList.xml index b015c90d96d..7b08e5f581a 100644 --- a/spiderList/spiderList.xml +++ b/spiderList/spiderList.xml @@ -1719,8 +1719,63 @@ GPTBot https://openai.com/gptbot + + OAI-SearchBot + https://openai.com/searchbot + + + ChatGPT-User + https://openai.com/bot + PerplexityBot https://perplexity.ai/perplexitybot + + Perplexity-User + https://perplexity.ai/perplexity-user + + + Meta-WebIndexer + https://developers.facebook.com/docs/sharing/webmasters/web-crawlers + + + Meta-ExternalAds + https://developers.facebook.com/docs/sharing/webmasters/web-crawlers + + + Meta-ExternalAgent + https://developers.facebook.com/docs/sharing/webmasters/web-crawlers + + + Meta-ExternalFetcher + https://developers.facebook.com/docs/sharing/webmasters/web-crawlers + + + Bytespider + + + PlagAware + https://www.plagaware.com/bot + + + DuckAssistBot + https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot + + + Common Crawl Bot + https://commoncrawl.org/faq + + + DataForSEO Link Bot + https://dataforseo.com/dataforseo-bot + + + Gemini Deep Research + https://gemini.google/overview/deep-research/ + + + Quillbot + https://quillbot.com/ + diff --git 
a/wcfsetup/install/files/lib/event/spider/SpiderCollecting.class.php b/wcfsetup/install/files/lib/event/spider/SpiderCollecting.class.php index 17d1562c5cb..2965db611af 100644 --- a/wcfsetup/install/files/lib/event/spider/SpiderCollecting.class.php +++ b/wcfsetup/install/files/lib/event/spider/SpiderCollecting.class.php @@ -641,6 +641,57 @@ public function __construct() ); $this->register(new Spider('360Spider', '360Spider')); $this->register(new Spider('GPTBot', 'GPTBot', 'https://openai.com/gptbot')); + $this->register(new Spider('OAI-SearchBot', 'OAI-SearchBot', 'https://openai.com/searchbot')); + $this->register(new Spider('ChatGPT-User', 'ChatGPT-User', 'https://openai.com/bot')); + $this->register(new Spider('PerplexityBot', 'PerplexityBot', 'https://perplexity.ai/perplexitybot')); + $this->register(new Spider('Perplexity-User', 'Perplexity-User', 'https://perplexity.ai/perplexity-user')); + $this->register( + new Spider( + 'meta-webindexer', + 'Meta-WebIndexer', + 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers' + ) + ); + $this->register( + new Spider( + 'meta-externalads', + 'Meta-ExternalAds', + 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers' + ) + ); + $this->register( + new Spider( + 'meta-externalagent', + 'Meta-ExternalAgent', + 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers' + ) + ); + $this->register( + new Spider( + 'meta-externalfetcher', + 'Meta-ExternalFetcher', + 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers' + ) + ); + $this->register(new Spider('Bytespider', 'Bytespider')); + $this->register(new Spider('PlagAwareBot', 'PlagAware', 'https://www.plagaware.com/bot')); + $this->register( + new Spider( + 'DuckAssistBot', + 'DuckAssistBot', + 'https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot' + ) + ); + $this->register(new Spider('CCBot', 'Common Crawl Bot', 'https://commoncrawl.org/faq')); + $this->register(new Spider('DataForSeoBot', 
'DataForSEO Link Bot', 'https://dataforseo.com/dataforseo-bot')); + $this->register( + new Spider( + 'Gemini-Deep-Research', + 'Gemini Deep Research', + 'https://gemini.google/overview/deep-research/' + ) + ); + $this->register(new Spider('quillbot', 'Quillbot', 'https://quillbot.com/')); } /**