diff --git a/src/Sanitize.php b/src/Sanitize.php
index df1176b7..72310bf8 100644
--- a/src/Sanitize.php
+++ b/src/Sanitize.php
@@ -58,6 +58,8 @@ class Sanitize implements RegistryAware
     public $allow_data_attr = true;
     /** @var bool */
     public $allow_aria_attr = true;
+    /** @var string[] URI protocols that are neutralised in links, see disallow_uri_protocols() */
+    public $disallowed_uri_protocols = ['javascript'];
     /** @var array> */
     public $add_attributes = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']];
     /** @var bool */
@@ -280,6 +282,17 @@ public function allow_aria_attr(bool $allow = true): void
         $this->allow_aria_attr = $allow;
     }
 
+    /**
+     * Set the URI protocols that are neutralised in links while sanitising.
+     *
+     * @param string[] $protocols List of protocols to disallow, e.g. ['javascript'];
+     *                            pass an empty array to disable the check
+     */
+    public function disallow_uri_protocols(array $protocols = ['javascript']): void
+    {
+        $this->disallowed_uri_protocols = $protocols;
+    }
+
     /**
      * @return void
      */
@@ -541,6 +554,13 @@ public function sanitize(string $data, int $type, string $base = '')
                 $this->replace_urls($document, $element, $attributes);
             }
 
+            // Neutralise links that use a disallowed URI protocol.
+            if ($this->disallowed_uri_protocols) {
+                foreach ($this->disallowed_uri_protocols as $protocol) {
+                    $this->strip_uri_protocol($xpath, $protocol);
+                }
+            }
+
             // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
             if ($this->image_handler !== '' && $this->enable_cache) {
                 $images = $document->getElementsByTagName('img');
@@ -742,6 +762,77 @@ protected function enforce_allowed_html_nodes(\DOMNode $element, bool $allow_dat
         }
     }
 
+    /**
+     * Extract the normalised protocol (scheme) of a URI.
+     *
+     * The part before the first ":" is percent-decoded, stripped of control
+     * characters and whitespace, and lowercased, so that obfuscated spellings
+     * such as " JavaScript:" or "%6Aavascript:" still yield "javascript".
+     *
+     * @param string $uri URI to inspect
+     * @return string Lowercase protocol without the colon, or '' if the URI contains no ":"
+     */
+    private function extract_protocol(string $uri): string
+    {
+        if (!str_contains($uri, ':')) {
+            return '';
+        }
+
+        $scheme = rawurldecode(explode(':', $uri, 2)[0]);
+
+        // preg_replace() returns null on a PCRE error; treat that as "no protocol".
+        return strtolower(preg_replace('/[\x01-\x20\s]/', '', $scheme) ?? '');
+    }
+
+    /**
+     * Neutralise links that use a disallowed URI protocol.
+     *
+     * Every matching `href`/`src` value found by the XPath query is prefixed
+     * with "unsafe:", which changes the URI's protocol while keeping the
+     * original target readable.
+     *
+     * @param \DOMXPath $xpath XPath object used to query the elements to check
+     * @param string $protocol Protocol to disallow, case-insensitive; a trailing ":" is ignored
+     * @throws \SimplePie\Exception If the XPath query fails
+     */
+    protected function strip_uri_protocol(\DOMXPath $xpath, string $protocol): void
+    {
+        $protocol = strtolower(rtrim(trim($protocol), ':'));
+
+        // extract_protocol() returns '' for URIs without a protocol, so an empty
+        // entry would otherwise flag every relative link as unsafe.
+        if ($protocol === '') {
+            return;
+        }
+
+        $elements = $xpath->query('.//a[@href]|.//iframe[@src]|.//math//*[@href]');
+
+        if ($elements === false) {
+            throw new \SimplePie\Exception(sprintf(
+                '%s(): Possibly malformed expression',
+                __METHOD__
+            ), 1);
+        }
+
+        foreach ($elements as $element) {
+            if (!($element instanceof \DOMElement)) {
+                continue;
+            }
+
+            foreach (['href', 'src'] as $attribute) {
+                if (!$element->hasAttribute($attribute)) {
+                    continue;
+                }
+
+                $uri = $element->getAttribute($attribute);
+
+                if ($this->extract_protocol($uri) === $protocol) {
+                    $element->setAttribute($attribute, 'unsafe:' . $uri);
+                }
+            }
+        }
+    }
+
     /**
      * @param int-mask-of $type
      * @return void
diff --git a/src/SimplePie.php b/src/SimplePie.php
index 38c61e84..3d31bb12 100644
--- a/src/SimplePie.php
+++ b/src/SimplePie.php
@@ -692,6 +692,13 @@ class SimplePie
      */
     public $allow_aria_attr = true;
 
+    /**
+     * @var string[] Stores array of disallowed URI protocols
+     * @see SimplePie::disallow_uri_protocols()
+     * @access private
+     */
+    public $disallowed_uri_protocols = ['javascript'];
+
     /**
      * @var bool Should we throw exceptions, or use the old-style error property?
      * @access private
@@ -1589,6 +1596,18 @@ public function allow_aria_attr(bool $allow = true): void
         $this->sanitize->allow_aria_attr($allow);
     }
 
+    /**
+     * Set the URI protocols that are neutralised in links while sanitising.
+     *
+     * @param string[] $protocols List of protocols to disallow
+     */
+    public function disallow_uri_protocols(array $protocols = ['javascript']): void
+    {
+        // Keep the documented property in sync with what the sanitizer uses.
+        $this->disallowed_uri_protocols = $protocols;
+        $this->sanitize->disallow_uri_protocols($protocols);
+    }
+
     /**
      * @return void
      */