Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 71 additions & 4 deletions src/Sanitize.php
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ class Sanitize implements RegistryAware
public $allow_data_attr = true;
/** @var bool */
public $allow_aria_attr = true;
/**
 * URI schemes (protocols) to block, written without the trailing colon.
 *
 * Entries are compared strictly against the lowercased scheme of each URL,
 * so they should be lowercase (e.g. 'javascript').
 *
 * @var string[]
 */
public $disallowed_uri_schemes = ['javascript'];
/** @var array<string, array<string, string>> */
public $add_attributes = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']];
/** @var bool */
Expand Down Expand Up @@ -280,6 +282,14 @@ public function allow_aria_attr(bool $allow = true): void
$this->allow_aria_attr = $allow;
}

/**
 * Set the URI schemes (protocols) that must be blocked in URL attributes.
 *
 * Entries are normalised before being stored: surrounding whitespace and a
 * trailing colon are removed and the name is lowercased. Scheme matching
 * lowercases the scheme of the URL and compares strictly, so an entry such
 * as 'JavaScript' or 'javascript:' would otherwise never match anything.
 *
 * Pass an empty array to disable scheme blocking.
 *
 * @param string[] $schemes List of URI schemes (protocols) to disallow
 */
public function disallow_uri_schemes(array $schemes = ['javascript']): void
{
    $normalized = [];
    foreach ($schemes as $scheme) {
        $normalized[] = strtolower(rtrim(trim($scheme), ':'));
    }
    $this->disallowed_uri_schemes = $normalized;
}

/**
* @return void
*/
Expand Down Expand Up @@ -352,8 +362,8 @@ public function set_output_encoding(string $encoding = 'UTF-8')
* containing URLs that need to be resolved relative to the feed
*
* Defaults to |a|@href, |area|@href, |audio|@src, |blockquote|@cite,
* |del|@cite, |form|@action, |img|@longdesc, |img|@src, |input|@src,
* |ins|@cite, |q|@cite, |source|@src, |video|@src
* |del|@cite, |form|@action, |iframe|@src, |img|@longdesc, |img|@src,
* |input|@src, |ins|@cite, |q|@cite, |source|@src, |video|@src
*
* @since 1.0
* @param array<string, string|string[]>|null $element_attribute Element/attribute key/value pairs, null for default
Expand All @@ -369,6 +379,7 @@ public function set_url_replacements(?array $element_attribute = null)
'blockquote' => 'cite',
'del' => 'cite',
'form' => 'action',
'iframe' => 'src',
'img' => [
'longdesc',
'src'
Expand Down Expand Up @@ -535,12 +546,20 @@ public function sanitize(string $data, int $type, string $base = '')
}
}

// Replace relative URLs
// Replace relative URLs and block disallowed URI schemes (protocols)
$this->base = $base;
foreach ($this->replace_url_attributes as $element => $attributes) {
$this->replace_urls($document, $element, $attributes);
}

// MathML and SVG allow href on arbitrary descendants,
// so require a walk of the DOM
if ($this->disallowed_uri_schemes !== []
&& ($document->getElementsByTagName('math')->length > 0
|| $document->getElementsByTagName('svg')->length > 0)) {
$this->block_disallowed_schemes_in_descendants($xpath);
}

// If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
if ($this->image_handler !== '' && $this->enable_cache) {
$images = $document->getElementsByTagName('img');
Expand Down Expand Up @@ -657,7 +676,10 @@ public function replace_urls(DOMDocument $document, string $tag, $attributes)
if ($element->hasAttribute($attribute)) {
$value = $this->registry->call(Misc::class, 'absolutize_url', [$element->getAttribute($attribute), $this->base]);
if ($value !== false) {
$value = $this->https_url($value);
// Block disallowed URI protocols (e.g. javascript:), otherwise force HTTPS where applicable
$value = ($this->disallowed_uri_schemes !== [] && !$this->is_allowed_scheme($value))
? 'unsafe:' . $value
: $this->https_url($value);
$element->setAttribute($attribute, $value);
}
}
Expand Down Expand Up @@ -742,6 +764,51 @@ protected function enforce_allowed_html_nodes(\DOMNode $element, bool $allow_dat
}
}

/**
 * Decide whether a URI reference may be kept, judging by its scheme (protocol).
 *
 * References without a scheme are always allowed. A reference with a scheme
 * is rejected when the scheme is not purely alphanumeric or is listed in
 * $this->disallowed_uri_schemes.
 *
 * @param string $uri URI reference to inspect
 * @return bool True when the reference is allowed, false when it must be blocked
 */
private function is_allowed_scheme(string $uri): bool
{
    // A scheme ends at the first ":" and can never contain "/", "?" or "#".
    // When one of those appears first, the colon belongs to the path, query or
    // fragment (e.g. "#icon:home", "/a:b", "?t=12:30") and there is no scheme.
    $end = strcspn($uri, ':/?#');
    if ($end === strlen($uri) || $uri[$end] !== ':') {
        return true;
    }
    $scheme = strtolower(substr($uri, 0, $end));
    // Fail closed: an empty scheme, or one carrying whitespace, control
    // characters or any other non-alphanumeric byte, is treated as disallowed.
    if (!ctype_alnum($scheme)) {
        return false;
    }
    return !in_array($scheme, $this->disallowed_uri_schemes, true);
}

/**
 * Neutralise disallowed URI schemes (protocols) inside MathML and SVG subtrees.
 *
 * MathML and SVG permit href on arbitrary descendant elements, and SVG content
 * may also carry xlink:href. Every element at or below a math/svg element that
 * has one of these attributes is inspected, and a value whose scheme is not
 * allowed is prefixed with "unsafe:".
 */
private function block_disallowed_schemes_in_descendants(\DOMXPath $xpath): void
{
    // Note: content is parsed via loadHTML(), which does not namespace-process attributes
    $matches = $xpath->query('.//*[self::math or self::svg]/descendant-or-self::*[@href or @*[name()="xlink:href"]]');
    if ($matches === false) {
        return;
    }
    foreach ($matches as $node) {
        if (!($node instanceof \DOMElement)) {
            continue;
        }
        // Same treatment for both attributes, plain href first.
        foreach (['href', 'xlink:href'] as $attribute) {
            if (!$node->hasAttribute($attribute)) {
                continue;
            }
            $target = $node->getAttribute($attribute);
            if ($this->is_allowed_scheme($target)) {
                continue;
            }
            $node->setAttribute($attribute, 'unsafe:' . $target);
        }
    }
}

/**
* @param int-mask-of<SimplePie::CONSTRUCT_*> $type
* @return void
Expand Down
15 changes: 15 additions & 0 deletions src/SimplePie.php
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,13 @@ class SimplePie
*/
public $allow_aria_attr = true;

/**
* @var string[] Stores array of disallowed URI schemes (protocols)
* @see SimplePie::disallow_uri_schemes()
* @access private
*/
public $disallowed_uri_schemes = ['javascript'];

/**
* @var bool Should we throw exceptions, or use the old-style error property?
* @access private
Expand Down Expand Up @@ -1589,6 +1596,14 @@ public function allow_aria_attr(bool $allow = true): void
$this->sanitize->allow_aria_attr($allow);
}

/**
 * Set the URI schemes (protocols) that are blocked when sanitizing content.
 *
 * The list is recorded on this object and forwarded to the sanitizer.
 *
 * @param string[] $schemes List of schemes (protocols) to disallow
 */
public function disallow_uri_schemes(array $schemes = ['javascript']): void
{
    // Keep the local property in sync; it would otherwise stay at its default.
    $this->disallowed_uri_schemes = $schemes;
    $this->sanitize->disallow_uri_schemes($schemes);
}

/**
* @return void
*/
Expand Down
186 changes: 186 additions & 0 deletions tests/Unit/SanitizeTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,190 @@ public function testSanitizeURLResolution(string $given, string $expected): void

self::assertSame($expected, $sanitize->sanitize($given, SIMPLEPIE_CONSTRUCT_HTML, $base));
}

/**
 * Sanitizes a markup snippet with the given scheme blocklist and checks the
 * exact serialized output.
 *
 * @param string[] $disallowedSchemes List of schemes (protocols) to disallow
 * @dataProvider disallowedUriSchemesProvider
 */
public function testDisallowedUriSchemes(
    string $input,
    string $expected,
    array $disallowedSchemes = ['javascript']
): void {
    $sanitizer = new Sanitize();
    $sanitizer->disallow_uri_schemes($disallowedSchemes);
    // No tag stripping, so elements such as iframe and form survive.
    $sanitizer->strip_htmltags = [];

    $sanitizer->set_registry(new Registry());

    $baseUrl = 'http://example.com/';
    $actual = $sanitizer->sanitize($input, \SimplePie\SimplePie::CONSTRUCT_HTML, $baseUrl);

    self::assertSame($expected, $actual);
}

/**
 * Datasets for testDisallowedUriSchemes().
 *
 * Each dataset is [input markup, expected sanitized markup, disallowed schemes].
 *
 * @return iterable<array{string,string,array<string>}>
 */
public static function disallowedUriSchemesProvider(): iterable
{
    yield 'javascript scheme in href' => [
        '<a href="javascript:alert(\'XSS\')">Click me</a>',
        '<a href="unsafe:javascript:alert(\'XSS\')">Click me</a>',
        ['javascript'],
    ];

    yield 'javascript scheme with spaces in href' => [
        '<a href=" javascript:alert(\'XSS\')">Click me</a>',
        '<a href="unsafe:javascript:alert(\'XSS\')">Click me</a>',
        ['javascript'],
    ];

    yield 'javascript scheme in iframe src' => [
        '<iframe src="javascript:alert(\'XSS\')"></iframe>',
        '<iframe src="unsafe:javascript:alert(\'XSS\')" sandbox="allow-scripts allow-same-origin"></iframe>',
        ['javascript'],
    ];

    yield 'javascript scheme case insensitive' => [
        '<a href="JaVaScRiPt:alert(\'XSS\')">Click me</a>',
        '<a href="unsafe:javascript:alert(\'XSS\')">Click me</a>',
        ['javascript'],
    ];

    yield 'javascript scheme url encoded' => [
        '<a href="%6A%61%76%61%73%63%72%69%70%74:alert(\'XSS\')">Click me</a>',
        '<a href="http://example.com/">Click me</a>',
        ['javascript'],
    ];

    yield 'javascript scheme with scheme colon url encoded' => [
        '<a href="javascript%3Aalert(\'XSS\')">Click me</a>',
        '<a href="http://example.com/javascript%3Aalert(\'XSS\')">Click me</a>',
        ['javascript'],
    ];

    yield 'javascript scheme encoded with numeric HTML entities' => [
        '<a href="&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;:alert(\'XSS\')">Click me</a>',
        '<a href="unsafe:javascript:alert(\'XSS\')">Click me</a>',
        ['javascript'],
    ];

    yield 'javascript scheme encoded with hex entities' => [
        '<a href="&#x6a;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;:alert(\'XSS\')">Click me</a>',
        '<a href="unsafe:javascript:alert(\'XSS\')">Click me</a>',
        ['javascript'],
    ];

    yield 'javascript scheme with scheme colon as numeric HTML entity' => [
        '<a href="javascript&#58;alert(\'XSS\')">Click me</a>',
        '<a href="unsafe:javascript:alert(\'XSS\')">Click me</a>',
        ['javascript'],
    ];

    yield 'javascript scheme with scheme colon as hex HTML entity' => [
        '<a href="javascript&#x3a;alert(\'XSS\')">Click me</a>',
        '<a href="unsafe:javascript:alert(\'XSS\')">Click me</a>',
        ['javascript'],
    ];

    yield 'javascript scheme with scheme colon as named HTML entity' => [
        '<a href="javascript&colon;alert(\'XSS\')">Click me</a>',
        '<a href="http://example.com/javascript&amp;colon;alert(\'XSS\')">Click me</a>',
        ['javascript'],
    ];

    yield 'javascript scheme double encoded with URL encoding inside numeric HTML entities' => [
        '<a href="&#37;&#54;&#65;&#37;&#54;&#49;&#37;&#55;&#54;&#37;&#54;&#49;&#37;&#55;&#51;&#37;&#54;&#51;&#37;&#55;&#50;&#37;&#54;&#57;&#37;&#55;&#48;&#37;&#55;&#52;:alert(\'XSS\')">Click me</a>',
        '<a href="http://example.com/">Click me</a>',
        ['javascript'],
    ];

    yield 'javascript scheme with scheme colon double encoded with URL encoding inside numeric HTML entities' => [
        '<a href="javascript&#37;&#51;&#65;alert(\'XSS\')">Click me</a>',
        '<a href="http://example.com/javascript%3Aalert(\'XSS\')">Click me</a>',
        ['javascript'],
    ];

    yield 'javascript scheme with a double slash' => [
        '<a href="javascript://%0Aalert(\'XSS\')">Click me</a>',
        '<a href="unsafe:javascript://%0Aalert(\'xss\')">Click me</a>',
        ['javascript'],
    ];

    yield 'vbscript scheme blocked' => [
        '<a href="vbscript:msgbox(\'XSS\')">Click me</a>',
        '<a href="unsafe:vbscript:msgbox(\'XSS\')">Click me</a>',
        ['javascript', 'vbscript', 'data'],
    ];

    yield 'data scheme blocked' => [
        '<a href="data:text/html,<script>alert(\'XSS\')</script>">Click me</a>',
        '<a href="unsafe:data:text/html,%3Cscript%3Ealert(\'XSS\')%3C/script%3E">Click me</a>',
        ['javascript', 'vbscript', 'data'],
    ];

    yield 'safe http scheme unaffected' => [
        '<a href="http://example.com/page">HTTP link</a>',
        '<a href="http://example.com/page">HTTP link</a>',
        ['javascript'],
    ];

    yield 'safe http scheme with blanks unaffected' => [
        '<a href=" http://example.com/page">HTTP link</a>',
        '<a href="http://example.com/page">HTTP link</a>',
        ['javascript'],
    ];

    yield 'safe https scheme unaffected' => [
        '<a href="https://example.com/page">HTTPS link</a>',
        '<a href="https://example.com/page">HTTPS link</a>',
        ['javascript'],
    ];

    yield 'safe mailto scheme unaffected' => [
        '<a href="mailto:test@example.com">Email</a>',
        '<a href="mailto:test@example.com">Email</a>',
        ['javascript'],
    ];

    yield 'javascript scheme in form action' => [
        '<form action="javascript:alert(\'XSS\')"></form>',
        '<form action="unsafe:javascript:alert(\'XSS\')"></form>',
        ['javascript'],
    ];

    yield 'javascript scheme in blockquote cite' => [
        '<blockquote cite="javascript:alert(\'XSS\')">Quote</blockquote>',
        '<blockquote cite="unsafe:javascript:alert(\'XSS\')">Quote</blockquote>',
        ['javascript'],
    ];

    yield 'javascript scheme on mathml descendant href' => [
        '<math><maction href="javascript:alert(\'XSS\')">x</maction></math>',
        '<math><maction href="unsafe:javascript:alert(\'XSS\')">x</maction></math>',
        ['javascript'],
    ];

    yield 'javascript scheme on mathml root href' => [
        '<math href="javascript:alert(\'XSS\')"><mtext>x</mtext></math>',
        '<math href="unsafe:javascript:alert(\'XSS\')"><mtext>x</mtext></math>',
        ['javascript'],
    ];

    yield 'javascript scheme on svg descendant href' => [
        '<svg><a href="javascript:alert(\'XSS\')">x</a></svg>',
        '<svg><a href="unsafe:javascript:alert(\'XSS\')">x</a></svg>',
        ['javascript'],
    ];

    yield 'javascript scheme on svg descendant xlink:href' => [
        '<svg xmlns:xlink="http://www.w3.org/1999/xlink"><a xlink:href="javascript:alert(\'XSS\')">x</a></svg>',
        '<svg xmlns:xlink="http://www.w3.org/1999/xlink"><a xlink:href="unsafe:javascript:alert(\'XSS\')">x</a></svg>',
        ['javascript'],
    ];

    yield 'safe scheme on svg descendant xlink:href unaffected' => [
        '<svg xmlns:xlink="http://www.w3.org/1999/xlink"><a xlink:href="https://example.com/page">x</a></svg>',
        '<svg xmlns:xlink="http://www.w3.org/1999/xlink"><a xlink:href="https://example.com/page">x</a></svg>',
        ['javascript'],
    ];
}
}
Loading