Updating HTMLPurifier to 4.9.3

Source: https://htmlpurifier.org/download Release date: 2017-06-19
2017-07-10 13:46:07 +02:00 · 2017-07-10 13:46:07 +02:00 · a4a6a8469e
commit a4a6a8469e
parent 3158f9c33a
54 changed files with 919 additions and 212 deletions
--- a/extlib/HTMLPurifier/HTMLPurifier.includes.php
+++ b/extlib/HTMLPurifier/HTMLPurifier.includes.php
@ -7,7 +7,7 @@
 * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
 * FILE, changes will be overwritten the next time the script is run.
 *
- * @version 4.7.0
+ * @version 4.9.3
 *
 * @warning
 *      You must *not* include any other HTML Purifier files before this file,
@ -137,6 +137,8 @@ require 'HTMLPurifier/AttrTransform/SafeObject.php';
 require 'HTMLPurifier/AttrTransform/SafeParam.php';
 require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
 require 'HTMLPurifier/AttrTransform/TargetBlank.php';
 require 'HTMLPurifier/AttrTransform/TargetNoopener.php';
 require 'HTMLPurifier/AttrTransform/TargetNoreferrer.php';
 require 'HTMLPurifier/AttrTransform/Textarea.php';
 require 'HTMLPurifier/ChildDef/Chameleon.php';
 require 'HTMLPurifier/ChildDef/Custom.php';
@ -175,6 +177,8 @@ require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
 require 'HTMLPurifier/HTMLModule/Tables.php';
 require 'HTMLPurifier/HTMLModule/Target.php';
 require 'HTMLPurifier/HTMLModule/TargetBlank.php';
 require 'HTMLPurifier/HTMLModule/TargetNoopener.php';
 require 'HTMLPurifier/HTMLModule/TargetNoreferrer.php';
 require 'HTMLPurifier/HTMLModule/Text.php';
 require 'HTMLPurifier/HTMLModule/Tidy.php';
 require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@ -225,5 +229,6 @@ require 'HTMLPurifier/URIScheme/https.php';
 require 'HTMLPurifier/URIScheme/mailto.php';
 require 'HTMLPurifier/URIScheme/news.php';
 require 'HTMLPurifier/URIScheme/nntp.php';
 require 'HTMLPurifier/URIScheme/tel.php';
 require 'HTMLPurifier/VarParser/Flexible.php';
 require 'HTMLPurifier/VarParser/Native.php';
--- a/extlib/HTMLPurifier/HTMLPurifier.php
+++ b/extlib/HTMLPurifier/HTMLPurifier.php
@ -19,7 +19,7 @@
 */
 /*
-    HTML Purifier 4.7.0 - Standards Compliant HTML Filtering
+    HTML Purifier 4.9.3 - Standards Compliant HTML Filtering
    Copyright (C) 2006-2008 Edward Z. Yang
    This library is free software; you can redistribute it and/or
@ -58,12 +58,12 @@ class HTMLPurifier
     * Version of HTML Purifier.
     * @type string
     */
-    public $version = '4.7.0';
+    public $version = '4.9.3';
    /**
     * Constant with version of HTML Purifier.
     */
-    const VERSION = '4.7.0';
+    const VERSION = '4.9.3';
    /**
     * Global configuration object.
@ -104,7 +104,7 @@ class HTMLPurifier
    /**
     * Initializes the purifier.
     *
-     * @param HTMLPurifier_Config $config Optional HTMLPurifier_Config object
+     * @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config object
     *                for all instances of the purifier, if omitted, a default
     *                configuration is supplied (which can be overridden on a
     *                per-use basis).
--- a/extlib/HTMLPurifier/HTMLPurifier.safe-includes.php
+++ b/extlib/HTMLPurifier/HTMLPurifier.safe-includes.php
@ -131,6 +131,8 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/TargetBlank.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/TargetNoopener.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/TargetNoreferrer.php';
 require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
 require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
 require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
@ -169,6 +171,8 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/TargetNoopener.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/TargetNoreferrer.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
 require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@ -219,5 +223,6 @@ require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
 require_once $__dir . '/HTMLPurifier/URIScheme/mailto.php';
 require_once $__dir . '/HTMLPurifier/URIScheme/news.php';
 require_once $__dir . '/HTMLPurifier/URIScheme/nntp.php';
 require_once $__dir . '/HTMLPurifier/URIScheme/tel.php';
 require_once $__dir . '/HTMLPurifier/VarParser/Flexible.php';
 require_once $__dir . '/HTMLPurifier/VarParser/Native.php';
--- a/extlib/HTMLPurifier/HTMLPurifier/Arborize.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Arborize.php
@ -19,8 +19,8 @@ class HTMLPurifier_Arborize
            if ($token instanceof HTMLPurifier_Token_End) {
                $token->start = null; // [MUT]
                $r = array_pop($stack);
-                assert($r->name === $token->name);
+                //assert($r->name === $token->name);
-                assert(empty($token->attr));
+                //assert(empty($token->attr));
                $r->endCol = $token->col;
                $r->endLine = $token->line;
                $r->endArmor = $token->armor;
@ -32,7 +32,7 @@ class HTMLPurifier_Arborize
                $stack[] = $node;
            }
        }
-        assert(count($stack) == 1);
+        //assert(count($stack) == 1);
        return $stack[0];
    }
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrCollections.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrCollections.php
@ -21,6 +21,11 @@ class HTMLPurifier_AttrCollections
     * @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
     */
    public function __construct($attr_types, $modules)
    {
        // All initialization lives in doConstruct(); the constructor only
        // forwards its two arguments unchanged.
        $this->doConstruct($attr_types, $modules);
    }
    public function doConstruct($attr_types, $modules)
    {
        // load extensions from the modules
        foreach ($modules as $module) {
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef.php
@ -86,7 +86,13 @@ abstract class HTMLPurifier_AttrDef
     */
    protected function mungeRgb($string)
    {
-        return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
+        $p = '\s*(\d+(\.\d+)?([%]?))\s*';
        if (preg_match('/(rgba|hsla)\(/', $string)) {
            return preg_replace('/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8,\11)', $string);
        }
        return preg_replace('/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8)', $string);
    }
    /**
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS.php
@ -25,15 +25,42 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
        $css = $this->parseCDATA($css);
        $definition = $config->getCSSDefinition();
        $allow_duplicates = $config->get("CSS.AllowDuplicates");
        // we're going to break the spec and explode by semicolons.
        // This is because semicolon rarely appears in escaped form
        // Doing this is generally flaky but fast
        // IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
        // for details
-        $declarations = explode(';', $css);
+        // According to the CSS2.1 spec, the places where a
        // non-delimiting semicolon can appear are in strings
        // and escape sequences.  So here is some dumb hack to
        // handle quotes.
        $len = strlen($css);
        $accum = "";
        $declarations = array();
        $quoted = false;
        for ($i = 0; $i < $len; $i++) {
            $c = strcspn($css, ";'\"", $i);
            $accum .= substr($css, $i, $c);
            $i += $c;
            if ($i == $len) break;
            $d = $css[$i];
            if ($quoted) {
                $accum .= $d;
                if ($d == $quoted) {
                    $quoted = false;
                }
            } else {
                if ($d == ";") {
                    $declarations[] = $accum;
                    $accum = "";
                } else {
                    $accum .= $d;
                    $quoted = $d;
                }
            }
        }
        if ($accum != "") $declarations[] = $accum;
        $propvalues = array();
        $new_declarations = '';
        /**
         * Name of the current CSS property being validated.
@ -83,7 +110,11 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
            if ($result === false) {
                continue;
            }
-            $propvalues[$property] = $result;
+            if ($allow_duplicates) {
                $new_declarations .= "$property:$result;";
            } else {
                $propvalues[$property] = $result;
            }
        }
        $context->destroy('CurrentCSSProperty');
@ -92,7 +123,6 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
        // slightly inefficient, but it's the only way of getting rid of
        // duplicates. Perhaps config to optimize it, but not now.
        $new_declarations = '';
        foreach ($propvalues as $prop => $value) {
            $new_declarations .= "$prop:$value;";
        }
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Color.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Color.php
@ -6,6 +6,16 @@
 class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
 {
    /**
     * @type HTMLPurifier_AttrDef_CSS_AlphaValue
     */
    protected $alpha;
    /**
     * Creates the alpha-value validator stored in $this->alpha. The color
     * validation code in this class applies it to the last parameter of
     * color functions whose name ends in "a" (rgba, hsla).
     */
    public function __construct()
    {
        $this->alpha = new HTMLPurifier_AttrDef_CSS_AlphaValue();
    }
    /**
     * @param string $color
     * @param HTMLPurifier_Config $config
@ -29,59 +39,104 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
            return $colors[$lower];
        }
-        if (strpos($color, 'rgb(') !== false) {
+        if (preg_match('#(rgb|rgba|hsl|hsla)\(#', $color, $matches) === 1) {
            // rgb literal handling
            $length = strlen($color);
            if (strpos($color, ')') !== $length - 1) {
                return false;
            }
-            $triad = substr($color, 4, $length - 4 - 1);
+
-            $parts = explode(',', $triad);
+            // get used function : rgb, rgba, hsl or hsla
-            if (count($parts) !== 3) {
+            $function = $matches[1];
            $parameters_size = 3;
            $alpha_channel = false;
            if (substr($function, -1) === 'a') {
                $parameters_size = 4;
                $alpha_channel = true;
            }
            /*
             * Allowed types for values :
             * parameter_position => [type => max_value]
             */
            $allowed_types = array(
                1 => array('percentage' => 100, 'integer' => 255),
                2 => array('percentage' => 100, 'integer' => 255),
                3 => array('percentage' => 100, 'integer' => 255),
            );
            $allow_different_types = false;
            if (strpos($function, 'hsl') !== false) {
                $allowed_types = array(
                    1 => array('integer' => 360),
                    2 => array('percentage' => 100),
                    3 => array('percentage' => 100),
                );
                $allow_different_types = true;
            }
            $values = trim(str_replace($function, '', $color), ' ()');
            $parts = explode(',', $values);
            if (count($parts) !== $parameters_size) {
                return false;
            }
-            $type = false; // to ensure that they're all the same type
+
            $type = false;
            $new_parts = array();
            $i = 0;
            foreach ($parts as $part) {
                $i++;
                $part = trim($part);
                if ($part === '') {
                    return false;
                }
-                $length = strlen($part);
+
-                if ($part[$length - 1] === '%') {
+                // different check for alpha channel
-                    // handle percents
+                if ($alpha_channel === true && $i === count($parts)) {
-                    if (!$type) {
+                    $result = $this->alpha->validate($part, $config, $context);
-                        $type = 'percentage';
+
-                    } elseif ($type !== 'percentage') {
+                    if ($result === false) {
                        return false;
                    }
-                    $num = (float)substr($part, 0, $length - 1);
+
-                    if ($num < 0) {
+                    $new_parts[] = (string)$result;
-                        $num = 0;
+                    continue;
-                    }
+                }
-                    if ($num > 100) {
+
-                        $num = 100;
+                if (substr($part, -1) === '%') {
-                    }
+                    $current_type = 'percentage';
                    $new_parts[] = "$num%";
                } else {
-                    // handle integers
+                    $current_type = 'integer';
-                    if (!$type) {
+                }
-                        $type = 'integer';
+
-                    } elseif ($type !== 'integer') {
+                if (!array_key_exists($current_type, $allowed_types[$i])) {
-                        return false;
+                    return false;
-                    }
+                }
-                    $num = (int)$part;
+
-                    if ($num < 0) {
+                if (!$type) {
-                        $num = 0;
+                    $type = $current_type;
-                    }
+                }
-                    if ($num > 255) {
+
-                        $num = 255;
+                if ($allow_different_types === false && $type != $current_type) {
-                    }
+                    return false;
-                    $new_parts[] = (string)$num;
+                }
                $max_value = $allowed_types[$i][$current_type];
                if ($current_type == 'integer') {
                    // Return value between range 0 -> $max_value
                    $new_parts[] = (int)max(min($part, $max_value), 0);
                } elseif ($current_type == 'percentage') {
                    $new_parts[] = (float)max(min(rtrim($part, '%'), $max_value), 0) . '%';
                }
            }
-            $new_triad = implode(',', $new_parts);
+
-            $color = "rgb($new_triad)";
+            $new_values = implode(',', $new_parts);
            $color = $function . '(' . $new_values . ')';
        } else {
            // hexadecimal handling
            if ($color[0] === '#') {
@ -100,6 +155,7 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
        }
        return $color;
    }
 }
 // vim: et sw=4 sts=4
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php
@ -130,6 +130,8 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
            //    <http://ja.wikipedia.org/wiki/MS_明朝>.  See
            //    the CSS3 spec for more examples:
            //    <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
            //    You can see live samples of these on the Internet:
            //    <http://www.google.co.jp/search?q=font-family+ＭＳ+明朝|ゴシック>
            //    However, most of these fonts have ASCII equivalents:
            //    for example, 'MS Mincho', and it's considered
            //    professional to use ASCII font names instead of
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/URI.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/URI.php
@ -33,6 +33,9 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
            return false;
        }
        $uri_string = substr($uri_string, 4);
        if (strlen($uri_string) == 0) {
            return false;
        }
        $new_length = strlen($uri_string) - 1;
        if ($uri_string[$new_length] != ')') {
            return false;
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/ID.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/ID.php
@ -72,18 +72,26 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
        // we purposely avoid using regex, hopefully this is faster
-        if (ctype_alpha($id)) {
+        if ($config->get('Attr.ID.HTML5') === true) {
-            $result = true;
+            if (preg_match('/[\t\n\x0b\x0c ]/', $id)) {
        } else {
            if (!ctype_alpha(@$id[0])) {
                return false;
            }
-            // primitive style of regexps, I suppose
+        } else {
-            $trim = trim(
+            if (ctype_alpha($id)) {
-                $id,
+                // OK
-                'A..Za..z0..9:-._'
+            } else {
-            );
+                if (!ctype_alpha(@$id[0])) {
-            $result = ($trim === '');
+                    return false;
                }
                // primitive style of regexps, I suppose
                $trim = trim(
                    $id,
                    'A..Za..z0..9:-._'
                );
                if ($trim !== '') {
                    return false;
                }
            }
        }
        $regexp = $config->get('Attr.IDBlacklistRegexp');
@ -91,14 +99,14 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
            return false;
        }
-        if (!$this->selector && $result) {
+        if (!$this->selector) {
            $id_accumulator->add($id);
        }
        // if no change was made to the ID, return the result
        // else, return the new id if stripping whitespace made it
        //     valid, or return false.
-        return $result ? $id : false;
+        return $id;
    }
 }
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Host.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Host.php
@ -76,24 +76,33 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
        // fairly well supported.
        $underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : '';
        // Based off of RFC 1738, but amended so that
        // as per RFC 3696, the top label need only not be all numeric.
        // The productions describing this are:
        $a   = '[a-z]';     // alpha
        $an  = '[a-z0-9]';  // alphanum
        $and = "[a-z0-9-$underscore]"; // alphanum | "-"
        // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
-        $domainlabel = "$an($and*$an)?";
+        $domainlabel = "$an(?:$and*$an)?";
-        // toplabel    = alpha | alpha *( alphanum | "-" ) alphanum
+        // AMENDED as per RFC 3696
-        $toplabel = "$a($and*$an)?";
+        // toplabel    = alphanum | alphanum *( alphanum | "-" ) alphanum
        //      side condition: not all numeric
        $toplabel = "$an(?:$and*$an)?";
        // hostname    = *( domainlabel "." ) toplabel [ "." ]
-        if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
+        if (preg_match("/^(?:$domainlabel\.)*($toplabel)\.?$/i", $string, $matches)) {
-            return $string;
+            if (!ctype_digit($matches[1])) {
                return $string;
            }
        }
        // PHP 5.3 and later support this functionality natively
        if (function_exists('idn_to_ascii')) {
            $string = idn_to_ascii($string);
        // If we have Net_IDNA2 support, we can support IRIs by
        // punycoding them. (This is the most portable thing to do,
        // since otherwise we have to assume browsers support
-
+        } elseif ($config->get('Core.EnableIDNA')) {
        if ($config->get('Core.EnableIDNA')) {
            $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
            // we need to encode each period separately
            $parts = explode('.', $string);
@ -114,13 +123,14 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
                    }
                }
                $string = implode('.', $new_parts);
                if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
                    return $string;
                }
            } catch (Exception $e) {
                // XXX error reporting
            }
        }
        // Try again
        if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
            return $string;
        }
        return false;
    }
 }
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ImgRequired.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ImgRequired.php
@ -32,8 +32,7 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
            if ($src) {
                $alt = $config->get('Attr.DefaultImageAlt');
                if ($alt === null) {
-                    // truncate if the alt is too long
+                    $attr['alt'] = basename($attr['src']);
                    $attr['alt'] = substr(basename($attr['src']), 0, 40);
                } else {
                    $attr['alt'] = $alt;
                }
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/TargetNoopener.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/TargetNoopener.php
@ -0,0 +1,37 @@
 <?php
 // must be called POST validation
 /**
 * Adds rel="noopener" to any links which target a different window
 * than the current one.  This is used to prevent malicious websites
 * from silently replacing the original window, which could be used
 * to do phishing.
 * This transform is controlled by %HTML.TargetNoopener.
 *
 * Any attribute array that carries a "target" key is treated as such a
 * link; the value of "target" itself is not inspected.
 */
 class HTMLPurifier_AttrTransform_TargetNoopener extends HTMLPurifier_AttrTransform
 {
    /**
     * Ensures the "rel" attribute contains the "noopener" token whenever
     * a "target" attribute is present.
     *
     * @param array $attr Attribute name => value map of the element
     * @param HTMLPurifier_Config $config Not read by this transform
     * @param HTMLPurifier_Context $context Not read by this transform
     * @return array The attribute map, with "rel" updated where needed
     */
    public function transform($attr, $config, $context)
    {
        // Split any existing rel value on single spaces so the current
        // tokens are preserved and can be checked for "noopener".
        if (isset($attr['rel'])) {
            $rels = explode(' ', $attr['rel']);
        } else {
            $rels = array();
        }
        // Append the token only when the element has a target and does
        // not already list it (the token match is case-sensitive).
        if (isset($attr['target']) && !in_array('noopener', $rels)) {
            $rels[] = 'noopener';
        }
        // Write rel back when there is at least one token or rel was
        // already set; an element with neither rel nor target is returned
        // without a rel attribute being created.
        if (!empty($rels) || isset($attr['rel'])) {
            $attr['rel'] = implode(' ', $rels);
        }
        return $attr;
    }
 }
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/TargetNoreferrer.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/TargetNoreferrer.php
@ -0,0 +1,37 @@
 <?php
 // must be called POST validation
 /**
 * Adds rel="noreferrer" to any links which target a different window
 * than the current one.  This is used to prevent malicious websites
 * from silently replacing the original window, which could be used
 * to do phishing.
 * This transform is controlled by %HTML.TargetNoreferrer.
 *
 * NOTE(review): the rationale above is worded identically to the
 * TargetNoopener transform. The noreferrer link type is primarily about
 * not sending referrer information to the target -- confirm the
 * intended wording against the HTML specification.
 *
 * Any attribute array that carries a "target" key is treated as such a
 * link; the value of "target" itself is not inspected.
 */
 class HTMLPurifier_AttrTransform_TargetNoreferrer extends HTMLPurifier_AttrTransform
 {
    /**
     * Ensures the "rel" attribute contains the "noreferrer" token
     * whenever a "target" attribute is present.
     *
     * @param array $attr Attribute name => value map of the element
     * @param HTMLPurifier_Config $config Not read by this transform
     * @param HTMLPurifier_Context $context Not read by this transform
     * @return array The attribute map, with "rel" updated where needed
     */
    public function transform($attr, $config, $context)
    {
        // Split any existing rel value on single spaces so the current
        // tokens are preserved and can be checked for "noreferrer".
        if (isset($attr['rel'])) {
            $rels = explode(' ', $attr['rel']);
        } else {
            $rels = array();
        }
        // Append the token only when the element has a target and does
        // not already list it (the token match is case-sensitive).
        if (isset($attr['target']) && !in_array('noreferrer', $rels)) {
            $rels[] = 'noreferrer';
        }
        // Write rel back when there is at least one token or rel was
        // already set; an element with neither rel nor target is returned
        // without a rel attribute being created.
        if (!empty($rels) || isset($attr['rel'])) {
            $attr['rel'] = implode(' ', $rels);
        }
        return $attr;
    }
 }
--- a/extlib/HTMLPurifier/HTMLPurifier/CSSDefinition.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/CSSDefinition.php
@ -225,6 +225,10 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
        );
        $max = $config->get('CSS.MaxImgLength');
        $this->info['min-width'] =
        $this->info['max-width'] =
        $this->info['min-height'] =
        $this->info['max-height'] =
        $this->info['width'] =
        $this->info['height'] =
            $max === null ?
@ -370,6 +374,19 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
        );
        $this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid'));
        $border_radius = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_CSS_Percentage(true), // disallow negative
                new HTMLPurifier_AttrDef_CSS_Length('0') // disallow negative
            ));
        $this->info['border-top-left-radius'] =
        $this->info['border-top-right-radius'] =
        $this->info['border-bottom-right-radius'] =
        $this->info['border-bottom-left-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 2);
        // TODO: support SLASH syntax
        $this->info['border-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 4);
    }
    /**
--- a/extlib/HTMLPurifier/HTMLPurifier/ChildDef/List.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/List.php
@ -38,13 +38,19 @@ class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
            return false;
        }
        // if li is not allowed, delete parent node
        if (!isset($config->getHTMLDefinition()->info['li'])) {
            trigger_error("Cannot allow ul/ol without allowing li", E_USER_WARNING);
            return false;
        }
        // the new set of children
        $result = array();
        // a little sanity check to make sure it's not ALL whitespace
        $all_whitespace = true;
-        $current_li = false;
+        $current_li = null;
        foreach ($children as $node) {
            if (!empty($node->is_whitespace)) {
@ -65,7 +71,7 @@ class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
                // to handle non-list elements; non-list elements should
                // not be appended to an existing li; only li created
                // for non-list. This distinction is not currently made.
-                if ($current_li === false) {
+                if ($current_li === null) {
                    $current_li = new HTMLPurifier_Node_Element('li');
                    $result[] = $current_li;
                }
--- a/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Table.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Table.php
@ -203,7 +203,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
                    $current_tr_tbody->children[] = $node;
                    break;
                case '#PCDATA':
-                    assert($node->is_whitespace);
+                    //assert($node->is_whitespace);
                    if ($current_tr_tbody === null) {
                        $ret[] = $node;
                    } else {
--- a/extlib/HTMLPurifier/HTMLPurifier/Config.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Config.php
@ -21,7 +21,7 @@ class HTMLPurifier_Config
     * HTML Purifier's version
     * @type string
     */
-    public $version = '4.7.0';
+    public $version = '4.9.3';
    /**
     * Whether or not to automatically finalize
@ -333,7 +333,7 @@ class HTMLPurifier_Config
        }
        // Raw type might be negative when using the fully optimized form
-        // of stdclass, which indicates allow_null == true
+        // of stdClass, which indicates allow_null == true
        $rtype = is_int($def) ? $def : $def->type;
        if ($rtype < 0) {
            $type = -$rtype;
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema.php
@ -24,11 +24,11 @@ class HTMLPurifier_ConfigSchema
     *
     *  array(
     *      'Namespace' => array(
-     *          'Directive' => new stdclass(),
+     *          'Directive' => new stdClass(),
     *      )
     *  )
     *
-     * The stdclass may have the following properties:
+     * The stdClass may have the following properties:
     *
     *  - If isAlias isn't set:
     *      - type: Integer type of directive, see HTMLPurifier_VarParser for definitions
@ -39,8 +39,8 @@ class HTMLPurifier_ConfigSchema
     *      - namespace: Namespace this directive aliases to
     *      - name: Directive name this directive aliases to
     *
-     * In certain degenerate cases, stdclass will actually be an integer. In
+     * In certain degenerate cases, stdClass will actually be an integer. In
-     * that case, the value is equivalent to an stdclass with the type
+     * that case, the value is equivalent to an stdClass with the type
     * property set to the integer. If the integer is negative, type is
     * equal to the absolute value of integer, and allow_null is true.
     *
@ -105,7 +105,7 @@ class HTMLPurifier_ConfigSchema
     */
    public function add($key, $default, $type, $allow_null)
    {
-        $obj = new stdclass();
+        $obj = new stdClass();
        $obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
        if ($allow_null) {
            $obj->allow_null = true;
@ -152,14 +152,14 @@ class HTMLPurifier_ConfigSchema
     */
    public function addAlias($key, $new_key)
    {
-        $obj = new stdclass;
+        $obj = new stdClass;
        $obj->key = $new_key;
        $obj->isAlias = true;
        $this->info[$key] = $obj;
    }
    /**
-     * Replaces any stdclass that only has the type property with type integer.
+     * Replaces any stdClass that only has the type property with type integer.
     */
    public function postProcess()
    {
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema.ser
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema.ser
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt
@ -0,0 +1,10 @@
 Attr.ID.HTML5
 TYPE: bool/null
 DEFAULT: null
 VERSION: 4.8.0
 --DESCRIPTION--
 In HTML5, restrictions on the format of the id attribute have been significantly
 relaxed, such that any string is valid so long as it contains no spaces and
 is at least one character.  In lieu of a general HTML5 compatibility flag,
 set this configuration directive to true to use the relaxed rules.
 --# vim: et sw=4 sts=4
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowDuplicates.txt
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowDuplicates.txt
@ -0,0 +1,11 @@
 CSS.AllowDuplicates
 TYPE: bool
 DEFAULT: false
 VERSION: 4.8.0
 --DESCRIPTION--
 <p>
  By default, HTML Purifier removes duplicate CSS properties,
  like <code>color:red; color:blue</code>.  If this is set to
  true, duplicate properties are allowed.
 </p>
 --# vim: et sw=4 sts=4
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt
@ -1,5 +1,5 @@
 Cache.SerializerPermissions
-TYPE: int
+TYPE: int/null
 VERSION: 4.3.0
 DEFAULT: 0755
 --DESCRIPTION--
@ -8,4 +8,9 @@ DEFAULT: 0755
    Directory permissions of the files and directories created inside
    the DefinitionCache/Serializer or other custom serializer path.
 </p>
 <p>
    In HTML Purifier 4.8.0, this also supports <code>NULL</code>,
    which means that no chmod'ing or directory creation shall
    occur.
 </p>
 --# vim: et sw=4 sts=4
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyRemoveScript.txt
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyRemoveScript.txt
@ -0,0 +1,16 @@
 Core.AggressivelyRemoveScript
 TYPE: bool
 VERSION: 4.9.0
 DEFAULT: true
 --DESCRIPTION--
 <p>
    This directive enables aggressive pre-filter removal of
    script tags.  This is not necessary for security,
    but it can help work around a bug in libxml where embedded
    HTML elements inside script sections cause the parser to
    choke.  To revert to pre-4.9.0 behavior, set this to false.
    This directive has no effect if %Core.Trusted is true,
    %Core.RemoveScriptContents is false, or %Core.HiddenElements
    does not contain script.
 </p>
 --# vim: et sw=4 sts=4
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt
@ -0,0 +1,36 @@
 Core.LegacyEntityDecoder
 TYPE: bool
 VERSION: 4.9.0
 DEFAULT: false
 --DESCRIPTION--
 <p>
    Prior to HTML Purifier 4.9.0, entities were decoded by performing
    a global search replace for all entities whose decoded versions
    did not have special meanings under HTML, and replaced them with
    their decoded versions.  We would match all entities, even if they did
    not have a trailing semicolon, but only if there weren't any trailing
    alphanumeric characters.
 </p>
 <table>
 <tr><th>Original</th><th>Text</th><th>Attribute</th></tr>
 <tr><td>&amp;yen;</td><td>&yen;</td><td>&yen;</td></tr>
 <tr><td>&amp;yen</td><td>&yen;</td><td>&yen;</td></tr>
 <tr><td>&amp;yena</td><td>&amp;yena</td><td>&amp;yena</td></tr>
 <tr><td>&amp;yen=</td><td>&yen;=</td><td>&yen;=</td></tr>
 </table>
 <p>
    In HTML Purifier 4.9.0, we changed the behavior of entity parsing
    to match entities that had missing trailing semicolons in fewer
    cases, to more closely match HTML5 parsing behavior:
 </p>
 <table>
 <tr><th>Original</th><th>Text</th><th>Attribute</th></tr>
 <tr><td>&amp;yen;</td><td>&yen;</td><td>&yen;</td></tr>
 <tr><td>&amp;yen</td><td>&yen;</td><td>&yen;</td></tr>
 <tr><td>&amp;yena</td><td>&yen;a</td><td>&amp;yena</td></tr>
 <tr><td>&amp;yen=</td><td>&yen;=</td><td>&amp;yen=</td></tr>
 </table>
 <p>
    This flag reverts back to pre-HTML Purifier 4.9.0 behavior.
 </p>
 --# vim: et sw=4 sts=4
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoopener.txt
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoopener.txt
@ -0,0 +1,10 @@
 --# vim: et sw=4 sts=4
 HTML.TargetNoopener
 TYPE: bool
 VERSION: 4.8.0
 DEFAULT: TRUE
 --DESCRIPTION--
 If enabled, noopener rel attributes are added to links which have
 a target attribute associated with them.  This prevents malicious
 destinations from overwriting the original window.
 --# vim: et sw=4 sts=4
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt
@ -0,0 +1,9 @@
 HTML.TargetNoreferrer
 TYPE: bool
 VERSION: 4.8.0
 DEFAULT: TRUE
 --DESCRIPTION--
 If enabled, noreferrer rel attributes are added to links which have
 a target attribute associated with them.  This prevents malicious
 destinations from overwriting the original window.
 --# vim: et sw=4 sts=4
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt
@ -8,6 +8,7 @@ array (
  'ftp' => true,
  'nntp' => true,
  'news' => true,
  'tel' => true,
 )
 --DESCRIPTION--
 Whitelist that defines the schemes that a URI is allowed to have.  This
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt
@ -1,5 +1,5 @@
 URI.DefaultScheme
-TYPE: string
+TYPE: string/null
 DEFAULT: 'http'
 --DESCRIPTION--
@ -7,4 +7,9 @@ DEFAULT: 'http'
    Defines through what scheme the output will be served, in order to
    select the proper object validator when no scheme information is present.
 </p>
 <p>
    Starting with HTML Purifier 4.9.0, the default scheme can be null, in
    which case we reject all URIs which do not have explicit schemes.
 </p>
 --# vim: et sw=4 sts=4
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt
@ -9,75 +9,75 @@ DEFAULT: NULL
    absolute URIs into another URI, usually a URI redirection service.
    This directive accepts a URI, formatted with a <code>%s</code> where
    the url-encoded original URI should be inserted (sample:
-    <code>https://searx.laquadrature.net/?q=%s</code>).
+    <code>http://www.google.com/url?q=%s</code>).
-	</p>
+</p>
-	<p>
+<p>
    Uses for this directive:
-	</p>
+</p>
-	<ul>
+<ul>
    <li>
-    Prevent PageRank leaks, while being fairly transparent
+        Prevent PageRank leaks, while being fairly transparent
-    to users (you may also want to add some client side JavaScript to
+        to users (you may also want to add some client side JavaScript to
-    override the text in the statusbar). <strong>Notice</strong>:
+        override the text in the statusbar). <strong>Notice</strong>:
-    Many security experts believe that this form of protection does not deter spam-bots.
+        Many security experts believe that this form of protection does not deter spam-bots.
    </li>
    <li>
-    Redirect users to a splash page telling them they are leaving your
+        Redirect users to a splash page telling them they are leaving your
-    website. While this is poor usability practice, it is often mandated
+        website. While this is poor usability practice, it is often mandated
-    in corporate environments.
+        in corporate environments.
    </li>
-	</ul>
+</ul>
-	<p>
+<p>
    Prior to HTML Purifier 3.1.1, this directive also enabled the munging
    of browsable external resources, which could break things if your redirection
    script was a splash page or used <code>meta</code> tags. To revert to
    previous behavior, please use %URI.MungeResources.
-	</p>
+</p>
-	<p>
+<p>
    You may want to also use %URI.MungeSecretKey along with this directive
    in order to enforce what URIs your redirector script allows. Open
    redirector scripts can be a security risk and negatively affect the
    reputation of your domain name.
-	</p>
+</p>
-	<p>
+<p>
    Starting with HTML Purifier 3.1.1, there are also these substitutions:
-	</p>
+</p>
-	<table>
+<table>
    <thead>
-    <tr>
+        <tr>
-    <th>Key</th>
+            <th>Key</th>
-    <th>Description</th>
+            <th>Description</th>
-    <th>Example <code>&lt;a href=""&gt;</code></th>
+            <th>Example <code>&lt;a href=""&gt;</code></th>
-    </tr>
+        </tr>
    </thead>
    <tbody>
-    <tr>
+        <tr>
-    <td>%r</td>
+            <td>%r</td>
-    <td>1 - The URI embeds a resource<br />(blank) - The URI is merely a link</td>
+            <td>1 - The URI embeds a resource<br />(blank) - The URI is merely a link</td>
-    <td></td>
+            <td></td>
-    </tr>
+        </tr>
-    <tr>
+        <tr>
-    <td>%n</td>
+            <td>%n</td>
-    <td>The name of the tag this URI came from</td>
+            <td>The name of the tag this URI came from</td>
-    <td>a</td>
+            <td>a</td>
-    </tr>
+        </tr>
-    <tr>
+        <tr>
-    <td>%m</td>
+            <td>%m</td>
-    <td>The name of the attribute this URI came from</td>
+            <td>The name of the attribute this URI came from</td>
-    <td>href</td>
+            <td>href</td>
-    </tr>
+        </tr>
-    <tr>
+        <tr>
-    <td>%p</td>
+            <td>%p</td>
-    <td>The name of the CSS property this URI came from, or blank if irrelevant</td>
+            <td>The name of the CSS property this URI came from, or blank if irrelevant</td>
-    <td></td>
+            <td></td>
-    </tr>
+        </tr>
    </tbody>
-	</table>
+</table>
-	<p>
+<p>
    Admittedly, these letters are somewhat arbitrary; the only stipulation
    was that they couldn't be a through f. r is for resource (I would have preferred
    e, but you take what you can get), n is for name, m
    was picked because it came after n (and I couldn't use a), p is for
    property.
-	</p>
+</p>
-	--# vim: et sw=4 sts=4
+--# vim: et sw=4 sts=4
--- a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache.php
@ -118,7 +118,7 @@ abstract class HTMLPurifier_DefinitionCache
    /**
     * Clears all expired (older version or revision) objects from cache
-     * @note Be carefuly implementing this method as flush. Flush must
+     * @note Be careful implementing this method as flush. Flush must
     *       not interfere with other Definition types, and cleanup()
     *       should not be repeatedly called by userland code.
     * @param HTMLPurifier_Config $config
--- a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer.php
@ -97,6 +97,12 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
        }
        $dir = $this->generateDirectoryPath($config);
        $dh = opendir($dir);
        // Apparently, on some versions of PHP, readdir will return
        // an empty string if you pass an invalid argument to readdir.
        // So you need this test.  See #49.
        if (false === $dh) {
            return false;
        }
        while (false !== ($filename = readdir($dh))) {
            if (empty($filename)) {
                continue;
@ -106,6 +112,8 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
            }
            unlink($dir . '/' . $filename);
        }
        closedir($dh);
        return true;
    }
    /**
@ -119,6 +127,10 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
        }
        $dir = $this->generateDirectoryPath($config);
        $dh = opendir($dir);
        // See #49 (and above).
        if (false === $dh) {
            return false;
        }
        while (false !== ($filename = readdir($dh))) {
            if (empty($filename)) {
                continue;
@ -131,6 +143,8 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
                unlink($dir . '/' . $filename);
            }
        }
        closedir($dh);
        return true;
    }
    /**
@ -186,11 +200,9 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
        if ($result !== false) {
            // set permissions of the new file (no execute)
            $chmod = $config->get('Cache.SerializerPermissions');
-            if (!$chmod) {
+            if ($chmod !== null) {
-                $chmod = 0644; // invalid config or simpletest
+                chmod($file, $chmod & 0666);
            }
            $chmod = $chmod & 0666;
            chmod($file, $chmod);
        }
        return $result;
    }
@ -204,8 +216,10 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
    {
        $directory = $this->generateDirectoryPath($config);
        $chmod = $config->get('Cache.SerializerPermissions');
-        if (!$chmod) {
+        if ($chmod === null) {
-            $chmod = 0755; // invalid config or simpletest
+            // TODO: This races
            if (is_dir($directory)) return true;
            return mkdir($directory);
        }
        if (!is_dir($directory)) {
            $base = $this->generateBaseDirectoryPath($config);
@ -219,15 +233,16 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
            } elseif (!$this->_testPermissions($base, $chmod)) {
                return false;
            }
-            mkdir($directory, $chmod);
+            if (!mkdir($directory, $chmod)) {
            if (!$this->_testPermissions($directory, $chmod)) {
                trigger_error(
-                    'Base directory ' . $base . ' does not exist,
+                    'Could not create directory ' . $directory . '',
                    please create or change using %Cache.SerializerPath',
                    E_USER_WARNING
                );
                return false;
            }
            if (!$this->_testPermissions($directory, $chmod)) {
                return false;
            }
        } elseif (!$this->_testPermissions($directory, $chmod)) {
            return false;
        }
@ -256,7 +271,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
            );
            return false;
        }
-        if (function_exists('posix_getuid')) {
+        if (function_exists('posix_getuid') && $chmod !== null) {
            // POSIX system, we can give more specific advice
            if (fileowner($dir) === posix_getuid()) {
                // we can chmod it ourselves
--- a/extlib/HTMLPurifier/HTMLPurifier/Encoder.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Encoder.php
@ -101,6 +101,14 @@ class HTMLPurifier_Encoder
     * It will parse according to UTF-8 and return a valid UTF8 string, with
     * non-SGML codepoints excluded.
     *
     * Specifically, it will permit:
     * \x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}
     * Source: https://www.w3.org/TR/REC-xml/#NT-Char
     * Arguably this function should be modernized to the HTML5 set
     * of allowed characters:
     * https://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
     * which simultaneously expand and restrict the set of allowed characters.
     *
     * @param string $str The string to clean
     * @param bool $force_php
     * @return string
@ -122,15 +130,12 @@ class HTMLPurifier_Encoder
     *       function that needs to be able to understand UTF-8 characters.
     *       As of right now, only smart lossless character encoding converters
     *       would need that, and I'm probably not going to implement them.
     *       Once again, PHP 6 should solve all our problems.
     */
    public static function cleanUTF8($str, $force_php = false)
    {
        // UTF-8 validity is checked since PHP 4.3.5
        // This is an optimization: if the string is already valid UTF-8, no
        // need to do PHP stuff. 99% of the time, this will be the case.
        // The regexp matches the XML char production, as well as excluding
        // non-SGML codepoints U+007F to U+009F
        if (preg_match(
            '/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du',
            $str
@ -255,6 +260,7 @@ class HTMLPurifier_Encoder
                                // 7F-9F is not strictly prohibited by XML,
                                // but it is non-SGML, and thus we don't allow it
                                (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
                                (0xE000 <= $mUcs4 && 0xFFFD >= $mUcs4) ||
                                (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
                            )
                        ) {
--- a/extlib/HTMLPurifier/HTMLPurifier/EntityParser.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/EntityParser.php
@ -16,6 +16,138 @@ class HTMLPurifier_EntityParser
     */
    protected $_entity_lookup;
    /**
     * Callback regex string for entities in text.
     * @type string
     */
    protected $_textEntitiesRegex;
    /**
     * Callback regex string for entities in attributes.
     * @type string
     */
    protected $_attrEntitiesRegex;
    /**
     * Regex string that tests whether a string begins with an entity whose
     * trailing semicolon is optional.
     */
    protected $_semiOptionalPrefixRegex;
    /**
     * Builds the regex strings used for entity substitution: one for text,
     * one for attribute values, and one that matches only the entity names
     * which may be written without a trailing semicolon.
     */
    public function __construct()
    {
        // Entity names whose trailing semicolon is optional.  List taken from
        // http://stackoverflow.com/questions/15532252/why-is-reg-being-rendered-as-without-the-bounding-semicolon
        $semi_optional = "quot|QUOT|lt|LT|gt|GT|amp|AMP|AElig|Aacute|Acirc|Agrave|Aring|Atilde|Auml|COPY|Ccedil|ETH|Eacute|Ecirc|Egrave|Euml|Iacute|Icirc|Igrave|Iuml|Ntilde|Oacute|Ocirc|Ograve|Oslash|Otilde|Ouml|REG|THORN|Uacute|Ucirc|Ugrave|Uuml|Yacute|aacute|acirc|acute|aelig|agrave|aring|atilde|auml|brvbar|ccedil|cedil|cent|copy|curren|deg|divide|eacute|ecirc|egrave|eth|euml|frac12|frac14|frac34|iacute|icirc|iexcl|igrave|iquest|iuml|laquo|macr|micro|middot|nbsp|not|ntilde|oacute|ocirc|ograve|ordf|ordm|oslash|otilde|ouml|para|plusmn|pound|raquo|reg|sect|shy|sup1|sup2|sup3|szlig|thorn|times|uacute|ucirc|ugrave|uml|uuml|yacute|yen|yuml";
        // NB: the three empty captures keep the name in capture group 4,
        // the same position it has in the two full regexes below.
        $this->_semiOptionalPrefixRegex = "/&()()()($semi_optional)/";
        // Alternatives shared by the text and attribute regexes.
        // NB: order matters: the semicolon-terminated named form is tried
        // before the semicolon-optional form so it matches preferentially.
        $shared_alternatives =
            // group 1: hex
            '[#]x([a-fA-F0-9]+);?|' .
            // group 2: dec
            '[#]0*(\d+);?|' .
            // group 3: string (mandatory semicolon)
            '([A-Za-z_:][A-Za-z0-9.\-_:]*);|';
        // Text: group 4 is a string with optional semicolon.
        $this->_textEntitiesRegex =
            '/&(?:' . $shared_alternatives . "($semi_optional)" . ')/';
        // Attributes: same, but don't match the optional-semicolon form if
        // what follows is an equals sign, semicolon or alphanumeric (URL like).
        $this->_attrEntitiesRegex =
            '/&(?:' . $shared_alternatives . "($semi_optional)(?![=;A-Za-z0-9])" . ')/';
    }
    /**
     * Replaces entities in textual data of an HTML document (as opposed to
     * attribute values) with their parsed equivalents.
     *
     * @param string $string String to have entities parsed.
     * @return string Parsed string.
     */
    public function substituteTextEntities($string)
    {
        $callback = array($this, 'entityCallback');
        return preg_replace_callback($this->_textEntitiesRegex, $callback, $string);
    }
    /**
     * Replaces entities in attribute contents of a document with their
     * parsed equivalents.
     *
     * @param string $string String to have entities parsed.
     * @return string Parsed string.
     */
    public function substituteAttrEntities($string)
    {
        $callback = array($this, 'entityCallback');
        return preg_replace_callback($this->_attrEntitiesRegex, $callback, $string);
    }
    /**
     * Callback function for substituteTextEntities() and
     * substituteAttrEntities() that does the work of replacing one entity.
     *
     * @param array $matches  PCRE matches array, with 0 the entire match, and
     *                  one of index 1 (hex value), 2 (dec value), 3 (name
     *                  terminated by a semicolon) or 4 (name taken from the
     *                  semicolon-optional list) set.
     * @return string Replacement string.  An unrecognized entity is returned
     *                  unchanged, except that a semicolon-terminated name is
     *                  first retried against the semicolon-optional names.
     */
    protected function entityCallback($matches)
    {
        $entity = $matches[0];
        // Capture groups after the last one that participated in the match
        // may be missing from $matches, so test with isset() instead of
        // silencing the undefined-index notice with @.
        $hex_part = isset($matches[1]) ? $matches[1] : null;
        $dec_part = isset($matches[2]) ? $matches[2] : null;
        if (!empty($matches[3])) {
            $named_part = $matches[3];
        } else {
            $named_part = isset($matches[4]) ? $matches[4] : null;
        }
        if ($hex_part !== null && $hex_part !== "") {
            return HTMLPurifier_Encoder::unichr(hexdec($hex_part));
        }
        if ($dec_part !== null && $dec_part !== "") {
            return HTMLPurifier_Encoder::unichr((int) $dec_part);
        }
        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if ($named_part !== null && isset($this->_entity_lookup->table[$named_part])) {
            return $this->_entity_lookup->table[$named_part];
        }
        // The exact name didn't match anything, so test whether one of the
        // semicolon-optional names matches a prefix of it.  Restricting
        // the retry to names that came from group 3 is important to prevent
        // an infinite loop: the prefix regex only ever fills group 4.
        if (!empty($matches[3])) {
            return preg_replace_callback(
                $this->_semiOptionalPrefixRegex,
                array($this, 'entityCallback'),
                $entity
            );
        }
        return $entity;
    }
    // LEGACY CODE BELOW
    /**
     * Callback regex string for parsing entities.
     * @type string
@ -144,7 +276,7 @@ class HTMLPurifier_EntityParser
                $entity;
        } else {
            return isset($this->_special_ent2dec[$matches[3]]) ?
-                $this->_special_ent2dec[$matches[3]] :
+                $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] :
                $entity;
        }
    }
--- a/extlib/HTMLPurifier/HTMLPurifier/Filter/ExtractStyleBlocks.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Filter/ExtractStyleBlocks.php
@ -95,7 +95,10 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
        if ($tidy !== null) {
            $this->_tidy = $tidy;
        }
-        $html = preg_replace_callback('#<style(?:\s.*)?>(.+)</style>#isU', array($this, 'styleCallback'), $html);
+        // NB: this must be NON-greedy because if we have
        // <style>foo</style>  <style>bar</style>
        // we must not grab foo</style>  <style>bar
        $html = preg_replace_callback('#<style(?:\s.*)?>(.*)<\/style>#isU', array($this, 'styleCallback'), $html);
        $style_blocks = $this->_styleMatches;
        $this->_styleMatches = array(); // reset
        $context->register('StyleBlocks', $style_blocks); // $context must not be reused
--- a/extlib/HTMLPurifier/HTMLPurifier/Generator.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Generator.php
@ -146,7 +146,7 @@ class HTMLPurifier_Generator
            $attr = $this->generateAttributes($token->attr, $token->name);
            if ($this->_flashCompat) {
                if ($token->name == "object") {
-                    $flash = new stdclass();
+                    $flash = new stdClass();
                    $flash->attr = $token->attr;
                    $flash->param = array();
                    $this->_flashStack[] = $flash;
--- a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/TargetNoopener.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/TargetNoopener.php
@ -0,0 +1,21 @@
 <?php
 /**
 * HTML module that attaches the target-based noopener attribute
 * transformation to a tags.  It is enabled by HTML.TargetNoopener
 */
 class HTMLPurifier_HTMLModule_TargetNoopener extends HTMLPurifier_HTMLModule
 {
    /**
     * Module name.
     * @type string
     */
    public $name = 'TargetNoopener';
    /**
     * Appends the noopener transform to the 'a' element's
     * attr_transform_post list.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $anchor = $this->addBlankElement('a');
        $anchor->attr_transform_post[] = new HTMLPurifier_AttrTransform_TargetNoopener();
    }
 }
--- a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/TargetNoreferrer.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/TargetNoreferrer.php
@ -0,0 +1,21 @@
 <?php
 /**
 * HTML module that attaches the target-based noreferrer attribute
 * transformation to a tags.  It is enabled by HTML.TargetNoreferrer
 */
 class HTMLPurifier_HTMLModule_TargetNoreferrer extends HTMLPurifier_HTMLModule
 {
    /**
     * Module name.
     * @type string
     */
    public $name = 'TargetNoreferrer';
    /**
     * Appends the noreferrer transform to the 'a' element's
     * attr_transform_post list.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $anchor = $this->addBlankElement('a');
        $anchor->attr_transform_post[] = new HTMLPurifier_AttrTransform_TargetNoreferrer();
    }
 }
--- a/extlib/HTMLPurifier/HTMLPurifier/HTMLModuleManager.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModuleManager.php
@ -271,6 +271,14 @@ class HTMLPurifier_HTMLModuleManager
        if ($config->get('HTML.TargetBlank')) {
            $modules[] = 'TargetBlank';
        }
        // NB: HTML.TargetNoreferrer and HTML.TargetNoopener must be AFTER HTML.TargetBlank
        // so that its post-attr-transform gets run afterwards.
        if ($config->get('HTML.TargetNoreferrer')) {
            $modules[] = 'TargetNoreferrer';
        }
        if ($config->get('HTML.TargetNoopener')) {
            $modules[] = 'TargetNoopener';
        }
        // merge in custom modules
        $modules = array_merge($modules, $this->userModules);
--- a/extlib/HTMLPurifier/HTMLPurifier/Injector/Linkify.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Injector/Linkify.php
@ -27,13 +27,18 @@ class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
        if (strpos($token->data, '://') === false) {
            // our really quick heuristic failed, abort
            // this may not work so well if we want to match things like
-            // "domainname.com", but then again, most people don't
+            // "google.com", but then again, most people don't
            return;
        }
-        // there is/are URL(s). Let's split the string:
+        // there is/are URL(s). Let's split the string.
-        // Note: this regex is extremely permissive
+        // We use this regex:
-        $bits = preg_split('#((?:https?|ftp)://[^\s\'",<>()]+)#Su', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
+        // https://gist.github.com/gruber/249502
        // but with @cscott's backtracking fix and also
        // the Unicode characters un-Unicodified.
        $bits = preg_split(
            '/\\b((?:[a-z][\\w\\-]+:(?:\\/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}\\/)(?:[^\\s()<>]|\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\))+(?:\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:\'".,<>?\x{00ab}\x{00bb}\x{201c}\x{201d}\x{2018}\x{2019}]))/iu',
            $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
        $token = array();
--- a/extlib/HTMLPurifier/HTMLPurifier/Injector/RemoveEmpty.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Injector/RemoveEmpty.php
@ -46,6 +46,12 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
        $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp');
        $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions');
        $this->exclude = $config->get('AutoFormat.RemoveEmpty.Predicate');
        foreach ($this->exclude as $key => $attrs) {
            if (!is_array($attrs)) {
                // HACK, see HTMLPurifier/Printer/ConfigForm.php
                $this->exclude[$key] = explode(';', $attrs);
            }
        }
        $this->attrValidator = new HTMLPurifier_AttrValidator();
    }
--- a/extlib/HTMLPurifier/HTMLPurifier/Injector/SafeObject.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Injector/SafeObject.php
@ -36,6 +36,7 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
    );
    /**
     * These are all lower-case keys.
     * @type array
     */
    protected $allowedParam = array(
@ -43,7 +44,7 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
        'movie' => true,
        'flashvars' => true,
        'src' => true,
-        'allowFullScreen' => true, // if omitted, assume to be 'false'
+        'allowfullscreen' => true, // if omitted, assume to be 'false'
    );
    /**
@ -93,9 +94,11 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
                    $token->attr['name'] === $this->addParam[$n]) {
                    // keep token, and add to param stack
                    $this->paramStack[$i][$n] = true;
-                } elseif (isset($this->allowedParam[$n])) {
+                } elseif (isset($this->allowedParam[strtolower($n)])) {
                    // keep token, don't do anything to it
                    // (could possibly check for duplicates here)
                    // Note: In principle, parameters should be case sensitive.
                    // But it seems they are not really; so accept any case.
                } else {
                    $token = false;
                }
--- a/extlib/HTMLPurifier/HTMLPurifier/Lexer.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Lexer.php
@ -96,7 +96,7 @@ class HTMLPurifier_Lexer
                        break;
                    }
-                    if (class_exists('DOMDocument') &&
+                    if (class_exists('DOMDocument', false) &&
                        method_exists('DOMDocument', 'loadHTML') &&
                        !extension_loaded('domxml')
                    ) {
@ -169,21 +169,24 @@ class HTMLPurifier_Lexer
            '&#x27;' => "'"
        );
    /**
     * Parses character data from text content by calling parseData() with
     * the attribute flag set to false.
     *
     * @param string $string String character data to be parsed.
     * @param HTMLPurifier_Config $config
     * @return string Parsed character data.
     */
    public function parseText($string, $config)
    {
        $is_attr = false;
        return $this->parseData($string, $is_attr, $config);
    }
    /**
     * Parses character data from an attribute value by calling parseData()
     * with the attribute flag set to true.
     *
     * @param string $string String character data to be parsed.
     * @param HTMLPurifier_Config $config
     * @return string Parsed character data.
     */
    public function parseAttr($string, $config)
    {
        $is_attr = true;
        return $this->parseData($string, $is_attr, $config);
    }
    /**
     * Parses special entities into the proper characters.
     *
     * This string will translate escaped versions of the special characters
     * into the correct ones.
     *
     * @warning
     * You should be able to treat the output of this function as
     * completely parsed, but that's only because all other entities should
     * have been handled previously in substituteNonSpecialEntities()
     *
     * @param string $string String character data to be parsed.
     * @return string Parsed character data.
     */
-    public function parseData($string)
+    public function parseData($string, $is_attr, $config)
    {
        // following functions require at least one character
        if ($string === '') {
@ -209,7 +212,15 @@ class HTMLPurifier_Lexer
        }
        // hmm... now we have some uncommon entities. Use the callback.
-        $string = $this->_entity_parser->substituteSpecialEntities($string);
+        if ($config->get('Core.LegacyEntityDecoder')) {
            $string = $this->_entity_parser->substituteSpecialEntities($string);
        } else {
            if ($is_attr) {
                $string = $this->_entity_parser->substituteAttrEntities($string);
            } else {
                $string = $this->_entity_parser->substituteTextEntities($string);
            }
        }
        return $string;
    }
@ -323,7 +334,9 @@ class HTMLPurifier_Lexer
        }
        // expand entities that aren't the big five
-        $html = $this->_entity_parser->substituteNonSpecialEntities($html);
+        if ($config->get('Core.LegacyEntityDecoder')) {
            $html = $this->_entity_parser->substituteNonSpecialEntities($html);
        }
        // clean into wellformed UTF-8 string for an SGML context: this has
        // to be done after entity expansion because the entities sometimes
@ -335,6 +348,13 @@ class HTMLPurifier_Lexer
            $html = preg_replace('#<\?.+?\?>#s', '', $html);
        }
        $hidden_elements = $config->get('Core.HiddenElements');
        if ($config->get('Core.AggressivelyRemoveScript') &&
            !($config->get('HTML.Trusted') || !$config->get('Core.RemoveScriptContents')
            || empty($hidden_elements["script"]))) {
            $html = preg_replace('#<script[^>]*>.*?</script>#i', '', $html);
        }
        return $html;
    }
@ -345,12 +365,17 @@ class HTMLPurifier_Lexer
    public function extractBody($html)
    {
        $matches = array();
-        $result = preg_match('!<body[^>]*>(.*)</body>!is', $html, $matches);
+        $result = preg_match('|(.*?)<body[^>]*>(.*)</body>|is', $html, $matches);
        if ($result) {
-            return $matches[1];
+            // Make sure it's not in a comment
-        } else {
+            $comment_start = strrpos($matches[1], '<!--');
-            return $html;
+            $comment_end   = strrpos($matches[1], '-->');
            if ($comment_start === false ||
                ($comment_end !== false && $comment_end > $comment_start)) {
                return $matches[2];
            }
        }
        return $html;
    }
 }
--- a/extlib/HTMLPurifier/HTMLPurifier/Lexer/DOMLex.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Lexer/DOMLex.php
@ -72,12 +72,20 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
        $doc->loadHTML($html);
        restore_error_handler();
        $body = $doc->getElementsByTagName('html')->item(0)-> // <html>
                      getElementsByTagName('body')->item(0);  // <body>
        $div = $body->getElementsByTagName('div')->item(0); // <div>
        $tokens = array();
-        $this->tokenizeDOM(
+        $this->tokenizeDOM($div, $tokens, $config);
-            $doc->getElementsByTagName('html')->item(0)-> // <html>
+        // If the div has a sibling, that means we tripped across
-            getElementsByTagName('body')->item(0), //   <body>
+        // a premature </div> tag.  So remove the div we parsed,
-            $tokens
+        // and then tokenize the rest of body.  We can't tokenize
-        );
+        // the sibling directly as we'll lose the tags in that case.
        if ($div->nextSibling) {
            $body->removeChild($div);
            $this->tokenizeDOM($body, $tokens, $config);
        }
        return $tokens;
    }
@ -88,7 +96,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
     * @param HTMLPurifier_Token[] $tokens   Array-list of already tokenized tokens.
     * @return HTMLPurifier_Token of node appended to previously passed tokens.
     */
-    protected function tokenizeDOM($node, &$tokens)
+    protected function tokenizeDOM($node, &$tokens, $config)
    {
        $level = 0;
        $nodes = array($level => new HTMLPurifier_Queue(array($node)));
@ -97,7 +105,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
            while (!$nodes[$level]->isEmpty()) {
                $node = $nodes[$level]->shift(); // FIFO
                $collect = $level > 0 ? true : false;
-                $needEndingTag = $this->createStartNode($node, $tokens, $collect);
+                $needEndingTag = $this->createStartNode($node, $tokens, $collect, $config);
                if ($needEndingTag) {
                    $closingNodes[$level][] = $node;
                }
@ -127,7 +135,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
     * @return bool if the token needs an endtoken
     * @todo data and tagName properties don't seem to exist in DOMNode?
     */
-    protected function createStartNode($node, &$tokens, $collect)
+    protected function createStartNode($node, &$tokens, $collect, $config)
    {
        // intercept non element nodes. WE MUST catch all of them,
        // but we're not getting the character reference nodes because
@ -151,7 +159,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
                    }
                }
            }
-            $tokens[] = $this->factory->createText($this->parseData($data));
+            $tokens[] = $this->factory->createText($this->parseText($data, $config));
            return false;
        } elseif ($node->nodeType === XML_COMMENT_NODE) {
            // this code is only invoked for comments in script/style in versions
@ -252,7 +260,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
     * @param HTMLPurifier_Context $context
     * @return string
     */
-    protected function wrapHTML($html, $config, $context)
+    protected function wrapHTML($html, $config, $context, $use_div = true)
    {
        $def = $config->getDefinition('HTML');
        $ret = '';
@ -271,7 +279,11 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
        $ret .= '<html><head>';
        $ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
        // No protection if $html contains a stray </div>!
-        $ret .= '</head><body>' . $html . '</body></html>';
+        $ret .= '</head><body>';
        if ($use_div) $ret .= '<div>';
        $ret .= $html;
        if ($use_div) $ret .= '</div>';
        $ret .= '</body></html>';
        return $ret;
    }
 }
--- a/extlib/HTMLPurifier/HTMLPurifier/Lexer/DirectLex.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Lexer/DirectLex.php
@ -129,12 +129,12 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                // We are not inside tag and there still is another tag to parse
                $token = new
                HTMLPurifier_Token_Text(
-                    $this->parseData(
+                    $this->parseText(
                        substr(
                            $html,
                            $cursor,
                            $position_next_lt - $cursor
-                        )
+                        ), $config
                    )
                );
                if ($maintain_line_numbers) {
@ -154,11 +154,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                // Create Text of rest of string
                $token = new
                HTMLPurifier_Token_Text(
-                    $this->parseData(
+                    $this->parseText(
                        substr(
                            $html,
                            $cursor
-                        )
+                        ), $config
                    )
                );
                if ($maintain_line_numbers) {
@ -324,8 +324,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                $token = new
                HTMLPurifier_Token_Text(
                    '<' .
-                    $this->parseData(
+                    $this->parseText(
-                        substr($html, $cursor)
+                        substr($html, $cursor), $config
                    )
                );
                if ($maintain_line_numbers) {
@ -429,7 +429,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
            if ($value === false) {
                $value = '';
            }
-            return array($key => $this->parseData($value));
+            return array($key => $this->parseAttr($value, $config));
        }
        // setup loop environment
@ -518,7 +518,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                if ($value === false) {
                    $value = '';
                }
-                $array[$key] = $this->parseData($value);
+                $array[$key] = $this->parseAttr($value, $config);
                $cursor++;
            } else {
                // boolattr
--- a/extlib/HTMLPurifier/HTMLPurifier/Lexer/PH5P.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Lexer/PH5P.php
@ -21,7 +21,7 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex
    public function tokenizeHTML($html, $config, $context)
    {
        $new_html = $this->normalize($html, $config, $context);
-        $new_html = $this->wrapHTML($new_html, $config, $context);
+        $new_html = $this->wrapHTML($new_html, $config, $context, false /* no div */);
        try {
            $parser = new HTML5($new_html);
            $doc = $parser->save();
@ -34,9 +34,9 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex
        $tokens = array();
        $this->tokenizeDOM(
            $doc->getElementsByTagName('html')->item(0)-> // <html>
-                getElementsByTagName('body')->item(0) //   <body>
+                  getElementsByTagName('body')->item(0) //   <body>
            ,
-            $tokens
+            $tokens, $config
        );
        return $tokens;
    }
@ -1515,6 +1515,7 @@ class HTML5
                // Consume the maximum number of characters possible, with the
                // consumed characters case-sensitively matching one of the
                // identifiers in the first column of the entities table.
                $e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
                $len = strlen($e_name);
@ -1547,7 +1548,7 @@ class HTML5
        // Return a character token for the character corresponding to the
        // entity name (as given by the second column of the entities table).
-        return html_entity_decode('&' . $entity . ';', ENT_QUOTES, 'UTF-8');
+        return html_entity_decode('&' . rtrim($entity, ';') . ';', ENT_QUOTES, 'UTF-8');
    }
    private function emitToken($token)
--- a/extlib/HTMLPurifier/HTMLPurifier/Printer/ConfigForm.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Printer/ConfigForm.php
@ -327,6 +327,10 @@ class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer
                case HTMLPurifier_VarParser::HASH:
                    $nvalue = '';
                    foreach ($value as $i => $v) {
                        if (is_array($v)) {
                            // HACK
                            $v = implode(";", $v);
                        }
                        $nvalue .= "$i:$v" . PHP_EOL;
                    }
                    $value = $nvalue;
--- a/extlib/HTMLPurifier/HTMLPurifier/Strategy/MakeWellFormed.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Strategy/MakeWellFormed.php
@ -165,7 +165,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
                        if (empty($zipper->front)) break;
                        $token = $zipper->prev($token);
                        // indicate that other injectors should not process this token,
-                        // but we need to reprocess it
+                        // but we need to reprocess it.  See Note [Injector skips]
                        unset($token->skip[$i]);
                        $token->rewind = $i;
                        if ($token instanceof HTMLPurifier_Token_Start) {
@ -210,6 +210,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
                if ($token instanceof HTMLPurifier_Token_Text) {
                    foreach ($this->injectors as $i => $injector) {
                        if (isset($token->skip[$i])) {
                            // See Note [Injector skips]
                            continue;
                        }
                        if ($token->rewind !== null && $token->rewind !== $i) {
@ -367,6 +368,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
            if ($ok) {
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) {
                        // See Note [Injector skips]
                        continue;
                    }
                    if ($token->rewind !== null && $token->rewind !== $i) {
@ -422,6 +424,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
                $token->start = $current_parent;
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) {
                        // See Note [Injector skips]
                        continue;
                    }
                    if ($token->rewind !== null && $token->rewind !== $i) {
@ -534,12 +537,17 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
     */
    protected function processToken($token, $injector = -1)
    {
        // Zend OpCache miscompiles $token = array($token), so
        // avoid this pattern.  See: https://github.com/ezyang/htmlpurifier/issues/108
        // normalize forms of token
        if (is_object($token)) {
-            $token = array(1, $token);
+            $tmp = $token;
            $token = array(1, $tmp);
        }
        if (is_int($token)) {
-            $token = array($token);
+            $tmp = $token;
            $token = array($tmp);
        }
        if ($token === false) {
            $token = array(1);
@ -561,7 +569,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
        list($old, $r) = $this->zipper->splice($this->token, $delete, $token);
        if ($injector > -1) {
-            // determine appropriate skips
+            // See Note [Injector skips]
            // Determine appropriate skips.  Here's what the code does:
            //  *If* we deleted one or more tokens, copy the skips
            //  of those tokens into the skips of the new tokens (in $token).
            //  Also, mark the newly inserted tokens as having come from
            //  $injector.
            $oldskip = isset($old[0]) ? $old[0]->skip : array();
            foreach ($token as $object) {
                $object->skip = $oldskip;
@ -597,4 +610,50 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
    }
 }
 // Note [Injector skips]
 // ~~~~~~~~~~~~~~~~~~~~~
 // When I originally designed this class, the idea behind the 'skip'
 // property of HTMLPurifier_Token was to help avoid infinite loops
 // in injector processing.  For example, suppose you wrote an injector
 // that bolded swear words.  Naively, you might write it so that
 // whenever you saw ****, you replaced it with <strong>****</strong>.
 //
 // When this happens, we will reprocess all of the tokens with the
 // other injectors.  Now there is an opportunity for infinite loop:
 // if we rerun the swear-word injector on these tokens, we might
 // see **** and then reprocess again to get
 // <strong><strong>****</strong></strong> ad infinitum.
 //
 // Thus, the idea of a skip is that once we process a token with
 // an injector, we mark all of those tokens as having "come from"
 // the injector, and we never run the injector again on these
 // tokens.
 //
 // There were two more complications, however:
 //
 //  - With HTMLPurifier_Injector_RemoveEmpty, we noticed that if
 //    you had <b><i></i></b>, after you removed the <i></i>, you
 //    really would like this injector to go back and reprocess
 //    the <b> tag, discovering that it is now empty and can be
 //    removed.  So we reintroduced the possibility of infinite looping
 //    by adding a "rewind" function, which let you go back to an
 //    earlier point in the token stream and reprocess it with injectors.
 //    Needless to say, we need to UN-skip the token so it gets
 //    reprocessed.
 //
 //  - Suppose that you successfully process a token, replace it with
 //    one with your skip mark, but now another injector wants to
 //    process the skipped token with another token.  Should you continue
 //    to skip that new token, or reprocess it?  If you reprocess,
 //    you can end up with an infinite loop where one injector converts
 //    <a> to <b>, and then another injector converts it back.  So
 //    we inherit the skips, but for some reason, I thought that we
 //    should inherit the skip from the first token of the token
 //    that we deleted.  Why?  Well, it seems to work OK.
 //
 // If I were to redesign this functionality, I would absolutely not
 // go about doing it this way: the semantics are just not very well
 // defined, and in any case you probably wanted to operate on trees,
 // not token streams.
 // vim: et sw=4 sts=4
--- a/extlib/HTMLPurifier/HTMLPurifier/Token.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Token.php
@ -26,7 +26,7 @@ abstract class HTMLPurifier_Token
    public $armor = array();
    /**
-     * Used during MakeWellFormed.
+     * Used during MakeWellFormed.  See Note [Injector skips]
     * @type
     */
    public $skip;
--- a/extlib/HTMLPurifier/HTMLPurifier/URI.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/URI.php
@ -85,11 +85,13 @@ class HTMLPurifier_URI
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
-                // something funky happened to the default scheme object
+                if ($def->defaultScheme !== null) {
-                trigger_error(
+                    // something funky happened to the default scheme object
-                    'Default scheme object "' . $def->defaultScheme . '" was not readable',
+                    trigger_error(
-                    E_USER_WARNING
+                        'Default scheme object "' . $def->defaultScheme . '" was not readable',
-                );
+                        E_USER_WARNING
                    );
                } // suppress error if it's null
                return false;
            }
        }
--- a/extlib/HTMLPurifier/HTMLPurifier/URIScheme/data.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/URIScheme/data.php
@ -79,9 +79,18 @@ class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme
        } else {
            $raw_data = $data;
        }
        if ( strlen($raw_data) < 12 ) {
            // error; exif_imagetype throws exception with small files,
            // and this likely indicates a corrupt URI/failed parse anyway
            return false;
        }
        // XXX probably want to refactor this into a general mechanism
        // for filtering arbitrary content types
-        $file = tempnam("/tmp", "");
+        if (function_exists('sys_get_temp_dir')) {
            $file = tempnam(sys_get_temp_dir(), "");
        } else {
            $file = tempnam("/tmp", "");
        }
        file_put_contents($file, $raw_data);
        if (function_exists('exif_imagetype')) {
            $image_code = exif_imagetype($file);
--- a/extlib/HTMLPurifier/HTMLPurifier/URIScheme/tel.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/URIScheme/tel.php
@ -0,0 +1,46 @@
 <?php
 /**
 * URI scheme handler for tel: (telephone number) URIs.
 *
 * RFC 3966 and RFC 5341 are the governing specifications, but no attempt
 * is made to implement them in full.  Instead the number is normalized
 * down to an optional leading plus sign followed by any mix of digits
 * and lower-case x characters (the e(x)tension marker).
 */
 class HTMLPurifier_URIScheme_tel extends HTMLPurifier_URIScheme
 {
    /**
     * NOTE(review): presumably overrides a flag of the same name on the
     * parent scheme class -- confirm its meaning there.
     * @type bool
     */
    public $browsable = false;
    /**
     * NOTE(review): presumably overrides a flag of the same name on the
     * parent scheme class -- confirm its meaning there.
     * @type bool
     */
    public $may_omit_host = true;
    /**
     * Clears the authority parts of the URI and normalizes its path
     * into a bare phone number.
     *
     * @param HTMLPurifier_URI $uri URI to normalize; modified in place
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool always true
     */
    public function doValidate(&$uri, $config, $context)
    {
        // Drop every authority component that may have been parsed.
        $uri->port     = null;
        $uri->host     = null;
        $uri->userinfo = null;

        // Step 1: fold the e(x)tension marker to lower-case.
        $lowered = str_replace('X', 'x', $uri->path);

        // Step 2: strip every character that is neither a digit nor an
        // 'x'.  The negative lookahead spares a plus sign, but only when
        // it is the very first character of the number.
        $uri->path = preg_replace('/(?!^\+)[^\dx]/', '', $lowered);

        return true;
    }
 }
 // vim: et sw=4 sts=4
--- a/extlib/HTMLPurifier/VERSION
+++ b/extlib/HTMLPurifier/VERSION
@ -1 +1 @@
-4.7.0
+4.9.3