Update HTMLPurifier to current stable version 4.5.0

This commit is contained in:
Michael Kaufmann (d00p)
2013-09-21 12:18:55 +02:00
parent fc8bd2b7af
commit 2dfdd6a5f9
87 changed files with 2057 additions and 342 deletions

View File

@@ -11,8 +11,6 @@ abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
*/
protected $strategies = array();
abstract public function __construct();
public function execute($tokens, $config, $context) {
foreach ($this->strategies as $strategy) {
$tokens = $strategy->execute($tokens, $config, $context);

View File

@@ -26,6 +26,22 @@
* translated into text depends on the child definitions.
*
* @todo Enable nodes to be bubbled out of the structure.
*
* @warning This algorithm (though it may be hard to see) proceeds in
* a top-down fashion. Thus, parents are processed before
* children. This is easy to implement and has a nice efficiency
* benefit, in that if a node is removed, we never waste any
* time processing it, but it also means that if a child
* changes in a non-encapsulated way (e.g. it is removed), we
* need to go back and reprocess the parent to see if those
* changes resulted in problems for the parent. See
* [BACKTRACK] for an example of this. In the current
* implementation, this backtracking can only be triggered when
* a node is removed and if that node was the sole node, the
* parent would need to be removed. As such, it is easy to see
* that backtracking only incurs constant overhead. If more
* sophisticated backtracking is implemented, care must be
* taken to avoid nontermination or exponential blowup.
*/
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
@@ -38,6 +54,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// get a copy of the HTML definition
$definition = $config->getHTMLDefinition();
$excludes_enabled = !$config->get('Core.DisableExcludes');
// insert implicit "parent" node, will be removed at end.
// DEFINITION CALL
$parent_name = $definition->info_parent;
@@ -147,7 +165,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// parent exclusions. The array should not be very large, two
// elements at most.
$excluded = false;
if (!empty($exclude_stack)) {
if (!empty($exclude_stack) && $excludes_enabled) {
foreach ($exclude_stack as $lookup) {
if (isset($lookup[$tokens[$i]->name])) {
$excluded = true;
@@ -235,7 +253,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// our current implementation claims that that case would
// not allow empty, even if it did
if (!$parent_def->child->allow_empty) {
// we need to do a double-check
// we need to do a double-check [BACKTRACK]
$i = $parent_index;
array_pop($stack);
}

View File

@@ -2,6 +2,14 @@
/**
* Takes tokens and makes them well-formed (balance end tags, etc.)
*
* Specification of the armor attributes this strategy uses:
*
* - MakeWellFormed_TagClosedError: This armor field is used to
* suppress tag closed errors for certain tokens [TagClosedSuppress],
* in particular, if a tag was generated automatically by HTML
* Purifier, we may rely on our infrastructure to close it for us
* and shouldn't report an error to the user [TagClosedAuto].
*/
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{
@@ -43,6 +51,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// local variables
$generator = new HTMLPurifier_Generator($config, $context);
$escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
// used for autoclose early abortion
$global_parent_allowed_elements = array();
if (isset($definition->info[$definition->info_parent])) {
// may be unset under testing circumstances
$global_parent_allowed_elements = $definition->info[$definition->info_parent]->child->getAllowedElements($config);
}
$e = $context->get('ErrorCollector', true);
$t = false; // token index
$i = false; // injector index
@@ -102,7 +116,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// -- end INJECTOR --
// a note on punting:
// a note on reprocessing:
// In order to reduce code duplication, whenever some code needs
// to make HTML changes in order to make things "correct", the
// new HTML gets sent through the purifier, regardless of its
@@ -149,7 +163,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$top_nesting = array_pop($this->stack);
$this->stack[] = $top_nesting;
// send error
// send error [TagClosedSuppress]
if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
}
@@ -193,12 +207,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$ok = false;
if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
// claims to be a start tag but is empty
$token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
$token = new HTMLPurifier_Token_Empty($token->name, $token->attr, $token->line, $token->col, $token->armor);
$ok = true;
} elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
// claims to be empty but really is a start tag
$this->swap(new HTMLPurifier_Token_End($token->name));
$this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr));
$this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr, $token->line, $token->col, $token->armor));
// punt (since we had to modify the input stream in a non-trivial way)
$reprocess = true;
continue;
@@ -211,6 +225,19 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// ...unless they also have to close their parent
if (!empty($this->stack)) {
// Performance note: you might think that it's rather
// inefficient, recalculating the autoclose information
// for every tag that a token closes (since when we
// do an autoclose, we push a new token into the
// stream and then /process/ that, before
// re-processing this token.) But this is
// necessary, because an injector can make
// arbitrary transformations to the autoclosing
// tokens we introduce, so things may have changed
// in the meantime. Also, doing the inefficient thing is
// "easy" to reason about (for certain perverse definitions
// of "easy")
$parent = array_pop($this->stack);
$this->stack[] = $parent;
@@ -243,24 +270,51 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
}
if ($autoclose) {
// errors need to be updated
$new_token = new HTMLPurifier_Token_End($parent->name);
$new_token->start = $parent;
if ($carryover) {
$element = clone $parent;
$element->armor['MakeWellFormed_TagClosedError'] = true;
$element->carryover = true;
$this->processToken(array($new_token, $token, $element));
} else {
$this->insertBefore($new_token);
}
if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
if (!$carryover) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
} else {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
// check if this autoclose is doomed to fail
// (this rechecks $parent, which is harmless)
$autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
if (!$autoclose_ok) {
foreach ($this->stack as $ancestor) {
$elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
if (isset($elements[$token->name])) {
$autoclose_ok = true;
break;
}
if ($definition->info[$token->name]->wrap) {
$wrapname = $definition->info[$token->name]->wrap;
$wrapdef = $definition->info[$wrapname];
$wrap_elements = $wrapdef->child->getAllowedElements($config);
if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
$autoclose_ok = true;
break;
}
}
}
}
if ($autoclose_ok) {
// errors need to be updated
$new_token = new HTMLPurifier_Token_End($parent->name);
$new_token->start = $parent;
if ($carryover) {
$element = clone $parent;
// [TagClosedAuto]
$element->armor['MakeWellFormed_TagClosedError'] = true;
$element->carryover = true;
$this->processToken(array($new_token, $token, $element));
} else {
$this->insertBefore($new_token);
}
// [TagClosedSuppress]
if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
if (!$carryover) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
} else {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
}
}
} else {
$this->remove();
}
$reprocess = true;
continue;
}
@@ -366,7 +420,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if ($e) {
for ($j = $c - 1; $j > 0; $j--) {
// notice we exclude $j == 0, i.e. the current ending tag, from
// the errors...
// the errors... [TagClosedSuppress]
if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
}
@@ -381,6 +435,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$new_token->start = $skipped_tags[$j];
array_unshift($replace, $new_token);
if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
// [TagClosedAuto]
$element = clone $skipped_tags[$j];
$element->carryover = true;
$element->armor['MakeWellFormed_TagClosedError'] = true;
@@ -449,7 +504,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
}
/**
* Inserts a token before the current token. Cursor now points to this token
* Inserts a token before the current token. Cursor now points to
* this token. You must reprocess after this.
*/
private function insertBefore($token) {
array_splice($this->tokens, $this->t, 0, array($token));
@@ -457,14 +513,15 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
/**
* Removes current token. Cursor now points to new token occupying previously
* occupied space.
* occupied space. You must reprocess after this.
*/
private function remove() {
// Cut exactly one element out of $this->tokens at index $this->t.
// array_splice() renumbers the numeric keys, so the element that
// followed the removed one now sits at index $this->t; $this->t itself
// is not modified here.
array_splice($this->tokens, $this->t, 1);
}
/**
* Swap current token with new token. Cursor points to new token (no change).
* Swap current token with new token. Cursor points to new token (no
* change). You must reprocess after this.
*/
private function swap($token) {
$this->tokens[$this->t] = $token;

View File

@@ -21,6 +21,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
// currently only used to determine if comments should be kept
$trusted = $config->get('HTML.Trusted');
$comment_lookup = $config->get('HTML.AllowedComments');
$comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
$check_comments = $comment_lookup !== array() || $comment_regexp !== null;
$remove_script_contents = $config->get('Core.RemoveScriptContents');
$hidden_elements = $config->get('Core.HiddenElements');
@@ -128,23 +131,37 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
if ($textify_comments !== false) {
$data = $token->data;
$token = new HTMLPurifier_Token_Text($data);
} elseif ($trusted) {
// keep, but perform comment cleaning
} elseif ($trusted || $check_comments) {
// always cleanup comments
$trailing_hyphen = false;
if ($e) {
// perform check whether or not there's a trailing hyphen
if (substr($token->data, -1) == '-') {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
$trailing_hyphen = true;
}
}
$token->data = rtrim($token->data, '-');
$found_double_hyphen = false;
while (strpos($token->data, '--') !== false) {
if ($e && !$found_double_hyphen) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
}
$found_double_hyphen = true; // prevent double-erroring
$found_double_hyphen = true;
$token->data = str_replace('--', '-', $token->data);
}
if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) {
// OK good
if ($e) {
if ($trailing_hyphen) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
}
if ($found_double_hyphen) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
}
}
} else {
if ($e) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
}
continue;
}
} else {
// strip comments
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');