Update HTMLPurifier to current stable version 4.5.0
This commit is contained in:
@@ -11,8 +11,6 @@ abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
|
||||
*/
|
||||
protected $strategies = array();
|
||||
|
||||
abstract public function __construct();
|
||||
|
||||
public function execute($tokens, $config, $context) {
|
||||
foreach ($this->strategies as $strategy) {
|
||||
$tokens = $strategy->execute($tokens, $config, $context);
|
||||
|
||||
@@ -26,6 +26,22 @@
|
||||
* translated into text depends on the child definitions.
|
||||
*
|
||||
* @todo Enable nodes to be bubbled out of the structure.
|
||||
*
|
||||
* @warning This algorithm (though it may be hard to see) proceeds in
|
||||
* a top-down fashion. Thus, parents are processed before
|
||||
* children. This is easy to implement and has a nice efficiency
|
||||
* benefit, in that if a node is removed, we never waste any
|
||||
* time processing it, but it also means that if a child
|
||||
* changes in a non-encapsulated way (e.g. it is removed), we
|
||||
* need to go back and reprocess the parent to see if those
|
||||
* changes resulted in problems for the parent. See
|
||||
* [BACKTRACK] for an example of this. In the current
|
||||
* implementation, this backtracking can only be triggered when
|
||||
* a node is removed and if that node was the sole node, the
|
||||
* parent would need to be removed. As such, it is easy to see
|
||||
* that backtracking only incurs constant overhead. If more
|
||||
* sophisticated backtracking is implemented, care must be
|
||||
* taken to avoid nontermination or exponential blowup.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
@@ -38,6 +54,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
// get a copy of the HTML definition
|
||||
$definition = $config->getHTMLDefinition();
|
||||
|
||||
$excludes_enabled = !$config->get('Core.DisableExcludes');
|
||||
|
||||
// insert implicit "parent" node, will be removed at end.
|
||||
// DEFINITION CALL
|
||||
$parent_name = $definition->info_parent;
|
||||
@@ -147,7 +165,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
// parent exclusions. The array should not be very large, two
|
||||
// elements at most.
|
||||
$excluded = false;
|
||||
if (!empty($exclude_stack)) {
|
||||
if (!empty($exclude_stack) && $excludes_enabled) {
|
||||
foreach ($exclude_stack as $lookup) {
|
||||
if (isset($lookup[$tokens[$i]->name])) {
|
||||
$excluded = true;
|
||||
@@ -235,7 +253,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
// our current implementation claims that that case would
|
||||
// not allow empty, even if it did
|
||||
if (!$parent_def->child->allow_empty) {
|
||||
// we need to do a double-check
|
||||
// we need to do a double-check [BACKTRACK]
|
||||
$i = $parent_index;
|
||||
array_pop($stack);
|
||||
}
|
||||
|
||||
@@ -2,6 +2,14 @@
|
||||
|
||||
/**
|
||||
* Takes tokens and makes them well-formed (balance end tags, etc.)
|
||||
*
|
||||
* Specification of the armor attributes this strategy uses:
|
||||
*
|
||||
* - MakeWellFormed_TagClosedError: This armor field is used to
|
||||
* suppress tag closed errors for certain tokens [TagClosedSuppress],
|
||||
* in particular, if a tag was generated automatically by HTML
|
||||
* Purifier, we may rely on our infrastructure to close it for us
|
||||
* and shouldn't report an error to the user [TagClosedAuto].
|
||||
*/
|
||||
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
{
|
||||
@@ -43,6 +51,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
// local variables
|
||||
$generator = new HTMLPurifier_Generator($config, $context);
|
||||
$escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
|
||||
// used for autoclose early abortion
|
||||
$global_parent_allowed_elements = array();
|
||||
if (isset($definition->info[$definition->info_parent])) {
|
||||
// may be unset under testing circumstances
|
||||
$global_parent_allowed_elements = $definition->info[$definition->info_parent]->child->getAllowedElements($config);
|
||||
}
|
||||
$e = $context->get('ErrorCollector', true);
|
||||
$t = false; // token index
|
||||
$i = false; // injector index
|
||||
@@ -102,7 +116,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
|
||||
// -- end INJECTOR --
|
||||
|
||||
// a note on punting:
|
||||
// a note on reprocessing:
|
||||
// In order to reduce code duplication, whenever some code needs
|
||||
// to make HTML changes in order to make things "correct", the
|
||||
// new HTML gets sent through the purifier, regardless of its
|
||||
@@ -149,7 +163,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
$top_nesting = array_pop($this->stack);
|
||||
$this->stack[] = $top_nesting;
|
||||
|
||||
// send error
|
||||
// send error [TagClosedSuppress]
|
||||
if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
|
||||
}
|
||||
@@ -193,12 +207,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
$ok = false;
|
||||
if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
|
||||
// claims to be a start tag but is empty
|
||||
$token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
|
||||
$token = new HTMLPurifier_Token_Empty($token->name, $token->attr, $token->line, $token->col, $token->armor);
|
||||
$ok = true;
|
||||
} elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
|
||||
// claims to be empty but really is a start tag
|
||||
$this->swap(new HTMLPurifier_Token_End($token->name));
|
||||
$this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr));
|
||||
$this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr, $token->line, $token->col, $token->armor));
|
||||
// punt (since we had to modify the input stream in a non-trivial way)
|
||||
$reprocess = true;
|
||||
continue;
|
||||
@@ -211,6 +225,19 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
// ...unless they also have to close their parent
|
||||
if (!empty($this->stack)) {
|
||||
|
||||
// Performance note: you might think that it's rather
|
||||
// inefficient, recalculating the autoclose information
|
||||
// for every tag that a token closes (since when we
|
||||
// do an autoclose, we push a new token into the
|
||||
// stream and then /process/ that, before
|
||||
// re-processing this token.) But this is
|
||||
// necessary, because an injector can make
|
||||
// arbitrary transformations to the autoclosing
|
||||
// tokens we introduce, so things may have changed
|
||||
// in the meantime. Also, doing the inefficient thing is
|
||||
// "easy" to reason about (for certain perverse definitions
|
||||
// of "easy")
|
||||
|
||||
$parent = array_pop($this->stack);
|
||||
$this->stack[] = $parent;
|
||||
|
||||
@@ -243,24 +270,51 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
}
|
||||
|
||||
if ($autoclose) {
|
||||
// errors need to be updated
|
||||
$new_token = new HTMLPurifier_Token_End($parent->name);
|
||||
$new_token->start = $parent;
|
||||
if ($carryover) {
|
||||
$element = clone $parent;
|
||||
$element->armor['MakeWellFormed_TagClosedError'] = true;
|
||||
$element->carryover = true;
|
||||
$this->processToken(array($new_token, $token, $element));
|
||||
} else {
|
||||
$this->insertBefore($new_token);
|
||||
}
|
||||
if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
|
||||
if (!$carryover) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
|
||||
} else {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
|
||||
// check if this autoclose is doomed to fail
|
||||
// (this rechecks $parent, which is harmless)
|
||||
$autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
|
||||
if (!$autoclose_ok) {
|
||||
foreach ($this->stack as $ancestor) {
|
||||
$elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
|
||||
if (isset($elements[$token->name])) {
|
||||
$autoclose_ok = true;
|
||||
break;
|
||||
}
|
||||
if ($definition->info[$token->name]->wrap) {
|
||||
$wrapname = $definition->info[$token->name]->wrap;
|
||||
$wrapdef = $definition->info[$wrapname];
|
||||
$wrap_elements = $wrapdef->child->getAllowedElements($config);
|
||||
if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
|
||||
$autoclose_ok = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($autoclose_ok) {
|
||||
// errors need to be updated
|
||||
$new_token = new HTMLPurifier_Token_End($parent->name);
|
||||
$new_token->start = $parent;
|
||||
if ($carryover) {
|
||||
$element = clone $parent;
|
||||
// [TagClosedAuto]
|
||||
$element->armor['MakeWellFormed_TagClosedError'] = true;
|
||||
$element->carryover = true;
|
||||
$this->processToken(array($new_token, $token, $element));
|
||||
} else {
|
||||
$this->insertBefore($new_token);
|
||||
}
|
||||
// [TagClosedSuppress]
|
||||
if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
|
||||
if (!$carryover) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
|
||||
} else {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
$this->remove();
|
||||
}
|
||||
$reprocess = true;
|
||||
continue;
|
||||
}
|
||||
@@ -366,7 +420,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
if ($e) {
|
||||
for ($j = $c - 1; $j > 0; $j--) {
|
||||
// notice we exclude $j == 0, i.e. the current ending tag, from
|
||||
// the errors...
|
||||
// the errors... [TagClosedSuppress]
|
||||
if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
|
||||
}
|
||||
@@ -381,6 +435,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
$new_token->start = $skipped_tags[$j];
|
||||
array_unshift($replace, $new_token);
|
||||
if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
|
||||
// [TagClosedAuto]
|
||||
$element = clone $skipped_tags[$j];
|
||||
$element->carryover = true;
|
||||
$element->armor['MakeWellFormed_TagClosedError'] = true;
|
||||
@@ -449,7 +504,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
}
|
||||
|
||||
/**
|
||||
* Inserts a token before the current token. Cursor now points to this token
|
||||
* Inserts a token before the current token. Cursor now points to
|
||||
* this token. You must reprocess after this.
|
||||
*/
|
||||
private function insertBefore($token) {
|
||||
array_splice($this->tokens, $this->t, 0, array($token));
|
||||
@@ -457,14 +513,15 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
|
||||
/**
|
||||
* Removes current token. Cursor now points to new token occupying previously
|
||||
* occupied space.
|
||||
* occupied space. You must reprocess after this.
|
||||
*/
|
||||
private function remove() {
|
||||
array_splice($this->tokens, $this->t, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Swap current token with new token. Cursor points to new token (no change).
|
||||
* Swap current token with new token. Cursor points to new token (no
|
||||
* change). You must reprocess after this.
|
||||
*/
|
||||
private function swap($token) {
|
||||
$this->tokens[$this->t] = $token;
|
||||
|
||||
@@ -21,6 +21,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
|
||||
// currently only used to determine if comments should be kept
|
||||
$trusted = $config->get('HTML.Trusted');
|
||||
$comment_lookup = $config->get('HTML.AllowedComments');
|
||||
$comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
|
||||
$check_comments = $comment_lookup !== array() || $comment_regexp !== null;
|
||||
|
||||
$remove_script_contents = $config->get('Core.RemoveScriptContents');
|
||||
$hidden_elements = $config->get('Core.HiddenElements');
|
||||
@@ -128,23 +131,37 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
if ($textify_comments !== false) {
|
||||
$data = $token->data;
|
||||
$token = new HTMLPurifier_Token_Text($data);
|
||||
} elseif ($trusted) {
|
||||
// keep, but perform comment cleaning
|
||||
} elseif ($trusted || $check_comments) {
|
||||
// always cleanup comments
|
||||
$trailing_hyphen = false;
|
||||
if ($e) {
|
||||
// perform check whether or not there's a trailing hyphen
|
||||
if (substr($token->data, -1) == '-') {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
||||
$trailing_hyphen = true;
|
||||
}
|
||||
}
|
||||
$token->data = rtrim($token->data, '-');
|
||||
$found_double_hyphen = false;
|
||||
while (strpos($token->data, '--') !== false) {
|
||||
if ($e && !$found_double_hyphen) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
||||
}
|
||||
$found_double_hyphen = true; // prevent double-erroring
|
||||
$found_double_hyphen = true;
|
||||
$token->data = str_replace('--', '-', $token->data);
|
||||
}
|
||||
if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) {
|
||||
// OK good
|
||||
if ($e) {
|
||||
if ($trailing_hyphen) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
||||
}
|
||||
if ($found_double_hyphen) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ($e) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
||||
}
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// strip comments
|
||||
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
||||
|
||||
Reference in New Issue
Block a user