[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3master/var/www/se3/includes/library/HTMLPurifier/Strategy/ -> MakeWellFormed.php (source)

   1  <?php
   2  
   3  /**
   4   * Takes tokens and makes them well-formed (balances end tags, etc.)
   5   *
   6   * Specification of the armor attributes this strategy uses:
   7   *
   8   *      - MakeWellFormed_TagClosedError: This armor field is used to
   9   *        suppress tag closed errors for certain tokens [TagClosedSuppress],
  10   *        in particular, if a tag was generated automatically by HTML
  11   *        Purifier, we may rely on our infrastructure to close it for us
  12   *        and shouldn't report an error to the user [TagClosedAuto].
  13   */
  14  class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
  15  {
  16  
  17      /**
  18       * Array stream of tokens being processed.
  19       * @type HTMLPurifier_Token[]
  20       */
  21      protected $tokens;
  22  
  23      /**
  24       * Current token.
  25       * @type HTMLPurifier_Token
  26       */
  27      protected $token;
  28  
  29      /**
  30       * Zipper managing the true state.
  31       * @type HTMLPurifier_Zipper
  32       */
  33      protected $zipper;
  34  
  35      /**
  36       * Current nesting of elements.
  37       * @type array
  38       */
  39      protected $stack;
  40  
  41      /**
  42       * Injectors active in this stream processing.
  43       * @type HTMLPurifier_Injector[]
  44       */
  45      protected $injectors;
  46  
  47      /**
  48       * Current instance of HTMLPurifier_Config.
  49       * @type HTMLPurifier_Config
  50       */
  51      protected $config;
  52  
  53      /**
  54       * Current instance of HTMLPurifier_Context.
  55       * @type HTMLPurifier_Context
  56       */
  57      protected $context;
  58  
  59      /**
  60       * @param HTMLPurifier_Token[] $tokens
  61       * @param HTMLPurifier_Config $config
  62       * @param HTMLPurifier_Context $context
  63       * @return HTMLPurifier_Token[]
  64       * @throws HTMLPurifier_Exception
  65       */
  66      public function execute($tokens, $config, $context)
  67      {
  68          $definition = $config->getHTMLDefinition();
  69  
  70          // local variables
  71          $generator = new HTMLPurifier_Generator($config, $context);
  72          $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
  73          // used for autoclose early abortion
  74          $global_parent_allowed_elements = $definition->info_parent_def->child->getAllowedElements($config);
  75          $e = $context->get('ErrorCollector', true);
  76          $i = false; // injector index
  77          list($zipper, $token) = HTMLPurifier_Zipper::fromArray($tokens);
  78          if ($token === NULL) {
  79              return array();
  80          }
  81          $reprocess = false; // whether or not to reprocess the same token
  82          $stack = array();
  83  
  84          // member variables
  85          $this->stack =& $stack;
  86          $this->tokens =& $tokens;
  87          $this->token =& $token;
  88          $this->zipper =& $zipper;
  89          $this->config = $config;
  90          $this->context = $context;
  91  
  92          // context variables
  93          $context->register('CurrentNesting', $stack);
  94          $context->register('InputZipper', $zipper);
  95          $context->register('CurrentToken', $token);
  96  
  97          // -- begin INJECTOR --
  98  
  99          $this->injectors = array();
 100  
 101          $injectors = $config->getBatch('AutoFormat');
 102          $def_injectors = $definition->info_injector;
 103          $custom_injectors = $injectors['Custom'];
 104          unset($injectors['Custom']); // special case
 105          foreach ($injectors as $injector => $b) {
 106              // XXX: Fix with a legitimate lookup table of enabled filters
 107              if (strpos($injector, '.') !== false) {
 108                  continue;
 109              }
 110              $injector = "HTMLPurifier_Injector_$injector";
 111              if (!$b) {
 112                  continue;
 113              }
 114              $this->injectors[] = new $injector;
 115          }
 116          foreach ($def_injectors as $injector) {
 117              // assumed to be objects
 118              $this->injectors[] = $injector;
 119          }
 120          foreach ($custom_injectors as $injector) {
 121              if (!$injector) {
 122                  continue;
 123              }
 124              if (is_string($injector)) {
 125                  $injector = "HTMLPurifier_Injector_$injector";
 126                  $injector = new $injector;
 127              }
 128              $this->injectors[] = $injector;
 129          }
 130  
 131          // give the injectors references to the definition and context
 132          // variables for performance reasons
 133          foreach ($this->injectors as $ix => $injector) {
 134              $error = $injector->prepare($config, $context);
 135              if (!$error) {
 136                  continue;
 137              }
 138              array_splice($this->injectors, $ix, 1); // rm the injector
 139              trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
 140          }
 141  
 142          // -- end INJECTOR --
 143  
 144          // a note on reprocessing:
 145          //      In order to reduce code duplication, whenever some code needs
 146          //      to make HTML changes in order to make things "correct", the
 147          //      new HTML gets sent through the purifier, regardless of its
 148          //      status. This means that if we add a start token, because it
 149          //      was totally necessary, we don't have to update nesting; we just
 150          //      punt ($reprocess = true; continue;) and it does that for us.
 151  
 152          // isset is in loop because $tokens size changes during loop exec
 153          for (;;
 154               // only increment if we don't need to reprocess
 155               $reprocess ? $reprocess = false : $token = $zipper->next($token)) {
 156  
 157              // check for a rewind
 158              if (is_int($i)) {
 159                  // possibility: disable rewinding if the current token has a
 160                  // rewind set on it already. This would offer protection from
 161                  // infinite loop, but might hinder some advanced rewinding.
 162                  $rewind_offset = $this->injectors[$i]->getRewindOffset();
 163                  if (is_int($rewind_offset)) {
 164                      for ($j = 0; $j < $rewind_offset; $j++) {
 165                          if (empty($zipper->front)) break;
 166                          $token = $zipper->prev($token);
 167                          // indicate that other injectors should not process this token,
 168                          // but we need to reprocess it
 169                          unset($token->skip[$i]);
 170                          $token->rewind = $i;
 171                          if ($token instanceof HTMLPurifier_Token_Start) {
 172                              array_pop($this->stack);
 173                          } elseif ($token instanceof HTMLPurifier_Token_End) {
 174                              $this->stack[] = $token->start;
 175                          }
 176                      }
 177                  }
 178                  $i = false;
 179              }
 180  
 181              // handle case of document end
 182              if ($token === NULL) {
 183                  // kill processing if stack is empty
 184                  if (empty($this->stack)) {
 185                      break;
 186                  }
 187  
 188                  // peek
 189                  $top_nesting = array_pop($this->stack);
 190                  $this->stack[] = $top_nesting;
 191  
 192                  // send error [TagClosedSuppress]
 193                  if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
 194                      $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
 195                  }
 196  
 197                  // append, don't splice, since this is the end
 198                  $token = new HTMLPurifier_Token_End($top_nesting->name);
 199  
 200                  // punt!
 201                  $reprocess = true;
 202                  continue;
 203              }
 204  
 205              //echo '<br>'; printZipper($zipper, $token);//printTokens($this->stack);
 206              //flush();
 207  
 208              // quick-check: if it's not a tag, no need to process
 209              if (empty($token->is_tag)) {
 210                  if ($token instanceof HTMLPurifier_Token_Text) {
 211                      foreach ($this->injectors as $i => $injector) {
 212                          if (isset($token->skip[$i])) {
 213                              continue;
 214                          }
 215                          if ($token->rewind !== null && $token->rewind !== $i) {
 216                              continue;
 217                          }
 218                          // XXX fuckup
 219                          $r = $token;
 220                          $injector->handleText($r);
 221                          $token = $this->processToken($r, $i);
 222                          $reprocess = true;
 223                          break;
 224                      }
 225                  }
 226                  // another possibility is a comment
 227                  continue;
 228              }
 229  
 230              if (isset($definition->info[$token->name])) {
 231                  $type = $definition->info[$token->name]->child->type;
 232              } else {
 233                  $type = false; // Type is unknown, treat accordingly
 234              }
 235  
 236              // quick tag checks: anything that's *not* an end tag
 237              $ok = false;
 238              if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
 239                  // claims to be a start tag but is empty
 240                  $token = new HTMLPurifier_Token_Empty(
 241                      $token->name,
 242                      $token->attr,
 243                      $token->line,
 244                      $token->col,
 245                      $token->armor
 246                  );
 247                  $ok = true;
 248              } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
 249                  // claims to be empty but really is a start tag
 250                  // NB: this assignment is required
 251                  $old_token = $token;
 252                  $token = new HTMLPurifier_Token_End($token->name);
 253                  $token = $this->insertBefore(
 254                      new HTMLPurifier_Token_Start($old_token->name, $old_token->attr, $old_token->line, $old_token->col, $old_token->armor)
 255                  );
 256                  // punt (since we had to modify the input stream in a non-trivial way)
 257                  $reprocess = true;
 258                  continue;
 259              } elseif ($token instanceof HTMLPurifier_Token_Empty) {
 260                  // real empty token
 261                  $ok = true;
 262              } elseif ($token instanceof HTMLPurifier_Token_Start) {
 263                  // start tag
 264  
 265                  // ...unless they also have to close their parent
 266                  if (!empty($this->stack)) {
 267  
 268                      // Performance note: you might think that it's rather
 269                      // inefficient, recalculating the autoclose information
 270                      // for every tag that a token closes (since when we
 271                      // do an autoclose, we push a new token into the
 272                      // stream and then /process/ that, before
 273                      // re-processing this token.)  But this is
 274                      // necessary, because an injector can make an
 275                      // arbitrary transformations to the autoclosing
 276                      // tokens we introduce, so things may have changed
 277                      // in the meantime.  Also, doing the inefficient thing is
 278                      // "easy" to reason about (for certain perverse definitions
 279                      // of "easy")
 280  
 281                      $parent = array_pop($this->stack);
 282                      $this->stack[] = $parent;
 283  
 284                      $parent_def = null;
 285                      $parent_elements = null;
 286                      $autoclose = false;
 287                      if (isset($definition->info[$parent->name])) {
 288                          $parent_def = $definition->info[$parent->name];
 289                          $parent_elements = $parent_def->child->getAllowedElements($config);
 290                          $autoclose = !isset($parent_elements[$token->name]);
 291                      }
 292  
 293                      if ($autoclose && $definition->info[$token->name]->wrap) {
 294                          // Check if an element can be wrapped by another
 295                          // element to make it valid in a context (for
 296                          // example, <ul><ul> needs a <li> in between)
 297                          $wrapname = $definition->info[$token->name]->wrap;
 298                          $wrapdef = $definition->info[$wrapname];
 299                          $elements = $wrapdef->child->getAllowedElements($config);
 300                          if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
 301                              $newtoken = new HTMLPurifier_Token_Start($wrapname);
 302                              $token = $this->insertBefore($newtoken);
 303                              $reprocess = true;
 304                              continue;
 305                          }
 306                      }
 307  
 308                      $carryover = false;
 309                      if ($autoclose && $parent_def->formatting) {
 310                          $carryover = true;
 311                      }
 312  
 313                      if ($autoclose) {
 314                          // check if this autoclose is doomed to fail
 315                          // (this rechecks $parent, which his harmless)
 316                          $autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
 317                          if (!$autoclose_ok) {
 318                              foreach ($this->stack as $ancestor) {
 319                                  $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
 320                                  if (isset($elements[$token->name])) {
 321                                      $autoclose_ok = true;
 322                                      break;
 323                                  }
 324                                  if ($definition->info[$token->name]->wrap) {
 325                                      $wrapname = $definition->info[$token->name]->wrap;
 326                                      $wrapdef = $definition->info[$wrapname];
 327                                      $wrap_elements = $wrapdef->child->getAllowedElements($config);
 328                                      if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
 329                                          $autoclose_ok = true;
 330                                          break;
 331                                      }
 332                                  }
 333                              }
 334                          }
 335                          if ($autoclose_ok) {
 336                              // errors need to be updated
 337                              $new_token = new HTMLPurifier_Token_End($parent->name);
 338                              $new_token->start = $parent;
 339                              // [TagClosedSuppress]
 340                              if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
 341                                  if (!$carryover) {
 342                                      $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
 343                                  } else {
 344                                      $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
 345                                  }
 346                              }
 347                              if ($carryover) {
 348                                  $element = clone $parent;
 349                                  // [TagClosedAuto]
 350                                  $element->armor['MakeWellFormed_TagClosedError'] = true;
 351                                  $element->carryover = true;
 352                                  $token = $this->processToken(array($new_token, $token, $element));
 353                              } else {
 354                                  $token = $this->insertBefore($new_token);
 355                              }
 356                          } else {
 357                              $token = $this->remove();
 358                          }
 359                          $reprocess = true;
 360                          continue;
 361                      }
 362  
 363                  }
 364                  $ok = true;
 365              }
 366  
 367              if ($ok) {
 368                  foreach ($this->injectors as $i => $injector) {
 369                      if (isset($token->skip[$i])) {
 370                          continue;
 371                      }
 372                      if ($token->rewind !== null && $token->rewind !== $i) {
 373                          continue;
 374                      }
 375                      $r = $token;
 376                      $injector->handleElement($r);
 377                      $token = $this->processToken($r, $i);
 378                      $reprocess = true;
 379                      break;
 380                  }
 381                  if (!$reprocess) {
 382                      // ah, nothing interesting happened; do normal processing
 383                      if ($token instanceof HTMLPurifier_Token_Start) {
 384                          $this->stack[] = $token;
 385                      } elseif ($token instanceof HTMLPurifier_Token_End) {
 386                          throw new HTMLPurifier_Exception(
 387                              'Improper handling of end tag in start code; possible error in MakeWellFormed'
 388                          );
 389                      }
 390                  }
 391                  continue;
 392              }
 393  
 394              // sanity check: we should be dealing with a closing tag
 395              if (!$token instanceof HTMLPurifier_Token_End) {
 396                  throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
 397              }
 398  
 399              // make sure that we have something open
 400              if (empty($this->stack)) {
 401                  if ($escape_invalid_tags) {
 402                      if ($e) {
 403                          $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
 404                      }
 405                      $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
 406                  } else {
 407                      if ($e) {
 408                          $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
 409                      }
 410                      $token = $this->remove();
 411                  }
 412                  $reprocess = true;
 413                  continue;
 414              }
 415  
 416              // first, check for the simplest case: everything closes neatly.
 417              // Eventually, everything passes through here; if there are problems
 418              // we modify the input stream accordingly and then punt, so that
 419              // the tokens get processed again.
 420              $current_parent = array_pop($this->stack);
 421              if ($current_parent->name == $token->name) {
 422                  $token->start = $current_parent;
 423                  foreach ($this->injectors as $i => $injector) {
 424                      if (isset($token->skip[$i])) {
 425                          continue;
 426                      }
 427                      if ($token->rewind !== null && $token->rewind !== $i) {
 428                          continue;
 429                      }
 430                      $r = $token;
 431                      $injector->handleEnd($r);
 432                      $token = $this->processToken($r, $i);
 433                      $this->stack[] = $current_parent;
 434                      $reprocess = true;
 435                      break;
 436                  }
 437                  continue;
 438              }
 439  
 440              // okay, so we're trying to close the wrong tag
 441  
 442              // undo the pop previous pop
 443              $this->stack[] = $current_parent;
 444  
 445              // scroll back the entire nest, trying to find our tag.
 446              // (feature could be to specify how far you'd like to go)
 447              $size = count($this->stack);
 448              // -2 because -1 is the last element, but we already checked that
 449              $skipped_tags = false;
 450              for ($j = $size - 2; $j >= 0; $j--) {
 451                  if ($this->stack[$j]->name == $token->name) {
 452                      $skipped_tags = array_slice($this->stack, $j);
 453                      break;
 454                  }
 455              }
 456  
 457              // we didn't find the tag, so remove
 458              if ($skipped_tags === false) {
 459                  if ($escape_invalid_tags) {
 460                      if ($e) {
 461                          $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
 462                      }
 463                      $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
 464                  } else {
 465                      if ($e) {
 466                          $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
 467                      }
 468                      $token = $this->remove();
 469                  }
 470                  $reprocess = true;
 471                  continue;
 472              }
 473  
 474              // do errors, in REVERSE $j order: a,b,c with </a></b></c>
 475              $c = count($skipped_tags);
 476              if ($e) {
 477                  for ($j = $c - 1; $j > 0; $j--) {
 478                      // notice we exclude $j == 0, i.e. the current ending tag, from
 479                      // the errors... [TagClosedSuppress]
 480                      if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
 481                          $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
 482                      }
 483                  }
 484              }
 485  
 486              // insert tags, in FORWARD $j order: c,b,a with </a></b></c>
 487              $replace = array($token);
 488              for ($j = 1; $j < $c; $j++) {
 489                  // ...as well as from the insertions
 490                  $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
 491                  $new_token->start = $skipped_tags[$j];
 492                  array_unshift($replace, $new_token);
 493                  if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
 494                      // [TagClosedAuto]
 495                      $element = clone $skipped_tags[$j];
 496                      $element->carryover = true;
 497                      $element->armor['MakeWellFormed_TagClosedError'] = true;
 498                      $replace[] = $element;
 499                  }
 500              }
 501              $token = $this->processToken($replace);
 502              $reprocess = true;
 503              continue;
 504          }
 505  
 506          $context->destroy('CurrentToken');
 507          $context->destroy('CurrentNesting');
 508          $context->destroy('InputZipper');
 509  
 510          unset($this->injectors, $this->stack, $this->tokens);
 511          return $zipper->toArray($token);
 512      }
 513  
 514      /**
 515       * Processes arbitrary token values for complicated substitution patterns.
 516       * In general:
 517       *
 518       * If $token is an array, it is a list of tokens to substitute for the
 519       * current token. These tokens then get individually processed. If there
 520       * is a leading integer in the list, that integer determines how many
 521       * tokens from the stream should be removed.
 522       *
 523       * If $token is a regular token, it is swapped with the current token.
 524       *
 525       * If $token is false, the current token is deleted.
 526       *
 527       * If $token is an integer, that number of tokens (with the first token
 528       * being the current one) will be deleted.
 529       *
 530       * @param HTMLPurifier_Token|array|int|bool $token Token substitution value
 531       * @param HTMLPurifier_Injector|int $injector Injector that performed the substitution; default is if
 532       *        this is not an injector related operation.
 533       * @throws HTMLPurifier_Exception
 534       */
 535      protected function processToken($token, $injector = -1)
 536      {
 537          // normalize forms of token
 538          if (is_object($token)) {
 539              $token = array(1, $token);
 540          }
 541          if (is_int($token)) {
 542              $token = array($token);
 543          }
 544          if ($token === false) {
 545              $token = array(1);
 546          }
 547          if (!is_array($token)) {
 548              throw new HTMLPurifier_Exception('Invalid token type from injector');
 549          }
 550          if (!is_int($token[0])) {
 551              array_unshift($token, 1);
 552          }
 553          if ($token[0] === 0) {
 554              throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');
 555          }
 556  
 557          // $token is now an array with the following form:
 558          // array(number nodes to delete, new node 1, new node 2, ...)
 559  
 560          $delete = array_shift($token);
 561          list($old, $r) = $this->zipper->splice($this->token, $delete, $token);
 562  
 563          if ($injector > -1) {
 564              // determine appropriate skips
 565              $oldskip = isset($old[0]) ? $old[0]->skip : array();
 566              foreach ($token as $object) {
 567                  $object->skip = $oldskip;
 568                  $object->skip[$injector] = true;
 569              }
 570          }
 571  
 572          return $r;
 573  
 574      }
 575  
 576      /**
 577       * Inserts a token before the current token. Cursor now points to
 578       * this token.  You must reprocess after this.
 579       * @param HTMLPurifier_Token $token
 580       */
 581      private function insertBefore($token)
 582      {
 583          // NB not $this->zipper->insertBefore(), due to positioning
 584          // differences
 585          $splice = $this->zipper->splice($this->token, 0, array($token));
 586  
 587          return $splice[1];
 588      }
 589  
 590      /**
 591       * Removes current token. Cursor now points to new token occupying previously
 592       * occupied space.  You must reprocess after this.
 593       */
 594      private function remove()
 595      {
 596          return $this->zipper->delete();
 597      }
 598  }
 599  
 600  // vim: et sw=4 sts=4


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1