TidyValidator.class.php

Go to the documentation of this file.
00001 <?php
00002 /***************************************************************************
00003  *   Copyright (C) 2007 by Sergey M. Skachkov                              *
00004  *                                                                         *
00005  *   This program is free software; you can redistribute it and/or modify  *
00006  *   it under the terms of the GNU Lesser General Public License as        *
00007  *   published by the Free Software Foundation; either version 3 of the    *
00008  *   License, or (at your option) any later version.                       *
00009  *                                                                         *
00010  ***************************************************************************/
00011 /* $Id: TidyValidator.class.php 4687 2007-12-09 18:57:18Z voxus $ */
00012 
/**
 * Validates and repairs an (X)HTML fragment using the tidy extension.
 *
 * The fragment is wrapped into a complete document (configurable header
 * plus a closing "</body></html>"), parsed and repaired by tidy, and the
 * repaired body replaces the stored content. Diagnostics reported by tidy
 * are collected with their line numbers shifted by the header height, so
 * they refer to lines of the fragment instead of the wrapped document.
 */
final class TidyValidator
{
    // fragment to validate; replaced with the repaired body by validateContent()
    private $content        = null;
    // newline-separated diagnostics of the last validation, or null
    private $messages       = null;
    // counters reported by tidy for the last validation, or null
    private $errorCount     = null;
    private $warningCount   = null;

    // options handed to tidy_parse_string()
    private $config             = array(
        'output-xhtml'      => true,
        'doctype'           => 'strict',
        'wrap'              => 0,
        'quote-marks'       => true,
        'drop-empty-paras'  => true
    );

    // document prologue placed before the content (whitespace is part of the value)
    private $header         = '
            <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
            <html xmlns="http://www.w3.org/1999/xhtml">
            <head>
                <title></title>
            </head>
            <body>';

    // number of lines in $header (leading empty line included);
    // subtracted from the line numbers reported by tidy
    private $headerLines        = 7;

    // encoding name handed to tidy_parse_string()
    private $encoding       = 'utf8';

    /**
     * Static constructor, for fluent usage.
     *
     * @return TidyValidator
     */
    public static function create()
    {
        return new self;
    }

    /**
     * Sets the fragment to be validated.
     *
     * @param string $content
     * @return TidyValidator this instance
     */
    public function setContent($content)
    {
        $this->content = $content;

        return $this;
    }

    /**
     * Returns the stored fragment; after a successful validateContent()
     * call this is the body repaired by tidy.
     */
    public function getContent()
    {
        return $this->content;
    }

    /**
     * Returns the diagnostics gathered by the last validateContent() call
     * as a newline-separated string, or null when there were none.
     */
    public function getMessages()
    {
        return $this->messages;
    }

    /**
     * Replaces the whole tidy configuration.
     *
     * @param array $config options accepted by tidy_parse_string()
     * @return TidyValidator this instance
     */
    public function setConfig($config)
    {
        $this->config = $config;

        return $this;
    }

    public function getConfig()
    {
        return $this->config;
    }

    /**
     * Sets the document prologue and recomputes how many lines it spans,
     * so that reported line numbers keep pointing into the fragment.
     *
     * @param string $header
     * @return TidyValidator this instance
     */
    public function setHeader($header)
    {
        $this->header = $header;
        $this->headerLines = count(explode("\n", $header));

        return $this;
    }

    public function getHeader()
    {
        return $this->header;
    }

    /**
     * Sets the encoding name passed to tidy.
     *
     * @param string $encoding
     * @return TidyValidator this instance
     */
    public function setEncoding($encoding)
    {
        $this->encoding = $encoding;

        return $this;
    }

    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Error count reported by tidy for the last validation (null before any).
     */
    public function getErrorCount()
    {
        return $this->errorCount;
    }

    /**
     * Warning count reported by tidy for the last validation (null before any).
     */
    public function getWarningCount()
    {
        return $this->warningCount;
    }

    /**
     * Validates and repairs the content.
     *
     * Does nothing when neither the argument nor the stored content is
     * non-empty. Otherwise it:
     *  - parses header + content + footer with tidy and stores the error
     *    and warning counts;
     *  - collects tidy diagnostics, shifting line numbers by the header
     *    height;
     *  - runs cleanRepair(), extracts the repaired body and replaces a
     *    fixed set of typographic characters with HTML entities;
     *  - compares original and repaired content (ignoring whitespace) and
     *    adds a notice when they differ.
     *
     * Assert::isTrue() is invoked if the body cannot be located in the tidy
     * output; its failure behaviour is defined outside this file.
     *
     * @param string|null $content fragment to validate; when empty, the
     *                             previously stored content is used
     * @return TidyValidator this instance
     */
    public function validateContent($content = null)
    {
        // NOTE(review): the en dash is mapped to &mdash; (not &ndash;);
        // kept as is, confirm whether that is intentional.
        static $symbols = array(
            '…'       => '&hellip;',
            '™'       => '&trade;',
            '©'        => '&copy;',
            '№'       => '&#8470;',
            '—'       => '&mdash;',
            '–'       => '&mdash;',
            '«'        => '&laquo;',
            '»'        => '&raquo;',
            '„'       => '&bdquo;',
            '“'       => '&ldquo;',
            '•'       => '&bull;',
            '®'        => '&reg;',
            '¼'        => '&frac14;',
            '½'        => '&frac12;',
            '¾'        => '&frac34;',
            '±'        => '&plusmn;'
        );

        if ($content) {
            $this->setContent($content);
        } elseif (!$this->getContent()) {
            return $this;
        }

        $tidy = tidy_parse_string(
            $this->getHeader()."\n".$this->getContent()."\n</body></html>",
            $this->getConfig(),
            $this->getEncoding()
        );

        $this->errorCount = tidy_error_count($tidy);
        $this->warningCount = tidy_warning_count($tidy);

        $out = $this->formatMessages(tidy_get_error_buffer($tidy));

        $tidy->cleanRepair();

        // greedy match: everything between the first <body> and the last </body>
        $bodyMatch = array();

        preg_match('/<body>(.*)<\/body>/s', (string) $tidy, $bodyMatch);

        Assert::isTrue(isset($bodyMatch[1]));

        $repaired = strtr($bodyMatch[1], $symbols);

        // compare the texts themselves: a checksum comparison could miss
        // a change on collision
        $changed =
            $this->stripWhitespace($this->getContent())
            !== $this->stripWhitespace($repaired);

        if ($changed) {
            $openTag = '<[\t ]*p[\t ]*>';
            $closeTag = '<[\t ]*\/[\t ]*p[\t ]*>';

            $paragraphsChanged =
                (
                    $this->countTags($openTag, $this->getContent())
                    != $this->countTags($openTag, $repaired)
                ) || (
                    $this->countTags($closeTag, $this->getContent())
                    != $this->countTags($closeTag, $repaired)
                );

            if ($paragraphsChanged) {
                $out =
                    (
                        $out === null
                            ? null
                            : $out."\n\n"
                    )
                    .'Paragraphs have been changed, please review content';
            } elseif (!$out) {
                $out = 'Content has been changed, please review';
            }
        }

        $this->messages = $out;
        $this->content = $repaired;

        return $this;
    }

    /**
     * Turns the raw tidy error buffer into the message list.
     *
     * Lines of the form "line N column ..." get N reduced by the header
     * height; any other non-blank line is kept verbatim; blank lines are
     * dropped. The buffer is passed through htmlspecialchars() first.
     *
     * @param string|false|null $rawMessages tidy_get_error_buffer() result
     * @return string|null newline-separated messages, null when there are none
     */
    private function formatMessages($rawMessages)
    {
        if (empty($rawMessages))
            return null;

        $formatted = array();

        foreach (explode("\n", htmlspecialchars($rawMessages)) as $string) {
            if (trim($string) === '')
                continue;

            $parts = array();

            if (preg_match('/^line (\d+) column (.*)$/s', $string, $parts)) {
                $formatted[] =
                    'line '
                    .($parts[1] - $this->headerLines)
                    .' column '.$parts[2];
            } else {
                // not a positional diagnostic (e.g. a summary line)
                $formatted[] = $string;
            }
        }

        return $formatted ? implode("\n", $formatted) : null;
    }

    /**
     * Removes tabs, newlines, carriage returns, NUL bytes and spaces.
     */
    private function stripWhitespace($text)
    {
        return preg_replace('/[\t\n\r\0 ]/', '', $text);
    }

    /**
     * Counts case-insensitive occurrences of a regex fragment in a text.
     *
     * @param string $tag  pattern body, without delimiters
     * @param string $text
     * @return int number of matches, 0 when none (or on a regex error)
     */
    private function countTags($tag, $text)
    {
        $matches = array();

        return (int) preg_match_all("/$tag/i", $text, $matches);
    }
}
00289 ?>

Generated on Sun Dec 9 21:56:24 2007 for onPHP by  doxygen 1.5.4