00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
/**
 * Validates and repairs an HTML fragment using the tidy extension.
 *
 * The fragment is wrapped between a configurable document header and a
 * closing "</body></html>", parsed by tidy, and the repaired contents of
 * <body> replace the stored content. Tidy's diagnostics are kept as
 * HTML-escaped text whose line numbers are shifted by the number of header
 * lines, so they refer to the fragment instead of the wrapped document.
 *
 * All setters return $this so calls can be chained.
 */
final class TidyValidator
{
    private $content = null;
    private $messages = null;
    private $errorCount = null;
    private $warningCount = null;

    // Options passed as the second argument of tidy_parse_string().
    private $config = array(
        'output-xhtml' => true,
        'doctype' => 'strict',
        'wrap' => 0,
        'quote-marks' => true,
        'drop-empty-paras' => true
    );

    // Markup placed before the fragment. The literal starts with a line
    // break, so it spans seven lines (see $headerLines).
    private $header = '
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title></title>
</head>
<body>';

    // Number of lines in $header; subtracted from the line numbers tidy
    // reports. Kept in sync by setHeader().
    private $headerLines = 7;

    private $encoding = 'utf8';

    /**
     * @return TidyValidator a new instance
     */
    public static function create()
    {
        return new self;
    }

    /**
     * Stores the fragment to be validated.
     *
     * @param mixed $content fragment markup
     * @return TidyValidator $this
     */
    public function setContent($content)
    {
        $this->content = $content;

        return $this;
    }

    /**
     * @return mixed the stored fragment; after a validateContent() run this
     *               is the repaired <body> contents
     */
    public function getContent()
    {
        return $this->content;
    }

    /**
     * @return string|null messages produced by the last validateContent()
     *                     run, or null when there were none
     */
    public function getMessages()
    {
        return $this->messages;
    }

    /**
     * Replaces (does not merge) the tidy configuration.
     *
     * @param mixed $config value handed to tidy_parse_string() as its config
     * @return TidyValidator $this
     */
    public function setConfig($config)
    {
        $this->config = $config;

        return $this;
    }

    /**
     * @return mixed the current tidy configuration
     */
    public function getConfig()
    {
        return $this->config;
    }

    /**
     * Replaces the document header and recomputes how many lines it spans.
     *
     * @param string $header markup placed before the fragment
     * @return TidyValidator $this
     */
    public function setHeader($header)
    {
        $this->header = $header;
        // One more line than there are line breaks; same value as
        // count(explode("\n", $header)).
        $this->headerLines = substr_count($header, "\n") + 1;

        return $this;
    }

    /**
     * @return string the document header placed before the fragment
     */
    public function getHeader()
    {
        return $this->header;
    }

    /**
     * @param string $encoding encoding name handed to tidy_parse_string()
     * @return TidyValidator $this
     */
    public function setEncoding($encoding)
    {
        $this->encoding = $encoding;

        return $this;
    }

    /**
     * @return string the encoding name handed to tidy_parse_string()
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * @return int|null error count of the last validateContent() run, null
     *                  before the first run
     */
    public function getErrorCount()
    {
        return $this->errorCount;
    }

    /**
     * @return int|null warning count of the last validateContent() run, null
     *                  before the first run
     */
    public function getWarningCount()
    {
        return $this->warningCount;
    }

    /**
     * Runs tidy over the fragment and stores the repaired markup, the
     * error/warning counts and the collected messages.
     *
     * When $content is falsy the previously stored content is used; when
     * that is falsy too, nothing happens and results of an earlier run are
     * left untouched.
     *
     * Besides tidy's own diagnostics a notice is added when the repaired
     * markup differs from the input (compared with whitespace ignored):
     * a paragraph notice when the number of <p> or </p> tags changed,
     * otherwise a generic notice, the latter only if tidy reported nothing.
     *
     * @param mixed $content optional fragment replacing the stored content
     * @return TidyValidator $this
     */
    public function validateContent($content = null)
    {
        // NOTE(review): every entry except the en dash maps a character to
        // itself, so strtr() below only turns en dashes into em dashes. This
        // looks like the values were meant to be HTML entities that got
        // decoded somewhere - confirm against the canonical source.
        static $symbols = array(
            '…' => '…',
            '™' => '™',
            '©' => '©',
            '№' => '№',
            '—' => '—',
            '–' => '—',
            '«' => '«',
            '»' => '»',
            '„' => '„',
            '“' => '“',
            '•' => '•',
            '®' => '®',
            '¼' => '¼',
            '½' => '½',
            '¾' => '¾',
            '±' => '±'
        );

        if ($content) {
            $this->setContent($content);
        } elseif (!$this->getContent()) {
            return $this;
        }

        $original = $this->getContent();

        $tidy = tidy_parse_string(
            $this->getHeader()."\n".$original."\n</body></html>",
            $this->getConfig(),
            $this->getEncoding()
        );

        // tidy_parse_string() yields false on failure; stop here instead of
        // passing false to the tidy_* calls below.
        Assert::isTrue($tidy !== false);

        $this->errorCount = tidy_error_count($tidy);
        $this->warningCount = tidy_warning_count($tidy);

        // Read the diagnostics before cleanRepair(), as the original did.
        $out = $this->formatMessages(tidy_get_error_buffer($tidy));

        $tidy->cleanRepair();

        $outContent = array();

        // The tidy object is used as a string here, i.e. its output markup.
        preg_match_all('/<body>(.*)<\/body>/s', (string) $tidy, $outContent);

        Assert::isTrue(isset($outContent[1][0]));

        $repaired = strtr($outContent[1][0], $symbols);

        $changed =
            crc32($this->stripWhitespace($original))
            !== crc32($this->stripWhitespace($repaired));

        if ($changed) {
            if ($this->paragraphTagsDiffer($original, $repaired)) {
                $notice = 'Paragraphs have been changed, please review content';

                $out =
                    $out === null
                        ? $notice
                        : $out."\n\n".$notice;
            } elseif ($out === null) {
                $out = 'Content has been changed, please review';
            }
        }

        $this->messages = $out;
        $this->content = $repaired;

        return $this;
    }

    /**
     * Turns tidy's raw error buffer into the message text.
     *
     * The buffer is HTML-escaped and split into lines. Lines of the form
     * "line N column ..." get N reduced by the header line count; blank
     * lines are dropped; any other line is kept as is, so an unexpected
     * format can no longer cause undefined offsets or arithmetic on a
     * non-numeric string.
     *
     * @param string|false|null $rawMessages value of tidy_get_error_buffer()
     * @return string|null newline-separated messages, null when there are none
     */
    private function formatMessages($rawMessages)
    {
        if (empty($rawMessages)) {
            return null;
        }

        $formatted = array();

        foreach (explode("\n", htmlspecialchars($rawMessages)) as $string) {
            if (trim($string) === '') {
                continue;
            }

            $parts = array();

            if (preg_match('/^line (\d+) column (.*)$/', $string, $parts)) {
                $formatted[] =
                    'line '
                    .((int) $parts[1] - $this->headerLines)
                    .' column '.$parts[2];
            } else {
                $formatted[] = $string;
            }
        }

        return $formatted ? implode("\n", $formatted) : null;
    }

    /**
     * Removes tabs, line breaks, NUL bytes and spaces so two markups can be
     * compared regardless of formatting.
     *
     * @param string $text
     * @return string
     */
    private function stripWhitespace($text)
    {
        // Empty string instead of null: a null replacement is deprecated
        // since PHP 8.1.
        return preg_replace('/[\t\n\r\0 ]/', '', $text);
    }

    /**
     * Tells whether the number of opening or closing <p> tags differs
     * between the two markups.
     *
     * @param string $before
     * @param string $after
     * @return bool
     */
    private function paragraphTagsDiffer($before, $after)
    {
        $opening = '<[\t ]*p[\t ]*>';
        $closing = '<[\t ]*\/[\t ]*p[\t ]*>';

        return
            $this->countTags($opening, $before) != $this->countTags($opening, $after)
            || $this->countTags($closing, $before) != $this->countTags($closing, $after);
    }

    /**
     * Counts case-insensitive matches of a regular expression fragment.
     *
     * @param string $tag pattern body without delimiters; "/" must already
     *                    be escaped because it is used as the delimiter
     * @param string $text text to search
     * @return int number of matches, 0 when there are none or matching fails
     */
    private function countTags($tag, $text)
    {
        $matches = array();

        $found = preg_match_all("/$tag/i", $text, $matches);

        return $found ? count($matches[0]) : 0;
    }
}
00289 ?>