RussianTypograph.class.php

Go to the documentation of this file.
00001 <?php
00002 /***************************************************************************
00003  *   Copyright (C) 2006-2007 by Konstantin V. Arkhipov                     *
00004  *                                                                         *
00005  *   This program is free software; you can redistribute it and/or modify  *
00006  *   it under the terms of the GNU Lesser General Public License as        *
00007  *   published by the Free Software Foundation; either version 3 of the    *
00008  *   License, or (at your option) any later version.                       *
00009  *                                                                         *
00010  ***************************************************************************/
00011 /* $Id: RussianTypograph.class.php 4687 2007-12-09 18:57:18Z voxus $ */
00012 
/**
 * Typographic filter for Russian text that is going to be emitted as HTML.
 *
 * The filter works in three stages (see apply()):
 *  1. literal sequences from self::$symbols are replaced via strtr();
 *  2. markup-looking fragments (anything containing '<' or '>') are cut out
 *     and replaced by MAGIC_DELIMITER so that the regular expressions never
 *     touch them;
 *  3. the remaining text is run through the typographic regular expressions
 *     and the cut-out fragments are put back in their original order.
 */
final class RussianTypograph extends BaseFilter
{
    // Placeholder standing in for a cut-out tag. A literal '<>' can not
    // occur in the text segments: strtr() has already turned '<>' into
    // '&ne;', and segments are only treated as text when they contain
    // neither '<' nor '>'.
    const MAGIC_DELIMITER = '<>';

    // Literal (non-regexp) replacements, applied with strtr().
    private static $symbols =
        array(
            // NOTE(review): the column alignment of the original listing
            // suggests this key is a two-byte no-break space (U+00A0), not a
            // plain space; it is spelled as an escape here so the invisible
            // character stays visible — confirm against the original file.
            "\xC2\xA0" => ' ', // bovm
            ' < '   => ' &lt; ',
            ' > '   => ' &gt; ',
            '…'     => '&hellip;',
            '...'   => '&hellip;',
            '™'     => '&trade;',
            '(tm)'  => '&trade;',
            '(TM)'  => '&trade;',
            '©'     => '&copy;',
            '(c)'   => '&copy;',
            '(C)'   => '&copy;',
            '№'     => '&#8470;',
            '—'     => '&mdash;',
            '–'     => '&mdash;',
            '«'     => '&laquo;',
            '»'     => '&raquo;',
            '„'     => '&bdquo;',
            '“'     => '&ldquo;',
            '•'     => '&bull;',
            '®'     => '&reg;',
            '¼'     => '&frac14;',
            '½'     => '&frac12;',
            '¾'     => '&frac34;',
            '±'     => '&plusmn;',
            '+/-'   => '&plusmn;',
            '!='    => '&ne;',
            '<>'    => '&ne;',

            // just to avoid regexp's
            ' 1/4'  => ' &frac14;',
            ' 1/2'  => ' &frac12;',
            ' 3/4'  => ' &frac34;',
            '1/4 '  => '&frac14; ',
            '1/2 '  => '&frac12; ',
            '3/4 '  => '&frac34; '
        );

    // Plain pattern => replacement pairs, applied in this order by
    // typographize(). The two rules that need PHP code to build their
    // replacement are applied afterwards through preg_replace_callback().
    private static $from = array(
        '~\-{2,}~',                     // --
        '~([\w\pL]+)\s\-\s~',           // foo - bar
        '~([\s\pP])([\w\pL]{1,2})\s~U', // bar a foo
        '~\"([^\s]*)\"~',               // "quote"
        '~\"([^\s]*)\s+([^\s\.]*)\"~'   // "quote quote"
    );

    private static $to = array(
        '-',
        '$1&nbsp;&#151; ',
        '$1$2&nbsp;',
        '&laquo;$1&raquo;',
        '&laquo;$1 $2&raquo;'
    );

    /**
     * @return RussianTypograph instance obtained from
     *         Singleton::getInstance()
     */
    public static function me()
    {
        return Singleton::getInstance(__CLASS__);
    }

    /**
     * Applies the typographic rules to $value.
     *
     * @param string|null $value raw text, possibly containing HTML tags
     * @return string|null processed text, or null when the input is empty
     *         after symbol replacement and trimming
     */
    public function apply($value)
    {
        if ($value === null)
            return null;

        $value = trim(strtr($value, self::$symbols));

        // Strict comparison on purpose: a loose truthiness check would
        // also discard the perfectly valid input "0".
        if ($value === '')
            return null;

        // Cut the value into alternating text / markup pieces. Markup
        // pieces are recognised below by the presence of '<' or '>'.
        $list =
            preg_split(
                '~([^<>]*)(?![^<]*?>)~',
                $value,
                -1,
                PREG_SPLIT_DELIM_CAPTURE
                    | PREG_SPLIT_NO_EMPTY
            );

        $tags = array();
        $text = null;

        if (!$list) {
            // preg_split() failed: treat the whole value as plain text
            // instead of losing it.
            $text = $value;
        } elseif (count($list) > 1) {
            foreach ($list as $piece) {
                if (
                    (strpos($piece, '<') === false)
                    && (strpos($piece, '>') === false)
                ) {
                    $text .= $piece;
                } else {
                    // Remember the markup and leave a placeholder in
                    // the text so it can be restored afterwards.
                    $tags[] = $piece;
                    $text .= self::MAGIC_DELIMITER;
                }
            }
        } else {
            $text = $list[0];
        }

        $text = $this->typographize($text);

        if ($tags) {
            $i = 0;
            $out = null;

            // Stitch the stored markup back in, one piece per placeholder.
            foreach (explode(self::MAGIC_DELIMITER, $text) as $chunk) {
                $out .= $chunk;

                if (isset($tags[$i]))
                    $out .= $tags[$i++];
            }

            // Note: output containing markup is returned without
            // whitespace compression, unlike the plain-text path below.
            return $out;
        }

        return CompressWhitespaceFilter::me()->apply($text);
    }

    /**
     * Runs the regular-expression based rules over a tag-free text.
     *
     * @param string $text text with markup replaced by MAGIC_DELIMITER
     * @return string|null processed text; texts shorter than two bytes
     *         are returned untouched
     */
    private function typographize($text)
    {
        if (strlen($text) < 2)
            return $text;

        $text =
            preg_replace(
                self::$from,
                self::$to,
                stripslashes($text)
            );

        // The next two rules used the PCRE "e" (eval) modifier, which was
        // deprecated in PHP 5.5 and removed in PHP 7.0. Callbacks produce
        // the same result without evaluating generated code.

        // "qu"o"te"
        $text =
            preg_replace_callback(
                '~\"(.*)\"~',
                array($this, 'wrapOuterQuotes'),
                $text
            );

        // rock'n'roll
        return
            preg_replace_callback(
                '~([\w\pL\']+)~',
                array($this, 'replaceApostrophes'),
                $text
            );
    }

    /**
     * Callback for the "qu"o"te" rule: wraps the quoted text in guillemets
     * and converts any quotes nested inside it via innerQuotes().
     *
     * @param array $match preg match, $match[1] is the text between the
     *        outermost double quotes
     * @return string
     */
    private function wrapOuterQuotes(array $match)
    {
        return '&laquo;'.$this->innerQuotes($match[1]).'&raquo;';
    }

    /**
     * Callback for the rock'n'roll rule: replaces apostrophes inside a
     * word with the '&#146;' character reference.
     *
     * @param array $match preg match, $match[1] is the matched word
     * @return string
     */
    private function replaceApostrophes(array $match)
    {
        return str_replace("'", '&#146;', $match[1]);
    }

    /**
     * Converts second-level quotes (either already converted guillemets or
     * plain double quotes) into '&#132;' ... '&#147;' pairs.
     *
     * @param string $text text found between the outermost quotes
     * @return string|null
     */
    private function innerQuotes($text)
    {
        // stripslashes() is kept from the original implementation, where
        // it undid the escaping added by the "e" modifier; keeping it
        // preserves the output for input that still carries backslashes.
        return
            preg_replace(
                array(
                    '~&laquo;(.*)&raquo;~U',
                    '~\"(.*)\"~U',
                ),
                '&#132;$1&#147;',
                stripslashes($text)
            );
    }
}
00169 ?>

Generated on Sun Dec 9 21:56:23 2007 for onPHP by  doxygen 1.5.4