GenericUri.class.php

Go to the documentation of this file.
00001 <?php
00002 /***************************************************************************
00003  *   Copyright (C) 2007 by Ivan Y. Khvostishkov                            *
00004  *                                                                         *
00005  *   This program is free software; you can redistribute it and/or modify  *
00006  *   it under the terms of the GNU Lesser General Public License as        *
00007  *   published by the Free Software Foundation; either version 3 of the    *
00008  *   License, or (at your option) any later version.                       *
00009  *                                                                         *
00010  ***************************************************************************/
00011 /* $Id: GenericUri.class.php 4687 2007-12-09 18:57:18Z voxus $ */
00012 
/**
 * Generic URI value object modelled on RFC 3986.
 *
 * Holds the five URI components (scheme, authority split into
 * user-info/host/port, path, query, fragment) and offers parsing,
 * reference resolution (transform), per-component validation and
 * normalization.
 */
class GenericUri implements Stringable
{
    // character-class fragments; they are embedded inside [...] in regexes
    const CHARS_UNRESERVED      = 'a-z0-9-._~';
    const CHARS_SUBDELIMS       = '!$&\'()*+,;=';
    const PATTERN_PCTENCODED    = '%[0-9a-f][0-9a-f]';

    // null always means "component is absent", '' means "present but empty"
    protected $scheme   = null;

    protected $userInfo = null;
    protected $host     = null;
    protected $port     = null;

    protected $path     = null;
    protected $query    = null;
    protected $fragment = null;

    /**
     * @return GenericUri
     */
    public static function create()
    {
        return new self;
    }

    /**
     * Parses a URI string into a new object; $this is left untouched.
     *
     * @param string  $uri          URI or relative reference to parse
     * @param boolean $guessClass   when true, the class of the result is
     *                              looked up by (lower-cased) scheme in
     *                              getKnownSubSchemes()
     * @throws WrongArgumentException when $uri does not match the pattern
     * @return GenericUri instance of the guessed class or of get_class($this)
     */
    final public function parse($uri, $guessClass = false)
    {
        $schemePattern = '([^:/?#]+):';
        $authorityPattern = '(//([^/?#]*))';
        $restPattern = '([^?#]*)(\?([^#]*))?(#(.*))?';
        $matches = array();

        if (
            $guessClass
            && ($knownSubSchemes = $this->getKnownSubSchemes())
            && preg_match("~^{$schemePattern}~", $uri, $matches)
            && isset($knownSubSchemes[strtolower($matches[1])])
        ) {
            $class = $knownSubSchemes[strtolower($matches[1])];
        } else {
            $class = get_class($this);
        }

        $result = new $class;
        $isUrn = ($result instanceof Urn);

        if ($result instanceof Url) {
            // scheme and authority are only accepted together
            $pattern = "({$schemePattern}{$authorityPattern})?";
        } elseif ($isUrn) {
            // no authority part at all
            $pattern = "({$schemePattern})?";
        } else {
            $pattern = "({$schemePattern})?{$authorityPattern}?";
        }

        $pattern = "~^{$pattern}{$restPattern}$~";

        if (!preg_match($pattern, $uri, $matches)) {
            throw new WrongArgumentException('not well-formed URI');
        }

        // groups 1 and 2: "scheme:" (with "//authority" for Url) and scheme
        if ($matches[1] !== '') {
            $result->setScheme($matches[2]);
        }

        // index of the path group; Urn patterns have no authority groups
        $pathIndex = 3;

        if (!$isUrn) {
            // groups 3 and 4: "//authority" and authority
            if ($matches[3] !== '') {
                $result->setAuthority($matches[4]);
            }

            $pathIndex = 5;
        }

        $result->setPath($matches[$pathIndex]);

        // "?query" and "#fragment" groups are missing from $matches
        // when they are not present in the input, hence empty()
        if (!empty($matches[$pathIndex + 1])) {
            $result->setQuery($matches[$pathIndex + 2]);
        }

        if (!empty($matches[$pathIndex + 3])) {
            $result->setFragment($matches[$pathIndex + 4]);
        }

        return $result;
    }

    /**
     * Resolves $reference against $this used as the base URI
     * (RFC 3986, section 5.2.2) and returns the target as a new object.
     *
     * @param GenericUri $reference reference to resolve
     * @param boolean    $strict    when false, a reference scheme equal to
     *                              the base scheme is ignored
     * @throws WrongStateException when the base URI has no scheme
     * @return GenericUri
     */
    final public function transform(GenericUri $reference, $strict = true)
    {
        if ($this->getScheme() === null) {
            throw new WrongStateException(
                'URI without scheme cannot be a base URI'
            );
        }

        $referenceScheme = $reference->getScheme();

        // non-strict mode: a scheme identical to the base one is treated
        // as undefined; a reference without scheme stays relative
        if (!$strict && $referenceScheme === $this->getScheme()) {
            $referenceScheme = null;
        }

        if ($referenceScheme !== null) {
            // reference is absolute: only dot segments are removed
            $class = get_class($reference);
            $result = new $class;

            $result->
                setScheme($referenceScheme)->
                setUserInfo($reference->getUserInfo())->
                setHost($reference->getHost())->
                setPort($reference->getPort())->
                setPath(self::removeDotSegments($reference->getPath()))->
                setQuery($reference->getQuery());
        } else {
            $class = get_class($this);
            $result = new $class;

            $result->setScheme($this->getScheme());

            if ($reference->getAuthority() !== null) {
                $result->
                    setUserInfo($reference->getUserInfo())->
                    setHost($reference->getHost())->
                    setPort($reference->getPort())->
                    setPath(self::removeDotSegments($reference->getPath()))->
                    setQuery($reference->getQuery());
            } else {
                $result->
                    setUserInfo($this->getUserInfo())->
                    setHost($this->getHost())->
                    setPort($this->getPort());

                $path = $reference->getPath();

                // strict test: the path "0" is not an empty path
                if ($path === null || $path === '') {
                    $result->
                        setPath($this->getPath())->
                        setQuery(
                            $reference->getQuery() !== null
                            ? $reference->getQuery()
                            : $this->getQuery()
                        );
                } else {
                    $result->setQuery($reference->getQuery());

                    if ($path[0] == '/') {
                        $result->setPath(self::removeDotSegments($path));
                    } else {
                        $result->setPath(
                            self::removeDotSegments($this->mergePath($path))
                        );
                    }
                }
            }
        }

        $result->setFragment($reference->getFragment());

        return $result;
    }

    /**
     * @return array merged getKnownSubSchemes() maps of Urn and Url;
     *               parse() uses it as lower-cased scheme => class name
     */
    public function getKnownSubSchemes()
    {
        return array_merge(
            Urn::create()->getKnownSubSchemes(),
            Url::create()->getKnownSubSchemes()
        );
    }

    /**
     * @return GenericUri
     */
    public function setScheme($scheme)
    {
        $this->scheme = $scheme;

        return $this;
    }

    public function getScheme()
    {
        return $this->scheme;
    }

    /**
     * @return GenericUri
     */
    public function setUserInfo($userInfo)
    {
        $this->userInfo = $userInfo;

        return $this;
    }

    public function getUserInfo()
    {
        return $this->userInfo;
    }

    /**
     * @return GenericUri
     */
    public function setHost($host)
    {
        $this->host = $host;

        return $this;
    }

    public function getHost()
    {
        return $this->host;
    }

    /**
     * @return GenericUri
     */
    public function setPort($port)
    {
        $this->port = $port;

        return $this;
    }

    public function getPort()
    {
        return $this->port;
    }

    /**
     * @return GenericUri
     */
    public function setPath($path)
    {
        $this->path = $path;

        return $this;
    }

    public function getPath()
    {
        return $this->path;
    }

    /**
     * @return GenericUri
     */
    public function setQuery($query)
    {
        $this->query = $query;

        return $this;
    }

    /**
     * Appends $string to the query, inserting $separator only when the
     * current query is neither null nor an empty string.
     *
     * @return GenericUri
     */
    public function appendQuery($string, $separator = '&')
    {
        $query = $this->query;

        // strict test: the query "0" is not an empty query
        if ($query !== null && $query !== '') {
            $query .= $separator;
        }

        $query .= $string;

        $this->setQuery($query);

        return $this;
    }

    public function getQuery()
    {
        return $this->query;
    }

    /**
     * @return GenericUri
     */
    public function setFragment($fragment)
    {
        $this->fragment = $fragment;

        return $this;
    }

    public function getFragment()
    {
        return $this->fragment;
    }

    /**
     * Splits "[userinfo@]host[:port]" and stores the three parts.
     * Parts missing from $authority are reset to null, so that no value
     * from a previously set authority survives.
     *
     * @throws WrongArgumentException when $authority does not match
     * @return GenericUri
     */
    public function setAuthority($authority)
    {
        $authorityPattern = '~^(([^@]*)@)?((\[.+\])|([^:]*))(:(.*))?$~';
        $authorityMatches = array();

        if (
            !preg_match(
                $authorityPattern, $authority, $authorityMatches
            )
        ) {
            throw new WrongArgumentException(
                'not well-formed authority part'
            );
        }

        // group 1 is "userinfo@", group 2 is the user-info itself
        $this->setUserInfo(
            $authorityMatches[1] !== ''
                ? $authorityMatches[2]
                : null
        );

        $this->setHost($authorityMatches[3]);

        // group 6 is ":port"; it is missing from the array when absent
        $this->setPort(
            !empty($authorityMatches[6])
                ? $authorityMatches[7]
                : null
        );

        return $this;
    }

    /**
     * @return string|null "[userinfo@][host][:port]" or null when
     *                     user-info, host and port are all null
     */
    public function getAuthority()
    {
        $result = null;

        if ($this->userInfo !== null) {
            $result .= $this->userInfo.'@';
        }

        if ($this->host !== null) {
            $result .= $this->host;
        }

        if ($this->port !== null) {
            $result .= ':'.$this->port;
        }

        return $result;
    }

    /**
     * Not supported here.
     *
     * @throws UnsupportedMethodException always
     */
    public function setSchemeSpecificPart($schemeSpecificPart)
    {
        throw new UnsupportedMethodException('use parse() instead');
    }

    /**
     * @return string|null everything that follows "scheme:"
     */
    public function getSchemeSpecificPart()
    {
        $result = null;

        $authority = $this->getAuthority();

        if ($authority !== null) {
            $result .= '//'.$authority;
        }

        $result .= $this->toStringFromRoot();

        return $result;
    }

    /**
     * @return string|null the assembled URI
     */
    public function toString()
    {
        $result = null;

        if ($this->scheme !== null) {
            $result .= $this->scheme.':';
        }

        $result .= $this->getSchemeSpecificPart();

        return $result;
    }

    /**
     * String-context counterpart of toString(); always returns a string.
     *
     * @return string
     */
    public function __toString()
    {
        return (string) $this->toString();
    }

    /**
     * @return string|null path followed by "?query" and "#fragment"
     */
    public function toStringFromRoot()
    {
        $result = $this->path;

        if ($this->query !== null) {
            $result .= '?'.$this->query;
        }

        if ($this->fragment !== null) {
            $result .= '#'.$this->fragment;
        }

        return $result;
    }

    /**
     * @return boolean true when every component passes its own check
     */
    public function isValid()
    {
        return
            $this->isValidScheme()
            && $this->isValidUserInfo()
            && $this->isValidHost()
            && $this->isValidPort()
            && $this->isValidPath()
            && $this->isValidQuery()
            && $this->isValidFragment();
    }

    public function isValidScheme()
    {
        // null (no scheme) is valid, empty string is NOT valid;
        // D modifier: do not let "$" match before a trailing newline
        return (
            $this->scheme === null
            || preg_match('~^[a-z][-+.a-z0-9]*$~iD', $this->scheme) == 1
        );
    }

    public function isValidUserInfo()
    {
        // empty string IS valid
        if (!$this->userInfo) {
            return true;
        }

        $charPattern = $this->userInfoCharPattern();

        return (preg_match("/^$charPattern*$/iD", $this->userInfo) == 1);
    }

    /**
     * Accepts a bracketed IPv6 / IPvFuture literal, a dotted IPv4
     * address or anything that passes isValidHostName().
     */
    public function isValidHost()
    {
        // empty string IS valid
        if (empty($this->host)) {
            return true;
        }

        $decOctet =
            '(\d)|'         // 0-9
            .'([1-9]\d)|'   // 10-99
            .'(1\d\d)|'     // 100-199
            .'(2[0-4]\d)|'  // 200-249
            .'(25[0-5])';   // 250-255

        $ipV4Address = "($decOctet)\.($decOctet)\.($decOctet)\.($decOctet)";

        $hexdig = '[0-9a-f]';

        $h16 = $hexdig.'{1,4}';
        $ls32 = "(({$h16}:{$h16})|({$ipV4Address}))";

        // whitespace is layout only: the pattern is used with the x modifier
        $ipV6Address =
            "  (                            ({$h16}:){6} {$ls32})"
            ."|(                          ::({$h16}:){5} {$ls32})"
            ."|(                ({$h16})? ::({$h16}:){4} {$ls32})"
            ."|( (({$h16}:){0,1} {$h16})? ::({$h16}:){3} {$ls32})"
            ."|( (({$h16}:){0,2} {$h16})? ::({$h16}:){2} {$ls32})"
            ."|( (({$h16}:){0,3} {$h16})? :: {$h16}:     {$ls32})"
            ."|( (({$h16}:){0,4} {$h16})? ::             {$ls32})"
            ."|( (({$h16}:){0,5} {$h16})? ::             {$h16} )"
            ."|( (({$h16}:){0,6} {$h16})? ::                    )";

        $unreserved = self::CHARS_UNRESERVED;
        $subDelims = self::CHARS_SUBDELIMS;

        $ipVFutureAddress =
            "v{$hexdig}+\.[{$unreserved}{$subDelims}:]+";

        if (
            preg_match(
                "/^\[(({$ipV6Address})|({$ipVFutureAddress}))\]$/ixD",
                $this->host
            )
        ) {
            return true;
        }

        if (preg_match("/^{$ipV4Address}$/iD", $this->host)) {
            return true;
        }

        return $this->isValidHostName();
    }

    public function isValidPort()
    {
        // empty string IS valid
        if (!$this->port) {
            return true;
        }

        if (!preg_match('~^\d*$~D', $this->port)) {
            return false;
        }

        return ($this->port > 0 && $this->port <= 65535);
    }

    /**
     * Checks the characters of the path and then the shape rule that
     * applies to it: abempty (with authority), absolute, noscheme,
     * rootless or empty.
     */
    public function isValidPath()
    {
        $path = (string) $this->path;
        $charPattern = $this->segmentCharPattern();

        if (
            !preg_match(
                "/^($charPattern+)?"
                ."(\/$charPattern*)*$/iD",
                $path
            )
        ) {
            return false;
        }

        if ($this->getAuthority() !== null) {
            // abempty: empty or starting with a slash
            return ($path === '' || $path[0] == '/');
        }

        if ($path === '') {
            // empty
            return true;
        }

        if ($path[0] == '/') {
            // absolute: must not start with a double slash
            return ($path == '/' || $path[1] != '/');
        }

        if ($this->scheme === null) {
            // noscheme - first segment must be w/o colon
            $segments = explode('/', $path);

            return (strpos($segments[0], ':') === false);
        }

        // rootless
        return true;
    }

    public function isValidQuery()
    {
        // empty string IS valid
        return $this->isValidFragmentOrQuery($this->query);
    }

    public function isValidFragment()
    {
        // empty string IS valid
        return $this->isValidFragmentOrQuery($this->fragment);
    }

    public function isAbsolute()
    {
        return ($this->scheme !== null);
    }

    public function isRelative()
    {
        return ($this->scheme === null);
    }

    protected function isValidHostName()
    {
        $charPattern = $this->hostNameCharPattern();

        return (
            preg_match(
                "/^$charPattern*$/iD",
                $this->host
            ) == 1
        );
    }

    /**
     * Builds a regex fragment for one allowed character.
     *
     * @param string|null $extraChars        extra characters, already
     *                                       escaped for use inside [...]
     * @param boolean     $pctEncodedPattern true: return a complete group
     *                                       that also accepts %XX;
     *                                       false: return the bare list
     *                                       of characters for [...]
     * @return string
     */
    protected function charPattern(
        $extraChars = null, $pctEncodedPattern = true
    )
    {
        $unreserved = self::CHARS_UNRESERVED;
        $subDelims = self::CHARS_SUBDELIMS;
        $pctEncoded = self::PATTERN_PCTENCODED;

        $result = "{$unreserved}{$subDelims}$extraChars";

        if ($pctEncodedPattern) {
            $result = "(([{$result}])|({$pctEncoded}))";
        }

        return $result;
    }

    protected function userInfoCharPattern($pctEncoded = true)
    {
        return $this->charPattern(':', $pctEncoded);
    }

    protected function hostNameCharPattern($pctEncoded = true)
    {
        return $this->charPattern(null, $pctEncoded);
    }

    protected function segmentCharPattern($pctEncoded = true)
    {
        return $this->charPattern(':@', $pctEncoded);
    }

    protected function fragmentOrQueryCharPattern($pctEncoded = true)
    {
        return $this->charPattern(':@\/?', $pctEncoded);
    }

    private function isValidFragmentOrQuery($string)
    {
        $charPattern = $this->fragmentOrQueryCharPattern();

        // null (absent component) is checked as an empty string
        return (preg_match("/^$charPattern*$/iD", (string) $string) == 1);
    }

    /**
     * Removes "." and ".." segments (RFC 3986, section 5.2.4).
     *
     * @param string|null $path
     * @return string path without dot segments, '' for null or ''
     */
    private static function removeDotSegments($path)
    {
        // the cast also turns substr()'s false (PHP < 8) into ''
        $path = (string) $path;
        $segments = array();

        // strict test: a remaining "0" is a segment, not the end of input
        while ($path !== '') {
            if (strpos($path, '../') === 0) {
                $path = (string) substr($path, 3);

            } elseif (strpos($path, './') === 0) {
                $path = (string) substr($path, 2);

            } elseif (strpos($path, '/./') === 0) {
                // keep the second slash as the start of the next segment
                $path = (string) substr($path, 2);

            } elseif ($path == '/.') {
                $path = '/';

            } elseif (strpos($path, '/../') === 0) {
                $path = (string) substr($path, 3);

                if ($segments) {
                    array_pop($segments);
                }

            } elseif ($path == '/..') {
                $path = '/';

                if ($segments) {
                    array_pop($segments);
                }

            } elseif (($path == '..') || ($path == '.')) {
                $path = '';

            } else {
                // move the first segment, with its leading slash if any,
                // from the input to the output
                $from = ($path[0] == '/') ? 1 : 0;

                $length = strpos($path, '/', $from);

                if ($length === false) {
                    $length = strlen($path);
                }

                $segments[] = substr($path, 0, $length);

                $path = (string) substr($path, $length);
            }
        }

        return implode('', $segments);
    }

    /**
     * Merges a relative-path reference with the path of $this used as
     * the base URI (RFC 3986, section 5.2.3).
     *
     * @param string $path relative path of the reference
     * @return string
     */
    private function mergePath($path)
    {
        $ownPath = $this->getPath();

        if (
            $this->getAuthority() !== null
            && ($ownPath === null || $ownPath === '')
        ) {
            return '/'.$path;
        }

        $basePath = (string) $this->path;
        $lastSlash = strrpos($basePath, '/');

        if ($lastSlash === false) {
            // base path has no slash: it is dropped entirely
            return $path;
        }

        // keep the base path up to and including its right-most slash
        return substr($basePath, 0, $lastSlash + 1).$path;
    }

    /**
     * Normalizes $this in place: lower-cases scheme and host, normalizes
     * percent-encoding of every component and removes dot segments from
     * the path.
     *
     * @return GenericUri
     */
    public function normalize()
    {
        // 1. case
        if ($this->getScheme() !== null) {
            $this->setScheme(mb_strtolower($this->getScheme()));
        }

        // 2. percent-encoded
        $host = $this->normalizePercentEncoded(
            $this->getHost(), $this->hostNameCharPattern(false)
        );

        $userInfo = $this->normalizePercentEncoded(
            $this->getUserInfo(), $this->userInfoCharPattern(false)
        );

        // slash is the segment separator, so it stays unencoded in paths
        $path = self::removeDotSegments(
            $this->normalizePercentEncoded(
                $this->getPath(),
                '\/'.$this->segmentCharPattern(false)
            )
        );

        $query = $this->normalizePercentEncoded(
            $this->getQuery(),
            $this->fragmentOrQueryCharPattern(false)
        );

        $fragment = $this->normalizePercentEncoded(
            $this->getFragment(),
            $this->fragmentOrQueryCharPattern(false)
        );

        $this->
            setHost($host)->
            setUserInfo($userInfo)->
            setPath($path)->
            setQuery($query)->
            setFragment($fragment);

        // 3. and case again
        if ($this->getHost() !== null) {
            $this->setHost(mb_strtolower($this->getHost()));
        }

        return $this;
    }

    /**
     * Runs every percent-encoded triplet and every other character of
     * $string through PercentEncodingNormalizator::normalize().
     *
     * @param string|null $string              component value
     * @param string      $unreservedPartChars characters (for use inside
     *                                         [...]) that stay unencoded
     * @return string|null null for null input; the unchanged $string when
     *                     the regex engine fails (e.g. invalid UTF-8)
     */
    private function normalizePercentEncoded(
        $string, $unreservedPartChars
    )
    {
        if ($string === null) {
            return null;
        }

        $result = preg_replace_callback(
            '/(('.self::PATTERN_PCTENCODED.')|(.))/sui',
            array(
                PercentEncodingNormalizator::create()->
                    setUnreservedPartChars($unreservedPartChars),
                'normalize'
            ),
            $string
        );

        // preg_replace_callback() yields null on error; do not lose
        // the component because of that
        if ($result === null) {
            return $string;
        }

        return $result;
    }
}
00751     
/**
 * preg_replace_callback() helper that normalizes the percent-encoding of
 * one URI component, one matched token at a time.
 */
final class PercentEncodingNormalizator
{
    // characters (in regex character-class notation) that may stay
    // unencoded in the component being normalized
    private $unreservedPartChars = null;

    /**
     * @return PercentEncodingNormalizator
     */
    public static function create()
    {
        return new self;
    }

    /**
     * @param string $unreservedPartChars list of characters for use
     *                                    inside [...]
     * @return PercentEncodingNormalizator
     */
    public function setUnreservedPartChars($unreservedPartChars)
    {
        $this->unreservedPartChars = $unreservedPartChars;
        return $this;
    }

    /**
     * Normalizes a single token.
     *
     * - a percent-encoded triplet is decoded when it stands for an
     *   unreserved character (letters of either case, digits, "-", ".",
     *   "_", "~"), otherwise its hex digits are upper-cased;
     * - any other character is kept when it is listed in
     *   unreservedPartChars and percent-encoded otherwise.
     *
     * @param array $matched regex match; only element 0 is used
     * @return string
     */
    public function normalize($matched)
    {
        $char = $matched[0];

        // a triplet is told apart by its shape, so the result does not
        // depend on the mbstring internal encoding
        $isTriplet = (strlen($char) === 3 && $char[0] === '%');

        if (!$isTriplet) {
            if (
                !preg_match(
                    '/^['.$this->unreservedPartChars.']$/iD',
                    $char
                )
            ) {
                $char = rawurlencode($char);
            }

            return $char;
        }

        $decoded = rawurldecode($char);

        // case-insensitive: upper-case letters are unreserved as well
        if (
            preg_match(
                '/^['.GenericUri::CHARS_UNRESERVED.']$/iD',
                $decoded
            )
        ) {
            return $decoded;
        }

        return strtoupper($char);
    }
}
00801 ?>

Generated on Sun Dec 9 21:56:24 2007 for onPHP by  doxygen 1.5.4