gnu-social/extlib/Net/URL2.php
Brion Vibber fa37967858 Rebuilt HTTPClient class as an extension of PEAR HTTP_Request2 package, adding redirect handling and convenience functions.
Caching support will be added in future work after unit tests have been added.

* extlib: add PEAR HTTP_Request2 0.4.1 alpha
* extlib: update PEAR Net_URL2 to 0.3.0 beta for HTTP_Request2 compatibility
* moved direct usage of CURL and file_get_contents to HTTPClient class, excluding external-sourced libraries

Note some plugins haven't been tested yet.
2009-11-02 06:31:28 -08:00

929 lines
26 KiB
PHP

<?php
/**
* Net_URL2, a class representing a URL as per RFC 3986.
*
* PHP version 5
*
* LICENSE:
*
* Copyright (c) 2007-2009, Peytz & Co. A/S
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* * Neither the name of the PHP_LexerGenerator nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @category Networking
* @package Net_URL2
* @author Christian Schmidt <chsc@peytz.dk>
* @copyright 2007-2008 Peytz & Co. A/S
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version CVS: $Id: URL2.php 286661 2009-08-02 12:50:54Z schmidt $
* @link http://www.rfc-editor.org/rfc/rfc3986.txt
*/
/**
* Represents a URL as per RFC 3986.
*
* @category Networking
* @package Net_URL2
* @author Christian Schmidt <chsc@peytz.dk>
* @copyright 2007-2008 Peytz & Co. ApS
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version Release: @package_version@
* @link http://pear.php.net/package/Net_URL2
*/
class Net_URL2
{
/**
* Do strict parsing in resolve() (see RFC 3986, section 5.2.2). Default
* is true.
*/
const OPTION_STRICT = 'strict';
/**
* Represent arrays in query using PHP's [] notation. Default is true.
*/
const OPTION_USE_BRACKETS = 'use_brackets';
/**
* URL-encode query variable keys. Default is true.
*/
const OPTION_ENCODE_KEYS = 'encode_keys';
/**
* Query variable separators when parsing the query string. Every character
* is considered a separator. Default is specified by the
* arg_separator.input php.ini setting (this defaults to "&").
*/
const OPTION_SEPARATOR_INPUT = 'input_separator';
/**
* Query variable separator used when generating the query string. Default
* is specified by the arg_separator.output php.ini setting (this defaults
* to "&").
*/
const OPTION_SEPARATOR_OUTPUT = 'output_separator';
/**
* Default options corresponds to how PHP handles $_GET.
*/
private $_options = array(
self::OPTION_STRICT => true,
self::OPTION_USE_BRACKETS => true,
self::OPTION_ENCODE_KEYS => true,
self::OPTION_SEPARATOR_INPUT => 'x&',
self::OPTION_SEPARATOR_OUTPUT => 'x&',
);
/**
* @var string|bool
*/
private $_scheme = false;
/**
* @var string|bool
*/
private $_userinfo = false;
/**
* @var string|bool
*/
private $_host = false;
/**
* @var int|bool
*/
private $_port = false;
/**
* @var string
*/
private $_path = '';
/**
* @var string|bool
*/
private $_query = false;
/**
* @var string|bool
*/
private $_fragment = false;
/**
* Constructor.
*
* @param string $url an absolute or relative URL
* @param array $options an array of OPTION_xxx constants
*/
public function __construct($url, $options = null)
{
$this->setOption(self::OPTION_SEPARATOR_INPUT,
ini_get('arg_separator.input'));
$this->setOption(self::OPTION_SEPARATOR_OUTPUT,
ini_get('arg_separator.output'));
if (is_array($options)) {
foreach ($options as $optionName => $value) {
$this->setOption($optionName, $value);
}
}
if (preg_match('@^([a-z][a-z0-9.+-]*):@i', $url, $reg)) {
$this->_scheme = $reg[1];
$url = substr($url, strlen($reg[0]));
}
if (preg_match('@^//([^/#?]+)@', $url, $reg)) {
$this->setAuthority($reg[1]);
$url = substr($url, strlen($reg[0]));
}
$i = strcspn($url, '?#');
$this->_path = substr($url, 0, $i);
$url = substr($url, $i);
if (preg_match('@^\?([^#]*)@', $url, $reg)) {
$this->_query = $reg[1];
$url = substr($url, strlen($reg[0]));
}
if ($url) {
$this->_fragment = substr($url, 1);
}
}
/**
* Magic Setter.
*
* This method will magically set the value of a private variable ($var)
* with the value passed as the args
*
* @param string $var The private variable to set.
* @param mixed $arg An argument of any type.
* @return void
*/
public function __set($var, $arg)
{
$method = 'set' . $var;
if (method_exists($this, $method)) {
$this->$method($arg);
}
}
/**
* Magic Getter.
*
* This is the magic get method to retrieve the private variable
* that was set by either __set() or it's setter...
*
* @param string $var The property name to retrieve.
* @return mixed $this->$var Either a boolean false if the
* property is not set or the value
* of the private property.
*/
public function __get($var)
{
$method = 'get' . $var;
if (method_exists($this, $method)) {
return $this->$method();
}
return false;
}
/**
* Returns the scheme, e.g. "http" or "urn", or false if there is no
* scheme specified, i.e. if this is a relative URL.
*
* @return string|bool
*/
public function getScheme()
{
return $this->_scheme;
}
/**
* Sets the scheme, e.g. "http" or "urn". Specify false if there is no
* scheme specified, i.e. if this is a relative URL.
*
* @param string|bool $scheme e.g. "http" or "urn", or false if there is no
* scheme specified, i.e. if this is a relative
* URL
*
* @return void
* @see getScheme()
*/
public function setScheme($scheme)
{
$this->_scheme = $scheme;
}
/**
* Returns the user part of the userinfo part (the part preceding the first
* ":"), or false if there is no userinfo part.
*
* @return string|bool
*/
public function getUser()
{
return $this->_userinfo !== false
? preg_replace('@:.*$@', '', $this->_userinfo)
: false;
}
/**
* Returns the password part of the userinfo part (the part after the first
* ":"), or false if there is no userinfo part (i.e. the URL does not
* contain "@" in front of the hostname) or the userinfo part does not
* contain ":".
*
* @return string|bool
*/
public function getPassword()
{
return $this->_userinfo !== false
? substr(strstr($this->_userinfo, ':'), 1)
: false;
}
/**
* Returns the userinfo part, or false if there is none, i.e. if the
* authority part does not contain "@".
*
* @return string|bool
*/
public function getUserinfo()
{
return $this->_userinfo;
}
/**
* Sets the userinfo part. If two arguments are passed, they are combined
* in the userinfo part as username ":" password.
*
* @param string|bool $userinfo userinfo or username
* @param string|bool $password optional password, or false
*
* @return void
*/
public function setUserinfo($userinfo, $password = false)
{
$this->_userinfo = $userinfo;
if ($password !== false) {
$this->_userinfo .= ':' . $password;
}
}
/**
* Returns the host part, or false if there is no authority part, e.g.
* relative URLs.
*
* @return string|bool a hostname, an IP address, or false
*/
public function getHost()
{
return $this->_host;
}
/**
* Sets the host part. Specify false if there is no authority part, e.g.
* relative URLs.
*
* @param string|bool $host a hostname, an IP address, or false
*
* @return void
*/
public function setHost($host)
{
$this->_host = $host;
}
/**
* Returns the port number, or false if there is no port number specified,
* i.e. if the default port is to be used.
*
* @return int|bool
*/
public function getPort()
{
return $this->_port;
}
/**
* Sets the port number. Specify false if there is no port number specified,
* i.e. if the default port is to be used.
*
* @param int|bool $port a port number, or false
*
* @return void
*/
public function setPort($port)
{
$this->_port = intval($port);
}
/**
* Returns the authority part, i.e. [ userinfo "@" ] host [ ":" port ], or
* false if there is no authority.
*
* @return string|bool
*/
public function getAuthority()
{
if (!$this->_host) {
return false;
}
$authority = '';
if ($this->_userinfo !== false) {
$authority .= $this->_userinfo . '@';
}
$authority .= $this->_host;
if ($this->_port !== false) {
$authority .= ':' . $this->_port;
}
return $authority;
}
/**
* Sets the authority part, i.e. [ userinfo "@" ] host [ ":" port ]. Specify
* false if there is no authority.
*
* @param string|false $authority a hostname or an IP addresse, possibly
* with userinfo prefixed and port number
* appended, e.g. "foo:bar@example.org:81".
*
* @return void
*/
public function setAuthority($authority)
{
$this->_userinfo = false;
$this->_host = false;
$this->_port = false;
if (preg_match('@^(([^\@]*)\@)?([^:]+)(:(\d*))?$@', $authority, $reg)) {
if ($reg[1]) {
$this->_userinfo = $reg[2];
}
$this->_host = $reg[3];
if (isset($reg[5])) {
$this->_port = intval($reg[5]);
}
}
}
/**
* Returns the path part (possibly an empty string).
*
* @return string
*/
public function getPath()
{
return $this->_path;
}
/**
* Sets the path part (possibly an empty string).
*
* @param string $path a path
*
* @return void
*/
public function setPath($path)
{
$this->_path = $path;
}
/**
* Returns the query string (excluding the leading "?"), or false if "?"
* is not present in the URL.
*
* @return string|bool
* @see self::getQueryVariables()
*/
public function getQuery()
{
return $this->_query;
}
/**
* Sets the query string (excluding the leading "?"). Specify false if "?"
* is not present in the URL.
*
* @param string|bool $query a query string, e.g. "foo=1&bar=2"
*
* @return void
* @see self::setQueryVariables()
*/
public function setQuery($query)
{
$this->_query = $query;
}
/**
* Returns the fragment name, or false if "#" is not present in the URL.
*
* @return string|bool
*/
public function getFragment()
{
return $this->_fragment;
}
/**
* Sets the fragment name. Specify false if "#" is not present in the URL.
*
* @param string|bool $fragment a fragment excluding the leading "#", or
* false
*
* @return void
*/
public function setFragment($fragment)
{
$this->_fragment = $fragment;
}
/**
* Returns the query string like an array as the variables would appear in
* $_GET in a PHP script. If the URL does not contain a "?", an empty array
* is returned.
*
* @return array
*/
public function getQueryVariables()
{
$pattern = '/[' .
preg_quote($this->getOption(self::OPTION_SEPARATOR_INPUT), '/') .
']/';
$parts = preg_split($pattern, $this->_query, -1, PREG_SPLIT_NO_EMPTY);
$return = array();
foreach ($parts as $part) {
if (strpos($part, '=') !== false) {
list($key, $value) = explode('=', $part, 2);
} else {
$key = $part;
$value = null;
}
if ($this->getOption(self::OPTION_ENCODE_KEYS)) {
$key = rawurldecode($key);
}
$value = rawurldecode($value);
if ($this->getOption(self::OPTION_USE_BRACKETS) &&
preg_match('#^(.*)\[([0-9a-z_-]*)\]#i', $key, $matches)) {
$key = $matches[1];
$idx = $matches[2];
// Ensure is an array
if (empty($return[$key]) || !is_array($return[$key])) {
$return[$key] = array();
}
// Add data
if ($idx === '') {
$return[$key][] = $value;
} else {
$return[$key][$idx] = $value;
}
} elseif (!$this->getOption(self::OPTION_USE_BRACKETS)
&& !empty($return[$key])
) {
$return[$key] = (array) $return[$key];
$return[$key][] = $value;
} else {
$return[$key] = $value;
}
}
return $return;
}
/**
* Sets the query string to the specified variable in the query string.
*
* @param array $array (name => value) array
*
* @return void
*/
public function setQueryVariables(array $array)
{
if (!$array) {
$this->_query = false;
} else {
foreach ($array as $name => $value) {
if ($this->getOption(self::OPTION_ENCODE_KEYS)) {
$name = self::urlencode($name);
}
if (is_array($value)) {
foreach ($value as $k => $v) {
$parts[] = $this->getOption(self::OPTION_USE_BRACKETS)
? sprintf('%s[%s]=%s', $name, $k, $v)
: ($name . '=' . $v);
}
} elseif (!is_null($value)) {
$parts[] = $name . '=' . self::urlencode($value);
} else {
$parts[] = $name;
}
}
$this->_query = implode($this->getOption(self::OPTION_SEPARATOR_OUTPUT),
$parts);
}
}
/**
* Sets the specified variable in the query string.
*
* @param string $name variable name
* @param mixed $value variable value
*
* @return array
*/
public function setQueryVariable($name, $value)
{
$array = $this->getQueryVariables();
$array[$name] = $value;
$this->setQueryVariables($array);
}
/**
* Removes the specifed variable from the query string.
*
* @param string $name a query string variable, e.g. "foo" in "?foo=1"
*
* @return void
*/
public function unsetQueryVariable($name)
{
$array = $this->getQueryVariables();
unset($array[$name]);
$this->setQueryVariables($array);
}
/**
* Returns a string representation of this URL.
*
* @return string
*/
public function getURL()
{
// See RFC 3986, section 5.3
$url = "";
if ($this->_scheme !== false) {
$url .= $this->_scheme . ':';
}
$authority = $this->getAuthority();
if ($authority !== false) {
$url .= '//' . $authority;
}
$url .= $this->_path;
if ($this->_query !== false) {
$url .= '?' . $this->_query;
}
if ($this->_fragment !== false) {
$url .= '#' . $this->_fragment;
}
return $url;
}
/**
* Returns a string representation of this URL.
*
* @return string
* @see toString()
*/
public function __toString()
{
return $this->getURL();
}
/**
* Returns a normalized string representation of this URL. This is useful
* for comparison of URLs.
*
* @return string
*/
public function getNormalizedURL()
{
$url = clone $this;
$url->normalize();
return $url->getUrl();
}
/**
* Returns a normalized Net_URL2 instance.
*
* @return Net_URL2
*/
public function normalize()
{
// See RFC 3886, section 6
// Schemes are case-insensitive
if ($this->_scheme) {
$this->_scheme = strtolower($this->_scheme);
}
// Hostnames are case-insensitive
if ($this->_host) {
$this->_host = strtolower($this->_host);
}
// Remove default port number for known schemes (RFC 3986, section 6.2.3)
if ($this->_port &&
$this->_scheme &&
$this->_port == getservbyname($this->_scheme, 'tcp')) {
$this->_port = false;
}
// Normalize case of %XX percentage-encodings (RFC 3986, section 6.2.2.1)
foreach (array('_userinfo', '_host', '_path') as $part) {
if ($this->$part) {
$this->$part = preg_replace('/%[0-9a-f]{2}/ie',
'strtoupper("\0")',
$this->$part);
}
}
// Path segment normalization (RFC 3986, section 6.2.2.3)
$this->_path = self::removeDotSegments($this->_path);
// Scheme based normalization (RFC 3986, section 6.2.3)
if ($this->_host && !$this->_path) {
$this->_path = '/';
}
}
/**
* Returns whether this instance represents an absolute URL.
*
* @return bool
*/
public function isAbsolute()
{
return (bool) $this->_scheme;
}
/**
* Returns an Net_URL2 instance representing an absolute URL relative to
* this URL.
*
* @param Net_URL2|string $reference relative URL
*
* @return Net_URL2
*/
public function resolve($reference)
{
if (!$reference instanceof Net_URL2) {
$reference = new self($reference);
}
if (!$this->isAbsolute()) {
throw new Exception('Base-URL must be absolute');
}
// A non-strict parser may ignore a scheme in the reference if it is
// identical to the base URI's scheme.
if (!$this->getOption(self::OPTION_STRICT) && $reference->_scheme == $this->_scheme) {
$reference->_scheme = false;
}
$target = new self('');
if ($reference->_scheme !== false) {
$target->_scheme = $reference->_scheme;
$target->setAuthority($reference->getAuthority());
$target->_path = self::removeDotSegments($reference->_path);
$target->_query = $reference->_query;
} else {
$authority = $reference->getAuthority();
if ($authority !== false) {
$target->setAuthority($authority);
$target->_path = self::removeDotSegments($reference->_path);
$target->_query = $reference->_query;
} else {
if ($reference->_path == '') {
$target->_path = $this->_path;
if ($reference->_query !== false) {
$target->_query = $reference->_query;
} else {
$target->_query = $this->_query;
}
} else {
if (substr($reference->_path, 0, 1) == '/') {
$target->_path = self::removeDotSegments($reference->_path);
} else {
// Merge paths (RFC 3986, section 5.2.3)
if ($this->_host !== false && $this->_path == '') {
$target->_path = '/' . $this->_path;
} else {
$i = strrpos($this->_path, '/');
if ($i !== false) {
$target->_path = substr($this->_path, 0, $i + 1);
}
$target->_path .= $reference->_path;
}
$target->_path = self::removeDotSegments($target->_path);
}
$target->_query = $reference->_query;
}
$target->setAuthority($this->getAuthority());
}
$target->_scheme = $this->_scheme;
}
$target->_fragment = $reference->_fragment;
return $target;
}
/**
* Removes dots as described in RFC 3986, section 5.2.4, e.g.
* "/foo/../bar/baz" => "/bar/baz"
*
* @param string $path a path
*
* @return string a path
*/
public static function removeDotSegments($path)
{
$output = '';
// Make sure not to be trapped in an infinite loop due to a bug in this
// method
$j = 0;
while ($path && $j++ < 100) {
if (substr($path, 0, 2) == './') {
// Step 2.A
$path = substr($path, 2);
} elseif (substr($path, 0, 3) == '../') {
// Step 2.A
$path = substr($path, 3);
} elseif (substr($path, 0, 3) == '/./' || $path == '/.') {
// Step 2.B
$path = '/' . substr($path, 3);
} elseif (substr($path, 0, 4) == '/../' || $path == '/..') {
// Step 2.C
$path = '/' . substr($path, 4);
$i = strrpos($output, '/');
$output = $i === false ? '' : substr($output, 0, $i);
} elseif ($path == '.' || $path == '..') {
// Step 2.D
$path = '';
} else {
// Step 2.E
$i = strpos($path, '/');
if ($i === 0) {
$i = strpos($path, '/', 1);
}
if ($i === false) {
$i = strlen($path);
}
$output .= substr($path, 0, $i);
$path = substr($path, $i);
}
}
return $output;
}
/**
* Percent-encodes all non-alphanumeric characters except these: _ . - ~
* Similar to PHP's rawurlencode(), except that it also encodes ~ in PHP
* 5.2.x and earlier.
*
* @param $raw the string to encode
* @return string
*/
public static function urlencode($string)
{
$encoded = rawurlencode($string);
// This is only necessary in PHP < 5.3.
$encoded = str_replace('%7E', '~', $encoded);
return $encoded;
}
/**
* Returns a Net_URL2 instance representing the canonical URL of the
* currently executing PHP script.
*
* @return string
*/
public static function getCanonical()
{
if (!isset($_SERVER['REQUEST_METHOD'])) {
// ALERT - no current URL
throw new Exception('Script was not called through a webserver');
}
// Begin with a relative URL
$url = new self($_SERVER['PHP_SELF']);
$url->_scheme = isset($_SERVER['HTTPS']) ? 'https' : 'http';
$url->_host = $_SERVER['SERVER_NAME'];
$port = intval($_SERVER['SERVER_PORT']);
if ($url->_scheme == 'http' && $port != 80 ||
$url->_scheme == 'https' && $port != 443) {
$url->_port = $port;
}
return $url;
}
/**
* Returns the URL used to retrieve the current request.
*
* @return string
*/
public static function getRequestedURL()
{
return self::getRequested()->getUrl();
}
/**
* Returns a Net_URL2 instance representing the URL used to retrieve the
* current request.
*
* @return Net_URL2
*/
public static function getRequested()
{
if (!isset($_SERVER['REQUEST_METHOD'])) {
// ALERT - no current URL
throw new Exception('Script was not called through a webserver');
}
// Begin with a relative URL
$url = new self($_SERVER['REQUEST_URI']);
$url->_scheme = isset($_SERVER['HTTPS']) ? 'https' : 'http';
// Set host and possibly port
$url->setAuthority($_SERVER['HTTP_HOST']);
return $url;
}
/**
* Sets the specified option.
*
* @param string $optionName a self::OPTION_ constant
* @param mixed $value option value
*
* @return void
* @see self::OPTION_STRICT
* @see self::OPTION_USE_BRACKETS
* @see self::OPTION_ENCODE_KEYS
*/
function setOption($optionName, $value)
{
if (!array_key_exists($optionName, $this->_options)) {
return false;
}
$this->_options[$optionName] = $value;
}
/**
* Returns the value of the specified option.
*
* @param string $optionName The name of the option to retrieve
*
* @return mixed
*/
function getOption($optionName)
{
return isset($this->_options[$optionName])
? $this->_options[$optionName] : false;
}
}