Linkifier supports many more URLs, with fewer mismatches

This commit is contained in:
Craig Andrews 2009-08-21 15:56:15 -04:00
parent 70ca03f336
commit 871903a319

View File

@ -413,15 +413,16 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) {
// Start off with a regex // Start off with a regex
$regex = '#'. $regex = '#'.
'(?:^|\s+)('. '(?:^|\s+)('.
'(?:'. //Known protocols
'(?:'. '(?:'.
'(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'. '(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'.
'|'. '|'.
'(?:mailto|aim|tel|xmpp):'. '(?:mailto|aim|tel|xmpp):'.
')?'. ')\S+'.
'(?:'. ')'.
'(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4 '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4
'|(?:'. '|(?:'. //IPv6
'(?:[0-9a-f]{1,4}:){1,1}(?::[0-9a-f]{1,4}){1,6}|'. //IPv6 '(?:[0-9a-f]{1,4}:){1,1}(?::[0-9a-f]{1,4}){1,6}|'.
'(?:[0-9a-f]{1,4}:){1,2}(?::[0-9a-f]{1,4}){1,5}|'. '(?:[0-9a-f]{1,4}:){1,2}(?::[0-9a-f]{1,4}){1,5}|'.
'(?:[0-9a-f]{1,4}:){1,3}(?::[0-9a-f]{1,4}){1,4}|'. '(?:[0-9a-f]{1,4}:){1,3}(?::[0-9a-f]{1,4}){1,4}|'.
'(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,3}|'. '(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,3}|'.
@ -438,39 +439,46 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) {
'(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,1}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. '(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,1}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'.
'(?:(?:[0-9a-f]{1,4}:){1,5}|:):(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. '(?:(?:[0-9a-f]{1,4}:){1,5}|:):(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'.
':(?::[0-9a-f]{1,4}){1,5}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}'. ':(?::[0-9a-f]{1,4}){1,5}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}'.
')|'. ')|(?:'. //DNS
'(?:[^.\s/:]+\.)+'. //DNS '\S+\.(?:museum|travel|onion|local|[a-z]{2,4})'.
'(?:museum|travel|onion|[a-z]{2,4})'.
')'.
'(?:[:/][^\s]*)?'.
')'. ')'.
'(?:[:/]\S*)?'.
')(?:$|\s+)'.
'#ix'; '#ix';
$callback_helper = curry(callback_helper, 3); //preg_match_all($regex,$text,$matches);
return preg_replace_callback($regex, $callback_helper($callback,$notice_id) ,$text); //print_r($matches);
//die("here");
return preg_replace_callback($regex, curry(callback_helper,$callback,$notice_id) ,$text);
} }
function callback_helper($callback, $notice_id, $matches) { function callback_helper($matches, $callback, $notice_id) {
$spaces_left = (strlen($matches[0]) - strlen(ltrim($matches[0])));
$spaces_right = (strlen($matches[0]) - strlen(rtrim($matches[0])));
if(empty($notice_id)){ if(empty($notice_id)){
return $callback($matches[1],$notice_id); $result = call_user_func_array($callback,$matches[1]);
}else{ }else{
return $callback($matches[1]); $result = call_user_func_array($callback, array($matches[1],$notice_id) );
} }
return str_repeat(' ',$spaces_left) . $result . str_repeat(' ',$spaces_right);
} }
function curry($func, $arity) { function curry($fn) {
return create_function('', " //TODO switch to a PHP 5.3 function closure based approach if PHP 5.3 is used
\$args = func_get_args(); $args = func_get_args();
if(count(\$args) >= $arity) array_shift($args);
return call_user_func_array('$func', \$args); $id = uniqid('_partial');
\$args = var_export(\$args, 1); $GLOBALS[$id] = array($fn, $args);
return create_function('',' return create_function(
\$a = func_get_args(); '',
\$z = ' . \$args . '; '
\$a = array_merge(\$z,\$a); $args = func_get_args();
return call_user_func_array(\'$func\', \$a); return call_user_func_array(
$GLOBALS["'.$id.'"][0],
array_merge(
$args,
$GLOBALS["'.$id.'"][1]));
'); ');
");
} }
function common_linkify($url) { function common_linkify($url) {
@ -478,6 +486,11 @@ function common_linkify($url) {
// functions // functions
$url = htmlspecialchars_decode($url); $url = htmlspecialchars_decode($url);
if(strpos($url, '@')!==false && strpos($url, ':')===false){
//url is an email address without the mailto: protocol
return XMLStringer::estring('a', array('href' => "mailto:$url", 'rel' => 'external'), $url);
}
$canon = File_redirection::_canonUrl($url); $canon = File_redirection::_canonUrl($url);
$longurl_data = File_redirection::where($url); $longurl_data = File_redirection::where($url);