rewrote short url stuff to handle new file/url classes (redirections, oembed, mimetypes, etc.)

This commit is contained in:
Robin Millette 2009-05-13 14:27:32 -04:00
parent d010d811ba
commit 3b7ee5a5f9
10 changed files with 383 additions and 237 deletions

View File

@ -158,7 +158,8 @@ class NewnoticeAction extends Action
$replyto = 'false';
}
$notice = Notice::saveNew($user->id, $content, 'web', 1,
// $notice = Notice::saveNew($user->id, $content_shortened, 'web', 1,
$notice = Notice::saveNew($user->id, $content_shortened, 'web', 1,
($replyto == 'false') ? null : $replyto);
if (is_string($notice)) {
@ -203,25 +204,12 @@ class NewnoticeAction extends Action
* @return void
*/
function saveUrls($notice) {
common_debug("Saving all URLs");
common_replace_urls_callback($notice->content, array($this, 'saveUrl'), $notice->id);
}
function saveUrl($data) {
list($url, $notice_id) = $data;
common_debug("Saving $url for $notice_id");
$file = File::staticGet('url', $url);
if (empty($file)) {
common_debug('unknown file/url');
$file = new File;
$file->url = $url;
$file->insert();
}
common_debug('File: ' . print_r($file, true));
$f2p = new File_to_post;
$f2p->file_id = $file->id;
$f2p->post_id = $notice_id;
$f2p->insert();
$zzz = File::processNew($url, $notice_id);
}
/**

View File

@ -20,6 +20,11 @@
if (!defined('LACONICA')) { exit(1); }
require_once INSTALLDIR.'/classes/Memcached_DataObject.php';
require_once INSTALLDIR.'/classes/File_redirection.php';
require_once INSTALLDIR.'/classes/File_oembed.php';
require_once INSTALLDIR.'/classes/File_thumbnail.php';
require_once INSTALLDIR.'/classes/File_to_post.php';
//require_once INSTALLDIR.'/classes/File_redirection.php';
/**
* Table Definition for file
@ -44,4 +49,64 @@ class File extends Memcached_DataObject
/* the code above is auto generated do not remove the tag below */
###END_AUTOCODE
/**
 * Report whether $url is exactly the Facebook login page address,
 * the only URL this class currently treats as protected.
 *
 * @param string $url URL to test
 *
 * @return boolean true only on an exact (strict) string match
 */
function isProtected($url) {
    $login_page = 'http://www.facebook.com/login.php';
    return $url === $login_page;
}
/**
 * Insert a new File row for $given_url and, when the resource is an HTML
 * page, look up and store its oEmbed description.
 *
 * @param array  $redir_data probe result; the keys read here are 'protected',
 *                           'title', 'type', 'size' and 'time'
 * @param string $given_url  URL stored in the new row
 *
 * @return File the object that was inserted
 */
function saveNew($redir_data, $given_url) {
    $record = new File;
    $record->url = $given_url;

    // Probe key => File column; a value is copied only when it is non-empty.
    $optional = array('protected' => 'protected', 'title' => 'title', 'type' => 'mimetype');
    foreach ($optional as $key => $column) {
        if (!empty($redir_data[$key])) {
            $record->$column = $redir_data[$key];
        }
    }
    if (!empty($redir_data['size'])) {
        $record->size = intval($redir_data['size']);
    }
    if (isset($redir_data['time']) && $redir_data['time'] > 0) {
        $record->date = intval($redir_data['time']);
    }

    $new_id = $record->insert();

    // oEmbed is only queried for resources whose content type starts with text/html.
    $is_html = isset($redir_data['type'])
        && ('text/html' === substr($redir_data['type'], 0, 9));
    if ($is_html) {
        $embed = File_oembed::_getOembed($given_url);
        if ($embed && isset($embed['json'])) {
            File_oembed::saveNew($embed['json'], $new_id);
        }
    }

    return $record;
}
/**
 * Resolve $given_url to a File row (creating File / File_redirection rows
 * as needed) and link that file to the notice.
 *
 * Lookup order: an existing File with this URL, then an existing
 * File_redirection with this URL, and finally a live probe through
 * File_redirection::where(). When the probe ends on a different URL, that
 * target is processed recursively and $given_url is stored as a
 * redirection to it.
 *
 * @param string  $given_url URL found in the notice
 * @param integer $notice_id id of the notice containing the URL
 *
 * @return File|integer the File object, or -1 when the URL cannot be processed
 */
function processNew($given_url, $notice_id) {
    if (empty($given_url)) return -1;   // error, no url to process
    $given_url = File_redirection::_canonUrl($given_url);
    if (empty($given_url)) return -1;   // error, no url to process

    $x = null;
    $file = File::staticGet('url', $given_url);
    if (!empty($file->id)) {
        // Already known as a file.
        $x = $file;
        $file_id = $file->id;
    } else {
        $file_redir = File_redirection::staticGet('url', $given_url);
        if (!empty($file_redir->id)) {
            // Already known as a redirection; the File itself is loaded below.
            $file_id = $file_redir->file_id;
        } else {
            $redir_data = File_redirection::where($given_url);
            // where() can also return false or a plain string; only the
            // array form carries the data needed to store new rows.
            if (!is_array($redir_data) || empty($redir_data['url'])) return -1;

            $redir_url = $redir_data['url'];
            if ($redir_url === $given_url) {
                $x = File::saveNew($redir_data, $given_url);
            } else {
                $x = File::processNew($redir_url, $notice_id);
                // The recursive call reports failure with -1; never treat
                // that as a File or store a redirection to a null id.
                if (!is_object($x) || empty($x->id)) return -1;
                File_redirection::saveNew($redir_data, $x->id, $given_url);
            }
            $file_id = $x->id;
        }
    }

    if (empty($x)) {
        $x = File::staticGet($file_id);
        // A redirection pointing at a missing file is reported as an
        // error instead of terminating the whole request.
        if (empty($x)) return -1;
    }

    File_to_post::processNew($file_id, $notice_id);
    return $x;
}
}

View File

@ -50,4 +50,47 @@ class File_oembed extends Memcached_DataObject
/* the code above is auto generated do not remove the tag below */
###END_AUTOCODE
/**
 * Ask the oohembed.com service for the oEmbed description of $url.
 *
 * @param string  $url       resource to describe
 * @param integer $maxwidth  sent as maxwidth only when it is an integer
 * @param integer $maxheight sent as maxheight only when it is an integer
 * @param string  $format    sent as format only when it is a string
 *
 * @return array|boolean false when the request fails; otherwise a
 *                       one-element array keyed by $format holding the
 *                       decoded JSON (for 'json') or the raw response body
 */
function _getOembed($url, $maxwidth = 500, $maxheight = 400, $format = 'json') {
    $request = 'http://oohembed.com/oohembed/?url=' . urlencode($url);
    if (is_int($maxwidth)) {
        $request .= '&maxwidth=' . $maxwidth;
    }
    if (is_int($maxheight)) {
        $request .= '&maxheight=' . $maxheight;
    }
    if (is_string($format)) {
        $request .= '&format=' . $format;
    }

    // Warnings from a failed fetch are suppressed; failure is signalled by false.
    $response = @file_get_contents($request);
    if ($response === false) {
        return false;
    }

    if ('json' === $format) {
        $payload = json_decode($response, true);
    } else {
        $payload = $response;
    }
    return array($format => $payload);
}
/**
 * Store an oEmbed description for a file, plus its thumbnail when the
 * description names one.
 *
 * @param array   $data    decoded oEmbed response (keys such as 'version',
 *                         'type', 'provider_name', 'provider_url', 'html',
 *                         'thumbnail_url', ...)
 * @param integer $file_id id of the File row being described
 *
 * @return void
 */
function saveNew($data, $file_id) {
    $file_oembed = new File_oembed;
    $file_oembed->file_id = $file_id;
    // Missing keys are stored as null rather than read as undefined indexes.
    $file_oembed->version = isset($data['version']) ? $data['version'] : null;
    $file_oembed->type = isset($data['type']) ? $data['type'] : null;

    // Prefer 'provider_name'; fall back to a 'provider' key when present.
    if (!empty($data['provider_name'])) {
        $file_oembed->provider = $data['provider_name'];
    } elseif (!empty($data['provider'])) {
        $file_oembed->provider = $data['provider'];
    }
    if (!empty($data['provider_url'])) $file_oembed->provider_url = $data['provider_url'];

    if (!empty($data['width'])) $file_oembed->width = intval($data['width']);
    if (!empty($data['height'])) $file_oembed->height = intval($data['height']);

    // Remaining text fields are copied verbatim when non-empty.
    foreach (array('html', 'title', 'author_name', 'author_url', 'url') as $field) {
        if (!empty($data[$field])) $file_oembed->$field = $data[$field];
    }
    $file_oembed->insert();

    if (!empty($data['thumbnail_url'])) {
        $tn = new File_thumbnail;
        $tn->file_id = $file_id;
        $tn->url = $data['thumbnail_url'];
        // Thumbnail dimensions are optional in the response; default to 0.
        $tn->width = isset($data['thumbnail_width']) ? intval($data['thumbnail_width']) : 0;
        $tn->height = isset($data['thumbnail_height']) ? intval($data['thumbnail_height']) : 0;
        $tn->insert();
    }
}
}

View File

@ -20,6 +20,11 @@
if (!defined('LACONICA')) { exit(1); }
require_once INSTALLDIR.'/classes/Memcached_DataObject.php';
require_once INSTALLDIR.'/classes/File.php';
require_once INSTALLDIR.'/classes/File_oembed.php';
define('USER_AGENT', 'Laconica user agent / file probe');
/**
* Table Definition for file_redirection
@ -42,4 +47,230 @@ class File_redirection extends Memcached_DataObject
/* the code above is auto generated do not remove the tag below */
###END_AUTOCODE
/**
 * Build a cURL handle preconfigured for probing $url.
 *
 * @param string  $url    address to request
 * @param integer $redirs maximum number of HTTP redirections to follow
 *
 * @return resource configured cURL handle; the caller executes and closes it
 */
function _commonCurl($url, $redirs) {
    $settings = array(
        CURLOPT_URL            => $url,
        CURLOPT_AUTOREFERER    => true,       // set the Referer header when following redirects
        CURLOPT_CONNECTTIMEOUT => 10,         // seconds to wait for the connection
        CURLOPT_MAXREDIRS      => $redirs,    // cap on redirections followed
        CURLOPT_USERAGENT      => USER_AGENT,
        CURLOPT_FOLLOWLOCATION => true,       // follow redirects
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FILETIME       => true,
        CURLOPT_HEADER         => true,       // include headers in the output
    );

    $handle = curl_init();
    foreach ($settings as $option => $value) {
        curl_setopt($handle, $option, $value);
    }
    return $handle;
}
/**
 * Find out where $short_url leads.
 *
 * Known URLs are answered from the File / File_redirection tables; anything
 * else is probed over HTTP with a HEAD-style request, retried as a full
 * request when the server answers 405.
 *
 * @param string  $short_url URL to resolve
 * @param integer $redirs    maximum number of redirections to follow
 * @param boolean $protected set on the recursive retry made when the chain
 *                           ended on a File::isProtected() URL
 *
 * @return mixed false when $redirs is negative; a string (the stored file
 *               URL) when the URL is already known; otherwise an array with
 *               'code', 'redirects', 'url' and, when available, 'type',
 *               'protected', 'size' and 'time'
 */
function _redirectWhere_imp($short_url, $redirs = 10, $protected = false) {
    if ($redirs < 0) {
        return false;
    }

    // Do we already know this URL, directly or as a redirection?
    $known = File::staticGet('url', $short_url);
    if (!empty($known->id)) {
        // direct link to a stored file
        $target = $known->url;
    } else {
        $redirection = File_redirection::staticGet('url', $short_url);
        if (!empty($redirection->id)) {
            // stored redirection pointing at file $redirection->file_id
            $known = File::staticGet($redirection->file_id);
            $target = $known->url;
        }
        // otherwise we will have to figure it out below
    }
    if (isset($target)) {
        return $target;
    }

    // First attempt: headers only, no body.
    $handle = File_redirection::_commonCurl($short_url, $redirs);
    curl_setopt($handle, CURLOPT_NOBODY, true);
    curl_exec($handle);
    $info = curl_getinfo($handle);
    curl_close($handle);

    // 405 response: repeat the probe without CURLOPT_NOBODY.
    if (405 == $info['http_code']) {
        $handle = File_redirection::_commonCurl($short_url, $redirs);
        curl_exec($handle);
        $info = curl_getinfo($handle);
        curl_close($handle);
    }

    // The chain ended on a protected page: retry, following one hop fewer.
    if (!empty($info['redirect_count']) && File::isProtected($info['url'])) {
        return File_redirection::_redirectWhere_imp($short_url, $info['redirect_count'] - 1, true);
    }

    $result = array(
        'code'      => $info['http_code'],
        'redirects' => $info['redirect_count'],
        'url'       => $info['url'],
    );
    if (!empty($info['content_type'])) {
        $result['type'] = $info['content_type'];
    }
    if ($protected) {
        $result['protected'] = true;
    }
    if (!empty($info['download_content_length'])) {
        $result['size'] = $info['download_content_length'];
    }
    if (isset($info['filetime']) && ($info['filetime'] > 0)) {
        $result['time'] = $info['filetime'];
    }
    return $result;
}
/**
 * Public entry point for redirect resolution: delegates to
 * _redirectWhere_imp() with its default redirection limit.
 *
 * @param string $in_url URL to resolve
 *
 * @return mixed whatever _redirectWhere_imp() returns for $in_url
 */
function where($in_url) {
    return File_redirection::_redirectWhere_imp($in_url);
}
/**
 * Return a short URL for $long_url, reusing a stored redirection when the
 * join query finds one and otherwise asking the current user's shortening
 * service.
 *
 * @param string $long_url URL to shorten
 *
 * @return string URL to use in place of $long_url
 */
function makeShort($long_url) {
    $long_url = File_redirection::_canonUrl($long_url);

    // Look for a file with this URL joined to its redirections, ordered by
    // the length of the redirection URL so the shortest one comes first.
    $known = new File;
    $redirection = new File_redirection;
    $known->url = $long_url;
    $known->joinAdd($redirection);
    $known->selectAdd('length(file_redirection.url) as len');
    $known->limit(1);
    $known->orderBy('len');
    $known->find(true);
    if (!empty($known->id)) {
        return $known->url;
    }

    // Nothing stored yet: shorten according to the user's settings.
    return File_redirection::_userMakeShort($long_url, common_current_user());
}
/**
 * Shorten $long_url with the service selected in the user's settings and
 * record the resulting short URL as a redirection to the long one.
 *
 * @param string $long_url URL to shorten
 * @param object $user     object exposing ->urlshorteningservice; when empty,
 *                         'ur1.ca' is used
 *
 * @return string the short URL, or $long_url when no short URL was obtained
 */
function _userMakeShort($long_url, $user) {
    if (empty($user)) {
        // common current user does not find a user when called from the XMPP daemon
        // therefore we'll set one here fix, so that XMPP given URLs may be shortened.
        // A real object is created first: assigning a property on null is an Error on PHP 8.
        $user = new stdClass;
        $user->urlshorteningservice = 'ur1.ca';
    }

    $curlh = curl_init();
    curl_setopt($curlh, CURLOPT_CONNECTTIMEOUT, 20); // # seconds to wait
    curl_setopt($curlh, CURLOPT_USERAGENT, 'Laconica');
    curl_setopt($curlh, CURLOPT_RETURNTRANSFER, true);

    switch($user->urlshorteningservice) {
    case 'ur1.ca':
        require_once INSTALLDIR.'/lib/Shorturl_api.php';
        $short_url_service = new LilUrl;
        $short_url = $short_url_service->shorten($long_url);
        break;
    case '2tu.us':
        // The class file must be loaded before the class is instantiated.
        require_once INSTALLDIR.'/lib/Shorturl_api.php';
        $short_url_service = new TightUrl;
        $short_url = $short_url_service->shorten($long_url);
        break;
    case 'ptiturl.com':
        require_once INSTALLDIR.'/lib/Shorturl_api.php';
        $short_url_service = new PtitUrl;
        $short_url = $short_url_service->shorten($long_url);
        break;
    case 'bit.ly':
        curl_setopt($curlh, CURLOPT_URL, 'http://bit.ly/api?method=shorten&long_url='.urlencode($long_url));
        // The request may fail or return something other than the expected
        // JSON; in that case fall back to "no short URL".
        $short_url = false;
        $reply = json_decode(curl_exec($curlh));
        if (is_object($reply) && !empty($reply->results)) {
            $first = current((array) $reply->results);
            if (is_object($first) && !empty($first->hashUrl)) {
                $short_url = $first->hashUrl;
            }
        }
        break;
    case 'is.gd':
        curl_setopt($curlh, CURLOPT_URL, 'http://is.gd/api.php?longurl='.urlencode($long_url));
        $short_url = curl_exec($curlh);
        break;
    case 'snipr.com':
        curl_setopt($curlh, CURLOPT_URL, 'http://snipr.com/site/snip?r=simple&link='.urlencode($long_url));
        $short_url = curl_exec($curlh);
        break;
    case 'metamark.net':
        curl_setopt($curlh, CURLOPT_URL, 'http://metamark.net/api/rest/simple?long_url='.urlencode($long_url));
        $short_url = curl_exec($curlh);
        break;
    case 'tinyurl.com':
        curl_setopt($curlh, CURLOPT_URL, 'http://tinyurl.com/api-create.php?url='.urlencode($long_url));
        $short_url = curl_exec($curlh);
        break;
    default:
        $short_url = false;
    }

    curl_close($curlh);

    if (!$short_url) {
        return $long_url;
    }
    $short_url = (string)$short_url;

    // Store it: make sure the long URL has a File row (File::saveNew also
    // takes care of any oEmbed data) ...
    $file = File::staticGet('url', $long_url);
    if (empty($file)) {
        $redir_data = File_redirection::where($long_url);
        $file = File::saveNew($redir_data, $long_url);
    }
    $file_id = $file->id;

    // ... and that the short URL is recorded as a redirection to it.
    $file_redir = File_redirection::staticGet('url', $short_url);
    if (empty($file_redir)) {
        $file_redir = new File_redirection;
        $file_redir->url = $short_url;
        $file_redir->file_id = $file_id;
        $file_redir->insert();
    }

    return $short_url;
}
/**
 * Normalise a URL-like string.
 *
 * - Input lacking a scheme or host gets $default_scheme prepended, unless
 *   it uses one of the host-less schemes handled explicitly below.
 * - fax: and tel: values have the punctuation characters . - ( ) removed.
 * - ftp/http/https URLs with an empty path get a trailing slash.
 *
 * @param string $in_url         text to normalise
 * @param string $default_scheme prefix used when the input has no usable scheme
 *
 * @return string|boolean normalised URL, or false when the input is empty
 *                        or cannot be turned into a URL with a scheme/host
 */
function _canonUrl($in_url, $default_scheme = 'http://') {
    if (empty($in_url)) return false;

    $out_url = $in_url;
    $p = parse_url($out_url);

    if (empty($p['host']) || empty($p['scheme'])) {
        list($scheme) = explode(':', $in_url, 2);
        switch ($scheme) {
        case 'fax':
        case 'tel':
            // Each character is removed individually; a single search string
            // '.-()' would only match that literal four-character sequence.
            $out_url = str_replace(array('.', '-', '(', ')'), '', $out_url);
            break;

        case 'mailto':
        case 'aim':
        case 'jabber':
        case 'xmpp':
            // don't touch anything
            break;

        default:
            $out_url = $default_scheme . ltrim($out_url, '/');
            $p = parse_url($out_url);
            if (empty($p['scheme'])) return false;
            break;
        }
    }

    // parse_url() may have produced no scheme (e.g. "tel:12345" is read as
    // host:port), so look it up defensively.
    $parsed_scheme = isset($p['scheme']) ? $p['scheme'] : '';
    if (in_array($parsed_scheme, array('ftp', 'http', 'https'), true)) {
        if (empty($p['host'])) return false;
        if (empty($p['path'])) {
            $out_url .= '/';
        }
    }

    return $out_url;
}
/**
 * Record that $url redirects to the file identified by $file_id.
 *
 * @param array   $data    probe result; 'redirects' and 'code' are stored
 *                         as integers
 * @param integer $file_id id of the File the URL resolves to
 * @param string  $url     the redirecting URL
 *
 * @return void
 */
function saveNew($data, $file_id, $url) {
    $hop_count = intval($data['redirects']);
    $status = intval($data['code']);

    $redirection = new File_redirection;
    $redirection->url = $url;
    $redirection->file_id = $file_id;
    $redirection->redirections = $hop_count;
    $redirection->httpcode = $status;
    $redirection->insert();
}
}

View File

@ -40,4 +40,21 @@ class File_to_post extends Memcached_DataObject
/* the code above is auto generated do not remove the tag below */
###END_AUTOCODE
/**
 * Link a file to a notice, at most once per (notice, file) pair for the
 * lifetime of the static cache kept inside this function.
 *
 * @param integer $file_id   id of the file
 * @param integer $notice_id id of the notice (stored as post_id)
 *
 * @return void
 */
function processNew($file_id, $notice_id) {
    // notice id => list of file ids already linked by this function
    static $seen = array();

    $linked = empty($seen[$notice_id]) ? array() : $seen[$notice_id];
    if (in_array($file_id, $linked)) {
        return;
    }

    $link = new File_to_post;
    $link->file_id = $file_id;
    $link->post_id = $notice_id;
    $link->insert();

    $linked[] = $file_id;
    $seen[$notice_id] = $linked;
}
}

View File

@ -124,7 +124,7 @@ class Notice extends Memcached_DataObject
$profile = Profile::staticGet($profile_id);
$final = common_shorten_links($content);
// $final = common_shorten_links($content);
if (!$profile) {
common_log(LOG_ERR, 'Problem saving notice. Unknown user.');
@ -167,8 +167,8 @@ class Notice extends Memcached_DataObject
$notice->reply_to = $reply_to;
$notice->created = common_sql_now();
$notice->content = $final;
$notice->rendered = common_render_content($final, $notice);
$notice->content = $content;
$notice->rendered = common_render_content($content, $notice);
$notice->source = $source;
$notice->uri = $uri;

View File

@ -1,67 +1,3 @@
[file]
id = 129
url = 2
mimetype = 2
size = 1
title = 2
date = 1
protected = 17
[file__keys]
id = K
url = U
[file_oembed]
id = 129
file_id = 1
version = 2
type = 2
provider = 2
provider_url = 2
width = 1
height = 1
html = 66
title = 2
author_name = 2
author_url = 2
url = 2
[file_oembed__keys]
id = K
file_id = U
[file_redirection]
id = 129
url = 2
file_id = 1
redirections = 1
httpcode = 1
[file_redirection__keys]
id = K
url = U
[file_thumbnail]
id = 129
file_id = 1
url = 2
width = 1
height = 1
[file_thumbnail__keys]
id = K
file_id = U
url = U
[file_to_post]
id = 129
file_id = 1
post_id = 1
[file_to_post__keys]
id = K
[avatar]
profile_id = 129
original = 17

View File

@ -53,62 +53,5 @@ file_id = file:id
[file_to_post]
file_id = file:id
post_id = post:id
[avatar]
profile_id = profile:id
[user]
id = profile:id
carrier = sms_carrier:id
[remote_profile]
id = profile:id
[notice]
profile_id = profile:id
reply_to = notice:id
[reply]
notice_id = notice:id
profile_id = profile:id
[token]
consumer_key = consumer:consumer_key
[nonce]
consumer_key,token = token:consumer_key,token
[user_openid]
user_id = user:id
[confirm_address]
user_id = user:id
[remember_me]
user_id = user:id
[queue_item]
notice_id = notice:id
[subscription]
subscriber = profile:id
subscribed = profile:id
[fave]
notice_id = notice:id
user_id = user:id
[file_oembed]
file_id = file:id
[file_redirection]
file_id = file:id
[file_thumbnail]
file_id = file:id
[file_to_post]
file_id = file:id
post_id = post:id
post_id = notice:id

View File

@ -22,6 +22,7 @@ if (!defined('LACONICA')) { exit(1); }
class ShortUrlApi
{
protected $service_url;
protected $long_limit = 27;
function __construct($service_url)
{
@ -39,7 +40,7 @@ class ShortUrlApi
}
private function is_long($url) {
return strlen($url) >= 30;
return strlen($url) >= $this->long_limit;
}
protected function http_post($data) {

View File

@ -466,10 +466,10 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) {
$url = (mb_strpos($orig_url, htmlspecialchars($url)) === FALSE) ? $url:htmlspecialchars($url);
// Call user specified func
if (isset($notice_id)) {
$modified_url = call_user_func($callback, array($url, $notice_id));
} else {
if (empty($notice_id)) {
$modified_url = call_user_func($callback, $url);
} else {
$modified_url = call_user_func($callback, array($url, $notice_id));
}
// Replace it!
@ -485,107 +485,29 @@ function common_linkify($url) {
// It comes in special'd, so we unspecial it before passing to the stringifying
// functions
$url = htmlspecialchars_decode($url);
$display = $url;
$url = (!preg_match('#^([a-z]+://|(mailto|aim|tel):)#i', $url)) ? 'http://'.$url : $url;
$attrs = array('href' => $url, 'rel' => 'external');
if ($longurl = common_longurl($url)) {
$attrs['title'] = $longurl;
$display = File_redirection::_canonUrl($url);
$longurl_data = File_redirection::where($url);
if (is_array($longurl_data)) {
$longurl = $longurl_data['url'];
} elseif (is_string($longurl_data)) {
$longurl = $longurl_data;
} else {
die('impossible to linkify');
}
$attrs = array('href' => $longurl, 'rel' => 'external');
if(0){
if ($longurl !== $url) {
$attrs['title'] = $longurl;
}
}
return XMLStringer::estring('a', $attrs, $display);
}
/**
 * Look up the long form of $short_url through common_shorten_link()'s
 * reverse mode.
 *
 * @param string $short_url URL to expand
 *
 * @return string|boolean the long URL, or false when the lookup returns
 *                        the input unchanged
 */
function common_longurl($short_url)
{
    $expanded = common_shorten_link($short_url, true);
    return ($expanded === $short_url) ? false : $expanded;
}
/**
 * Expand $uri through the longurl.org API (PHP-serialized response format).
 *
 * @param string $uri URL to expand
 *
 * @return string|boolean the expanded URL, or false when the request fails,
 *                        the response is unusable, or the URL is unchanged
 */
function common_longurl2($uri)
{
    $uri_e = urlencode($uri);
    $raw = file_get_contents("http://api.longurl.org/v1/expand?format=php&url=$uri_e");
    if (false === $raw) return false;   // request failed

    // SECURITY NOTE(review): unserialize() on data fetched from a remote
    // service can instantiate arbitrary objects; consider a JSON response
    // format if the API offers one.
    $longurl = unserialize($raw);
    if (!is_array($longurl) || empty($longurl['long_url']) || !is_string($longurl['long_url'])) {
        return false;
    }
    if ($uri === $longurl['long_url']) return false;
    return stripslashes($longurl['long_url']);
}
function common_shorten_links($text)
{
if (mb_strlen($text) <= 140) return $text;
static $cache = array();
if (isset($cache[$text])) return $cache[$text];
// \s = not a horizontal whitespace character (since PHP 5.2.4)
return $cache[$text] = common_replace_urls_callback($text, 'common_shorten_link');;
}
function common_shorten_link($url, $reverse = false)
{
static $url_cache = array();
if ($reverse) return isset($url_cache[$url]) ? $url_cache[$url] : $url;
$user = common_current_user();
if (!isset($user)) {
// common current user does not find a user when called from the XMPP daemon
// therefore we'll set one here fix, so that XMPP given URLs may be shortened
$user->urlshorteningservice = 'ur1.ca';
}
$curlh = curl_init();
curl_setopt($curlh, CURLOPT_CONNECTTIMEOUT, 20); // # seconds to wait
curl_setopt($curlh, CURLOPT_USERAGENT, 'Laconica');
curl_setopt($curlh, CURLOPT_RETURNTRANSFER, true);
switch($user->urlshorteningservice) {
case 'ur1.ca':
$short_url_service = new LilUrl;
$short_url = $short_url_service->shorten($url);
break;
case '2tu.us':
$short_url_service = new TightUrl;
$short_url = $short_url_service->shorten($url);
break;
case 'ptiturl.com':
$short_url_service = new PtitUrl;
$short_url = $short_url_service->shorten($url);
break;
case 'bit.ly':
curl_setopt($curlh, CURLOPT_URL, 'http://bit.ly/api?method=shorten&long_url='.urlencode($url));
$short_url = current(json_decode(curl_exec($curlh))->results)->hashUrl;
break;
case 'is.gd':
curl_setopt($curlh, CURLOPT_URL, 'http://is.gd/api.php?longurl='.urlencode($url));
$short_url = curl_exec($curlh);
break;
case 'snipr.com':
curl_setopt($curlh, CURLOPT_URL, 'http://snipr.com/site/snip?r=simple&link='.urlencode($url));
$short_url = curl_exec($curlh);
break;
case 'metamark.net':
curl_setopt($curlh, CURLOPT_URL, 'http://metamark.net/api/rest/simple?long_url='.urlencode($url));
$short_url = curl_exec($curlh);
break;
case 'tinyurl.com':
curl_setopt($curlh, CURLOPT_URL, 'http://tinyurl.com/api-create.php?url='.urlencode($url));
$short_url = curl_exec($curlh);
break;
default:
$short_url = false;
}
curl_close($curlh);
if ($short_url) {
$url_cache[(string)$short_url] = $url;
return (string)$short_url;
}
return $url;
return common_replace_urls_callback($text, array('File_redirection', 'makeShort'));
}
function common_xml_safe_str($str)