Merge branch 'testing' into moveaccount

This commit is contained in:
Evan Prodromou 2011-01-03 10:56:13 -08:00
commit 0e8085c108
6 changed files with 184 additions and 52 deletions

16
README
View File

@ -1556,6 +1556,22 @@ cache: whether to cache the router in memcache (or another caching
router cached) or others who see strange behavior. You're unlikely
to need this unless you're a developer.
http
----
Settings for the HTTP client.
ssl_cafile: location of the CA file for SSL. If not set, won't verify
SSL peers. Default unset.
curl: Use cURL <http://curl.haxx.se/> for doing HTTP calls. You must
have the PHP curl extension installed for this to work. Default false.
proxy_host: Host to use for proxying HTTP requests. If unset, no HTTP
proxy is used. Default unset.
proxy_port: Port to use to connect to HTTP proxy host. Default null.
proxy_user: Username to use for authenticating to the HTTP proxy. Default null.
proxy_password: Password to use for authenticating to the HTTP proxy. Default null.
proxy_auth_scheme: Scheme to use for authenticating to the HTTP proxy. Default null.
Plugins
=======

View File

@ -331,6 +331,11 @@ $default =
'http' => // HTTP client settings when contacting other sites
array('ssl_cafile' => false, // To enable SSL cert validation, point to a CA bundle (eg '/usr/lib/ssl/certs/ca-certificates.crt')
'curl' => false, // Use CURL backend for HTTP fetches if available. (If not, PHP's socket streams will be used.)
'proxy_host' => null,
'proxy_port' => null,
'proxy_user' => null,
'proxy_password' => null,
'proxy_auth_scheme' => null,
),
'router' =>
array('cache' => true), // whether to cache the router object. Defaults to true, turn off for devel

View File

@ -149,6 +149,14 @@ class HTTPClient extends HTTP_Request2
$this->config['adapter'] = 'HTTP_Request2_Adapter_Curl';
}
foreach (array('host', 'port', 'user', 'password', 'auth_scheme') as $cf) {
$k = 'proxy_'.$cf;
$v = common_config('http', $k);
if (!empty($v)) {
$this->config[$k] = $v;
}
}
parent::__construct($url, $method, $config);
$this->setHeader('User-Agent', $this->userAgent());
}

View File

@ -65,7 +65,7 @@ class DeliciousBackupImporter extends QueueHandler
* and import to StatusNet as Bookmark activities.
*
* The document format is terrible. It consists of a <dl> with
* a bunch of <dt>'s, occasionally with <dd>'s.
* a bunch of <dt>'s, occasionally with <dd>'s adding descriptions.
* There are sometimes <p>'s lost inside.
*
* @param array $data pair of user, text
@ -99,6 +99,9 @@ class DeliciousBackupImporter extends QueueHandler
}
switch (strtolower($child->tagName)) {
case 'dt':
// <dt> nodes contain primary information about a bookmark.
// We can't import the current one just yet though, since
// it may be followed by a <dd>.
if (!empty($dt)) {
// No DD provided
$this->importBookmark($user, $dt);
@ -109,10 +112,13 @@ class DeliciousBackupImporter extends QueueHandler
case 'dd':
$dd = $child;
// This <dd> contains a description for the bookmark in
// the preceding <dt> node.
$saved = $this->importBookmark($user, $dt, $dd);
$dt = null;
$dd = null;
break;
case 'p':
common_log(LOG_INFO, 'Skipping the <p> in the <dl>.');
break;
@ -126,6 +132,14 @@ class DeliciousBackupImporter extends QueueHandler
$dt = $dd = null;
}
}
if (!empty($dt)) {
// There was a final bookmark without a description.
try {
$this->importBookmark($user, $dt);
} catch (Exception $e) {
common_log(LOG_ERR, $e->getMessage());
}
}
return true;
}
@ -148,24 +162,38 @@ class DeliciousBackupImporter extends QueueHandler
function importBookmark($user, $dt, $dd = null)
{
// We have to go squirrelling around in the child nodes
// on the off chance that we've received another <dt>
// as a child.
$as = $dt->getElementsByTagName('a');
for ($i = 0; $i < $dt->childNodes->length; $i++) {
$child = $dt->childNodes->item($i);
if ($child->nodeType == XML_ELEMENT_NODE) {
if ($child->tagName == 'dt' && !is_null($dd)) {
$this->importBookmark($user, $dt);
$this->importBookmark($user, $child, $dd);
return;
}
}
if ($as->length == 0) {
throw new ClientException(_("No <A> tag in a <DT>."));
}
$qm = QueueManager::get();
$a = $as->item(0);
$qm->enqueue(array($user, $dt, $dd), 'dlcsbkmk');
$private = $a->getAttribute('private');
if ($private != 0) {
throw new ClientException(_('Skipping private bookmark.'));
}
if (!empty($dd)) {
$description = $dd->nodeValue;
} else {
$description = null;
}
$addDate = $a->getAttribute('add_date');
$data = array(
'profile_id' => $user->id,
'title' => $a->nodeValue,
'description' => $description,
'url' => $a->getAttribute('href'),
'tags' => $a->getAttribute('tags'),
'created' => common_sql_date(intval($addDate))
);
$qm = QueueManager::get();
$qm->enqueue($data, 'dlcsbkmk');
}
/**
@ -188,9 +216,95 @@ class DeliciousBackupImporter extends QueueHandler
error_reporting($old);
if ($ok) {
foreach ($dom->getElementsByTagName('body') as $node) {
$this->fixListsIn($node);
}
return $dom;
} else {
return null;
}
}
/**
 * Repair every <dl> element that is a direct child of the given node.
 *
 * Matching children are gathered first and handed to fixList()
 * afterwards, one at a time, in document order.
 *
 * @param DOMNode $body node whose immediate children are scanned
 *
 * @return void
 */
function fixListsIn(DOMNode $body) {
    $lists = array();
    foreach ($body->childNodes as $child) {
        // Text nodes, comments etc. are of no interest here.
        if ($child->nodeType != XML_ELEMENT_NODE) {
            continue;
        }
        if (strtolower($child->nodeName) == 'dl') {
            $lists[] = $child;
        }
    }
    foreach ($lists as $list) {
        $this->fixList($list);
    }
}
/**
 * Repair the direct children of a single <dl> element.
 *
 * Nested <dl> children are fixed recursively as soon as they are
 * encountered; <dt> and <dd> children are gathered and passed to
 * fixListItem() once the scan of the child list is complete.
 *
 * @param DOMNode $list the <dl> node to repair
 *
 * @return void
 */
function fixList(DOMNode $list) {
    $items = array();
    foreach ($list->childNodes as $child) {
        if ($child->nodeType != XML_ELEMENT_NODE) {
            continue;
        }
        switch (strtolower($child->nodeName)) {
        case 'dt':
        case 'dd':
            $items[] = $child;
            break;
        case 'dl':
            // Sublist.
            // Technically, these can only appear inside a <dd>...
            $this->fixList($child);
            break;
        }
    }
    foreach ($items as $item) {
        $this->fixListItem($item);
    }
}
/**
 * Un-nest <dt>/<dd> elements that ended up inside another <dt>/<dd>.
 *
 * The HTML parser in libxml2 doesn't seem to properly handle many
 * cases of implied close tags, apparently because it doesn't
 * understand the nesting rules specified in the HTML DTD. Sequences
 * of adjacent <dt>s or <dd>s therefore come out as parent->child
 * trees instead of siblings:
 *
 *   input:             "<dt>aaa <dt>bbb"
 *   should be read as: "<dt>aaa </dt><dt>bbb</dt>"
 *   but is parsed as:  "<dt>aaa <dt>bbb</dt></dt>"
 *
 * It does at least know that going from dt to dd, or dd to dt,
 * should make a break.
 *
 * Every misplaced <dt>/<dd> child is pulled out of $item and
 * re-inserted into $item's parent, just before the node that
 * originally followed $item, and is then fixed itself. Nested <dl>
 * children are handed to fixList().
 *
 * @param DOMNode $item the <dt> or <dd> node to repair
 *
 * @return void
 */
function fixListItem(DOMNode $item) {
    $misplaced = array();
    foreach ($item->childNodes as $child) {
        if ($child->nodeType != XML_ELEMENT_NODE) {
            continue;
        }
        switch (strtolower($child->nodeName)) {
        case 'dt':
        case 'dd':
            // dt & dd cannot contain each other;
            // this node was incorrectly placed and must move up a level.
            $misplaced[] = $child;
            break;
        case 'dl':
            // Sublist.
            // Technically, these can only appear inside a <dd>.
            $this->fixList($child);
            break;
        }
    }

    // Both anchors are read once, before any node is moved.
    $container = $item->parentNode;
    $anchor = $item->nextSibling;
    foreach ($misplaced as $orphan) {
        // removeChild() hands back the detached node, which goes
        // straight back in as a sibling of $item.
        $container->insertBefore($item->removeChild($orphan), $anchor);
        $this->fixListItem($orphan);
    }
}
}

View File

@ -61,49 +61,29 @@ class DeliciousBookmarkImporter extends QueueHandler
/**
* Handle the data
*
* @param array $data array of user, dt, dd
* @param array $data associative array of user & bookmark info from DeliciousBackupImporter::importBookmark()
*
* @return boolean success value
*/
function handle($data)
{
list($user, $dt, $dd) = $data;
$profile = Profile::staticGet('id', $data['profile_id']);
$as = $dt->getElementsByTagName('a');
if ($as->length == 0) {
throw new ClientException(_("No <A> tag in a <DT>."));
try {
$saved = Bookmark::saveNew($profile,
$data['title'],
$data['url'],
$data['tags'],
$data['description'],
array('created' => $data['created'],
'distribute' => false));
} catch (ClientException $e) {
// Most likely a duplicate -- continue on with the rest!
common_log(LOG_ERR, "Error importing delicious bookmark to $data[url]: " . $e->getMessage());
return true;
}
$a = $as->item(0);
$private = $a->getAttribute('private');
if ($private != 0) {
throw new ClientException(_('Skipping private bookmark.'));
}
if (!empty($dd)) {
$description = $dd->nodeValue;
} else {
$description = null;
}
$title = $a->nodeValue;
$url = $a->getAttribute('href');
$tags = $a->getAttribute('tags');
$addDate = $a->getAttribute('add_date');
$created = common_sql_date(intval($addDate));
$saved = Bookmark::saveNew($user->getProfile(),
$title,
$url,
$tags,
$description,
array('created' => $created,
'distribute' => false));
return true;
}
}

View File

@ -48,6 +48,7 @@ if (!defined('STATUSNET')) {
class ImportdeliciousAction extends Action
{
protected $success = false;
private $inprogress = false;
/**
* Return the title of the page
@ -191,7 +192,13 @@ class ImportdeliciousAction extends Action
$qm = QueueManager::get();
$qm->enqueue(array(common_current_user(), $html), 'dlcsback');
$this->success = true;
if ($qm instanceof UnQueueManager) {
// No active queuing means we've actually just completed the job!
$this->success = true;
} else {
// We've fed data into background queues, and it's probably still running.
$this->inprogress = true;
}
$this->showPage();
@ -212,8 +219,10 @@ class ImportdeliciousAction extends Action
{
if ($this->success) {
$this->element('p', null,
_('Feed will be restored. '.
'Please wait a few minutes for results.'));
_('Bookmarks have been imported. Your bookmarks should now appear in search and your profile page.'));
} else if ($this->inprogress) {
$this->element('p', null,
_('Bookmarks are being imported. Please wait a few minutes for results.'));
} else {
$form = new ImportDeliciousForm($this);
$form->show();