From 1301877dfe89c57c182246c0d7ba0ff6335fd17b Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Thu, 18 Mar 2010 17:08:19 -0700 Subject: [PATCH] OStatus discover fixes: * Subscription::start was sometimes passing users instead of profiles to hooks, which broke OStatus subscription notifications; now normalizing to profiles for processing. * H-card parsing would trigger a lot of PHP warnings and notices in hKit. Now suppressing warnings and notices for the duration of the call to keep them out of output when display_errors is on. * H-card parsing would trigger a PHP fatal error if the source page was not well-formed XML and Tidy was not present on the system. Switched normalization to use the PHP DOM module which is always present, as we have no need for Tidy's extra features here. * Trying to fetch avatars from Google profiles failed and triggered a PHP warning due to the relative URL not being resolved during h-card parsing. Now passing profile page URL into hKit by sneaking a tag in while we normalize the HTML source. * Profile pages without a "Link" header could trigger PHP notices due to a bad NULL -> array(NULL) conversion in LinkHeader::getLink(). Now checking that there was a return value before converting single return value into array. --- classes/Subscription.php | 8 +++ lib/activity.php | 4 +- plugins/OStatus/lib/discoveryhints.php | 87 +++++++++++++++++--------- plugins/OStatus/lib/linkheader.php | 24 +++---- 4 files changed, 79 insertions(+), 44 deletions(-) diff --git a/classes/Subscription.php b/classes/Subscription.php index 97c44a2e46..60c12cccc3 100644 --- a/classes/Subscription.php +++ b/classes/Subscription.php @@ -62,6 +62,14 @@ class Subscription extends Memcached_DataObject static function start($subscriber, $other) { + // @fixme should we enforce this as profiles in callers instead? + if ($subscriber instanceof User) { + $subscriber = $subscriber->getProfile(); + } + if ($other instanceof User) { + $other = $other->getProfile(); + } + if (!$subscriber->hasRight(Right::SUBSCRIBE)) { throw new Exception(_('You have been banned from subscribing.')); } diff --git a/lib/activity.php b/lib/activity.php index d84eabf7c4..c67d090f72 100644 --- a/lib/activity.php +++ b/lib/activity.php @@ -720,7 +720,7 @@ class ActivityObject } } - static function fromNotice($notice) + static function fromNotice(Notice $notice) { $object = new ActivityObject(); @@ -734,7 +734,7 @@ class ActivityObject return $object; } - static function fromProfile($profile) + static function fromProfile(Profile $profile) { $object = new ActivityObject(); diff --git a/plugins/OStatus/lib/discoveryhints.php b/plugins/OStatus/lib/discoveryhints.php index db13793dde..4da2ec0f1e 100644 --- a/plugins/OStatus/lib/discoveryhints.php +++ b/plugins/OStatus/lib/discoveryhints.php @@ -65,17 +65,22 @@ class DiscoveryHints { { common_debug("starting tidy"); - $body = self::_tidy($body); + $body = self::_tidy($body, $url); common_debug("done with tidy"); set_include_path(get_include_path() . PATH_SEPARATOR . INSTALLDIR . '/plugins/OStatus/extlib/hkit/'); require_once('hkit.class.php'); - $h = new hKit; + // hKit code is not clean for notices and warnings + $old = error_reporting(); + error_reporting($old & ~E_NOTICE & ~E_WARNING); + $h = new hKit; $hcards = $h->getByString('hcard', $body); + error_reporting($old); + if (empty($hcards)) { return array(); } @@ -144,39 +149,61 @@ class DiscoveryHints { return $hints; } - private static function _tidy($body) + /** + * hKit needs well-formed XML for its parsing. + * We'll take the HTML body here and normalize it to XML. + * + * @param string $body HTML document source, possibly not-well-formed + * @param string $url source URL + * @return string well-formed XML document source + * @throws Exception if HTML parsing failed. + */ + private static function _tidy($body, $url) { - if (function_exists('tidy_parse_string')) { - common_debug("Tidying with extension"); - $text = tidy_parse_string($body); - $text = tidy_clean_repair($text); - return $body; - } else if ($fullpath = self::_findProgram('tidy')) { - common_debug("Tidying with program $fullpath"); - $tempfile = tempnam('/tmp', 'snht'); // statusnet hcard tidy - file_put_contents($tempfile, $source); - exec("$fullpath -utf8 -indent -asxhtml -numeric -bare -quiet $tempfile", $tidy); - unlink($tempfile); - return implode("\n", $tidy); - } else { - common_debug("Not tidying."); - return $body; + if (empty($body)) { + throw new Exception("Empty HTML could not be parsed."); } - } + $dom = new DOMDocument(); - private static function _findProgram($name) - { - $path = $_ENV['PATH']; + // Some HTML errors will trigger warnings, but still work. + $old = error_reporting(); + error_reporting($old & ~E_WARNING); + + $ok = $dom->loadHTML($body); - $parts = explode(':', $path); - - foreach ($parts as $part) { - $fullpath = $part . '/' . $name; - if (is_executable($fullpath)) { - return $fullpath; + error_reporting($old); + + if ($ok) { + // hKit doesn't give us a chance to pass the source URL for + // resolving relative links, such as the avatar photo on a + // Google profile. We'll slip it into a tag if there's + // not already one present. + $bases = $dom->getElementsByTagName('base'); + if ($bases && $bases->length >= 1) { + $base = $bases->item(0); + if ($base->hasAttribute('href')) { + $base->setAttribute('href', $url); + } + } else { + $base = $dom->createElement('base'); + $base->setAttribute('href', $url); + $heads = $dom->getElementsByTagName('head'); + if ($heads || $heads->length) { + $head = $heads->item(0); + } else { + $head = $dom->createElement('head'); + $root = $dom->documentRoot; + if ($root->firstChild) { + $root->insertBefore($head, $root->firstChild); + } else { + $root->appendChild($head); + } + } + $head->appendChild($base); } + return $dom->saveXML(); + } else { + throw new Exception("Invalid HTML could not be parsed."); } - - return null; } } diff --git a/plugins/OStatus/lib/linkheader.php b/plugins/OStatus/lib/linkheader.php index 2f6c66dc97..afcd66d264 100644 --- a/plugins/OStatus/lib/linkheader.php +++ b/plugins/OStatus/lib/linkheader.php @@ -43,21 +43,21 @@ class LinkHeader static function getLink($response, $rel=null, $type=null) { $headers = $response->getHeader('Link'); + if ($headers) { + // Can get an array or string, so try to simplify the path + if (!is_array($headers)) { + $headers = array($headers); + } - // Can get an array or string, so try to simplify the path - if (!is_array($headers)) { - $headers = array($headers); - } + foreach ($headers as $header) { + $lh = new LinkHeader($header); - foreach ($headers as $header) { - $lh = new LinkHeader($header); - - if ((is_null($rel) || $lh->rel == $rel) && - (is_null($type) || $lh->type == $type)) { - return $lh->href; + if ((is_null($rel) || $lh->rel == $rel) && + (is_null($type) || $lh->type == $type)) { + return $lh->href; + } } } - return null; } -} \ No newline at end of file +}