Latest Mf2/Parser.php is compatible with PHP5.3
This commit is contained in:
parent
ee41bc560c
commit
c2998e26ec
|
@ -69,7 +69,7 @@ function fetch($url, $convertClassic = true, &$curlInfo=null) {
|
||||||
curl_setopt($ch, CURLOPT_HEADER, 0);
|
curl_setopt($ch, CURLOPT_HEADER, 0);
|
||||||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
|
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
|
||||||
curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
|
curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
|
||||||
$response = curl_exec($ch);
|
$html = curl_exec($ch);
|
||||||
$info = $curlInfo = curl_getinfo($ch);
|
$info = $curlInfo = curl_getinfo($ch);
|
||||||
curl_close($ch);
|
curl_close($ch);
|
||||||
|
|
||||||
|
@ -78,7 +78,6 @@ function fetch($url, $convertClassic = true, &$curlInfo=null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = mb_substr($response, $info['header_size']);
|
|
||||||
return parse($html, $url, $convertClassic);
|
return parse($html, $url, $convertClassic);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,12 +122,14 @@ function unicodeTrim($str) {
|
||||||
* @return string|array The prefixed name of the first microformats class found or false
|
* @return string|array The prefixed name of the first microformats class found or false
|
||||||
*/
|
*/
|
||||||
function mfNamesFromClass($class, $prefix='h-') {
|
function mfNamesFromClass($class, $prefix='h-') {
|
||||||
$class = str_replace([' ', ' ', "\n"], ' ', $class);
|
$class = str_replace(array(' ', ' ', "\n"), ' ', $class);
|
||||||
$classes = explode(' ', $class);
|
$classes = explode(' ', $class);
|
||||||
$matches = array();
|
$matches = array();
|
||||||
|
|
||||||
foreach ($classes as $classname) {
|
foreach ($classes as $classname) {
|
||||||
if (strpos($classname, $prefix) === 0 && $classname !== $prefix) {
|
$compare_classname = strtolower(' ' . $classname);
|
||||||
|
$compare_prefix = strtolower(' ' . $prefix);
|
||||||
|
if (stristr($compare_classname, $compare_prefix) !== false && ($compare_classname != $compare_prefix)) {
|
||||||
$matches[] = ($prefix === 'h-') ? $classname : substr($classname, strlen($prefix));
|
$matches[] = ($prefix === 'h-') ? $classname : substr($classname, strlen($prefix));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -149,10 +150,11 @@ function nestedMfPropertyNamesFromClass($class) {
|
||||||
$prefixes = array('p-', 'u-', 'dt-', 'e-');
|
$prefixes = array('p-', 'u-', 'dt-', 'e-');
|
||||||
$propertyNames = array();
|
$propertyNames = array();
|
||||||
|
|
||||||
$class = str_replace([' ', ' ', "\n"], ' ', $class);
|
$class = str_replace(array(' ', ' ', "\n"), ' ', $class);
|
||||||
foreach (explode(' ', $class) as $classname) {
|
foreach (explode(' ', $class) as $classname) {
|
||||||
foreach ($prefixes as $prefix) {
|
foreach ($prefixes as $prefix) {
|
||||||
if (strpos($classname, $prefix) === 0 and $classname !== $prefix) {
|
$compare_classname = strtolower(' ' . $classname);
|
||||||
|
if (stristr($compare_classname, $prefix) && ($compare_classname != $prefix)) {
|
||||||
$propertyNames = array_merge($propertyNames, mfNamesFromClass($classname, ltrim($prefix)));
|
$propertyNames = array_merge($propertyNames, mfNamesFromClass($classname, ltrim($prefix)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -191,7 +193,7 @@ function convertTimeFormat($time) {
|
||||||
preg_match('/(\d{1,2}):?(\d{2})?:?(\d{2})?(a\.?m\.?|p\.?m\.?)?/i', $time, $matches);
|
preg_match('/(\d{1,2}):?(\d{2})?:?(\d{2})?(a\.?m\.?|p\.?m\.?)?/i', $time, $matches);
|
||||||
|
|
||||||
// if no am/pm specified
|
// if no am/pm specified
|
||||||
if ( empty($matches[4]) ) {
|
if (empty($matches[4])) {
|
||||||
return $time;
|
return $time;
|
||||||
}
|
}
|
||||||
// else am/pm specified
|
// else am/pm specified
|
||||||
|
@ -202,31 +204,27 @@ function convertTimeFormat($time) {
|
||||||
$hh = $matches[1];
|
$hh = $matches[1];
|
||||||
|
|
||||||
// add 12 to the pm hours
|
// add 12 to the pm hours
|
||||||
if ( $meridiem == 'pm' && ($hh < 12) )
|
if ($meridiem == 'pm' && ($hh < 12)) {
|
||||||
{
|
|
||||||
$hh += 12;
|
$hh += 12;
|
||||||
}
|
}
|
||||||
|
|
||||||
$hh = str_pad($hh, 2, '0', STR_PAD_LEFT);
|
$hh = str_pad($hh, 2, '0', STR_PAD_LEFT);
|
||||||
|
|
||||||
// minutes
|
// minutes
|
||||||
$mm = ( empty($matches[2]) ) ? '00' : $matches[2];
|
$mm = (empty($matches[2]) ) ? '00' : $matches[2];
|
||||||
|
|
||||||
// seconds, only if supplied
|
// seconds, only if supplied
|
||||||
if ( !empty($matches[3]) )
|
if (!empty($matches[3])) {
|
||||||
{
|
|
||||||
$ss = $matches[3];
|
$ss = $matches[3];
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( empty($ss) ) {
|
if (empty($ss)) {
|
||||||
return sprintf('%s:%s', $hh, $mm);
|
return sprintf('%s:%s', $hh, $mm);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return sprintf('%s:%s:%s', $hh, $mm, $ss);
|
return sprintf('%s:%s:%s', $hh, $mm, $ss);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -293,6 +291,11 @@ class Parser {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ignore <template> elements as per the HTML5 spec
|
||||||
|
foreach ($this->xpath->query('//template') as $templateEl) {
|
||||||
|
$templateEl->parentNode->removeChild($templateEl);
|
||||||
|
}
|
||||||
|
|
||||||
$this->baseurl = $baseurl;
|
$this->baseurl = $baseurl;
|
||||||
$this->doc = $doc;
|
$this->doc = $doc;
|
||||||
|
@ -320,7 +323,33 @@ class Parser {
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private function resolveChildUrls(DOMElement $el) {
|
||||||
|
$hyperlinkChildren = $this->xpath->query('.//*[@src or @href or @data]', $el);
|
||||||
|
|
||||||
|
foreach ($hyperlinkChildren as $child) {
|
||||||
|
if ($child->hasAttribute('href'))
|
||||||
|
$child->setAttribute('href', $this->resolveUrl($child->getAttribute('href')));
|
||||||
|
if ($child->hasAttribute('src'))
|
||||||
|
$child->setAttribute('src', $this->resolveUrl($child->getAttribute('src')));
|
||||||
|
if ($child->hasAttribute('data'))
|
||||||
|
$child->setAttribute('data', $this->resolveUrl($child->getAttribute('data')));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public function textContent(DOMElement $el) {
|
||||||
|
$this->resolveChildUrls($el);
|
||||||
|
|
||||||
|
$clonedEl = $el->cloneNode(true);
|
||||||
|
|
||||||
|
foreach ($this->xpath->query('.//img', $clonedEl) as $imgEl) {
|
||||||
|
$newNode = $this->doc->createTextNode($imgEl->getAttribute($imgEl->hasAttribute('alt') ? 'alt' : 'src'));
|
||||||
|
$imgEl->parentNode->replaceChild($newNode, $imgEl);
|
||||||
|
}
|
||||||
|
|
||||||
|
return $clonedEl->textContent;
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: figure out if this has problems with sms: and geo: URLs
|
// TODO: figure out if this has problems with sms: and geo: URLs
|
||||||
public function resolveUrl($url) {
|
public function resolveUrl($url) {
|
||||||
// If the URL is seriously malformed it’s probably beyond the scope of this
|
// If the URL is seriously malformed it’s probably beyond the scope of this
|
||||||
|
@ -354,7 +383,7 @@ class Parser {
|
||||||
// Process value-class stuff
|
// Process value-class stuff
|
||||||
$val = '';
|
$val = '';
|
||||||
foreach ($valueClassElements as $el) {
|
foreach ($valueClassElements as $el) {
|
||||||
$val .= $el->textContent;
|
$val .= $this->textContent($el);
|
||||||
}
|
}
|
||||||
|
|
||||||
return unicodeTrim($val);
|
return unicodeTrim($val);
|
||||||
|
@ -398,7 +427,7 @@ class Parser {
|
||||||
} elseif (in_array($p->tagName, array('data', 'input')) and $p->getAttribute('value') !== '') {
|
} elseif (in_array($p->tagName, array('data', 'input')) and $p->getAttribute('value') !== '') {
|
||||||
$pValue = $p->getAttribute('value');
|
$pValue = $p->getAttribute('value');
|
||||||
} else {
|
} else {
|
||||||
$pValue = unicodeTrim($p->textContent);
|
$pValue = unicodeTrim($this->textContent($p));
|
||||||
}
|
}
|
||||||
|
|
||||||
return $pValue;
|
return $pValue;
|
||||||
|
@ -433,7 +462,7 @@ class Parser {
|
||||||
} elseif (in_array($u->tagName, array('data', 'input')) and $u->getAttribute('value') !== null) {
|
} elseif (in_array($u->tagName, array('data', 'input')) and $u->getAttribute('value') !== null) {
|
||||||
return $u->getAttribute('value');
|
return $u->getAttribute('value');
|
||||||
} else {
|
} else {
|
||||||
return unicodeTrim($u->textContent);
|
return unicodeTrim($this->textContent($u));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -595,17 +624,8 @@ class Parser {
|
||||||
|
|
||||||
// Expand relative URLs within children of this element
|
// Expand relative URLs within children of this element
|
||||||
// TODO: as it is this is not relative to only children, make this .// and rerun tests
|
// TODO: as it is this is not relative to only children, make this .// and rerun tests
|
||||||
$hyperlinkChildren = $this->xpath->query('//*[@src or @href or @data]', $e);
|
$this->resolveChildUrls($e);
|
||||||
|
|
||||||
foreach ($hyperlinkChildren as $child) {
|
|
||||||
if ($child->hasAttribute('href'))
|
|
||||||
$child->setAttribute('href', $this->resolveUrl($child->getAttribute('href')));
|
|
||||||
if ($child->hasAttribute('src'))
|
|
||||||
$child->setAttribute('src', $this->resolveUrl($child->getAttribute('src')));
|
|
||||||
if ($child->hasAttribute('data'))
|
|
||||||
$child->setAttribute('data', $this->resolveUrl($child->getAttribute('data')));
|
|
||||||
}
|
|
||||||
|
|
||||||
$html = '';
|
$html = '';
|
||||||
foreach ($e->childNodes as $node) {
|
foreach ($e->childNodes as $node) {
|
||||||
$html .= $node->C14N();
|
$html .= $node->C14N();
|
||||||
|
@ -613,7 +633,7 @@ class Parser {
|
||||||
|
|
||||||
return array(
|
return array(
|
||||||
'html' => $html,
|
'html' => $html,
|
||||||
'value' => unicodeTrim($e->textContent)
|
'value' => unicodeTrim($this->textContent($e))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -994,7 +1014,8 @@ class Parser {
|
||||||
'hrecipe' => 'h-recipe',
|
'hrecipe' => 'h-recipe',
|
||||||
'hresume' => 'h-resume',
|
'hresume' => 'h-resume',
|
||||||
'hevent' => 'h-event',
|
'hevent' => 'h-event',
|
||||||
'hreview' => 'h-review'
|
'hreview' => 'h-review',
|
||||||
|
'hproduct' => 'h-product'
|
||||||
);
|
);
|
||||||
|
|
||||||
public $classicPropertyMap = array(
|
public $classicPropertyMap = array(
|
||||||
|
@ -1086,6 +1107,17 @@ class Parser {
|
||||||
'best' => 'p-best',
|
'best' => 'p-best',
|
||||||
'worst' => 'p-worst',
|
'worst' => 'p-worst',
|
||||||
'description' => 'p-description'
|
'description' => 'p-description'
|
||||||
|
),
|
||||||
|
'hproduct' => array(
|
||||||
|
'fn' => 'p-name',
|
||||||
|
'photo' => 'u-photo',
|
||||||
|
'brand' => 'p-brand',
|
||||||
|
'category' => 'p-category',
|
||||||
|
'description' => 'p-description',
|
||||||
|
'identifier' => 'u-identifier',
|
||||||
|
'url' => 'u-url',
|
||||||
|
'review' => 'p-review h-review',
|
||||||
|
'price' => 'p-price'
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user