<?php

namespace Masterminds\HTML5\Tests\Parser;

use Masterminds\HTML5\Parser\UTF8Utils;
use Masterminds\HTML5\Parser\Scanner;
use Masterminds\HTML5\Parser\Tokenizer;

/**
 * Tests for the HTML5 tokenizer.
 *
 * Each test pushes a markup fragment through a Tokenizer that reports to an
 * EventStack, then inspects the events found on that stack. As testParse()
 * shows, even an empty document yields one trailing 'eof' event, so every
 * expected depth below includes that event.
 */
class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
{
    // ================================================================
    // Additional assertions.
    // ================================================================

    /**
     * Tests that an event matches both the event type and the expected value.
     *
     * @param string       $type
     *                              Expected event type
     * @param string|array $expects
     *                              When an array, the complete expected $event['data'];
     *                              otherwise the value expected in $event['data'][0]
     *                              (or null when the event carries no data array)
     * @param array        $event
     *                              The event to inspect; its 'name' and 'data' keys are read
     */
    public function assertEventEquals($type, $expects, $event)
    {
        $this->assertEquals($type, $event['name'], "Event $type for " . print_r($event, true));

        if (is_array($expects)) {
            $this->assertEquals($expects, $event['data'], "Event $type should equal " . print_r($expects, true) . ': ' . print_r($event, true));

            return;
        }

        // Events without a data array (e.g. 'eof') are compared against null.
        $d = (is_array($event['data']) ? $event['data'][0] : null);
        $this->assertEquals($expects, $d, "Event $type should equal $expects: " . print_r($event, true));
    }

    /**
     * Assert that a given event is 'error'.
     *
     * @param array $event
     *                     The event to inspect; only its 'name' key is checked
     */
    public function assertEventError($event)
    {
        $this->assertEquals('error', $event['name'], 'Expected error for event: ' . print_r($event, true));
    }

    /**
     * Asserts that all of the tests are good.
     *
     * This loops through a map of tests/expectations and runs a few assertions on each test.
     *
     * Checks:
     * - depth (if depth is > 0)
     * - event name
     * - matches on event 0.
     *
     * @param string $name
     *                      Expected name of the first event
     * @param int    $depth
     *                      Expected number of events; skipped when not greater than 0
     * @param array  $tests
     *                      Map of input markup => expectation (see assertEventEquals())
     * @param bool   $debug
     *                      When true, each case is printed to STDOUT before it runs
     */
    protected function isAllGood($name, $depth, $tests, $debug = false)
    {
        foreach ($tests as $try => $expects) {
            if ($debug) {
                fprintf(STDOUT, "%s expects %s\n", $try, print_r($expects, true));
            }

            $e = $this->parse($try);
            if ($depth > 0) {
                $this->assertEquals($depth, $e->depth(), "Expected depth $depth for test $try." . print_r($e, true));
            }
            $this->assertEventEquals($name, $expects, $e->get(0));
        }
    }

    // ================================================================
    // Tests.
    // ================================================================

    /**
     * An empty document produces exactly one event: 'eof'.
     */
    public function testParse()
    {
        list($tok, $events) = $this->createTokenizer('');

        $tok->parse();
        $e1 = $events->get(0);

        $this->assertEquals(1, $events->depth());
        $this->assertEquals('eof', $e1['name']);
    }

    /**
     * Whitespace-only input is reported as a single text event.
     */
    public function testWhitespace()
    {
        $spaces = ' ';
        list($tok, $events) = $this->createTokenizer($spaces);

        $tok->parse();

        $this->assertEquals(2, $events->depth());

        $e1 = $events->get(0);

        $this->assertEquals('text', $e1['name']);
        $this->assertEquals($spaces, $e1['data'][0]);
    }

    /**
     * Character references are decoded in text; malformed ones raise an error.
     */
    public function testCharacterReference()
    {
        // Keys are written as references on purpose: literal characters
        // would collapse into duplicate array keys and decode nothing.
        $good = array(
            '&amp;' => '&',
            '&#x0003c;' => '<',
            '&#38;' => '&',
            '&' => '&',
        );
        $this->isAllGood('text', 2, $good);

        // Test with broken charrefs: unknown name, bad hex digits, bad decimal digits.
        foreach (array('&foo', '&#xfoo', '&#foo') as $str) {
            $events = $this->parse($str);
            $this->assertEventError($events->get(0));
        }

        // FIXME: Once the text processor is done, need to verify that the
        // tokens are transformed correctly into text.
    }

    /**
     * Malformed markup declarations are reported as an error followed by a
     * comment event that carries the original markup verbatim.
     */
    public function testBogusComment()
    {
        $bogus = array(
            '</+this is a bogus comment. +>',
            '<!+this is a bogus comment. !>',
            '<!D OCTYPE foo bar>',
            '<!DOCTYEP foo bar>',
            '<![CADATA[ TEST ]]>',
            '<![CDATA Hello ]]>',
            '<![CDATA[ Hello [[>',
            '<!CDATA[[ test ]]>',
            '<![CDATA[',
            '<![CDATA[hellooooo hello',
            '<? Hello World ?>',
            '<? Hello World',
        );
        foreach ($bogus as $str) {
            $events = $this->parse($str);
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('comment', $str, $events->get(1));
        }
    }

    /**
     * End tags: well-formed ones, recoverable failures, and bogus comments.
     */
    public function testEndTag()
    {
        $succeed = array(
            '</a>' => 'a',
            '</test>' => 'test',
            "</test\n>" => 'test',
            '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' => 'thisisthetagthatdoesntenditjustgoesonandonmyfriend',
            // See 8.2.4.10, which requires this and does not say error.
            '</a<b>' => 'a<b',
        );
        $this->isAllGood('endTag', 2, $succeed);

        // Recoverable failures
        $fail = array(
            '</a class="monkey">' => 'a',
            '</a <b>' => 'a',
            '</a <b <c>' => 'a',
            '</a is the loneliest letter>' => 'a',
            '</a' => 'a',
        );
        foreach ($fail as $test => $result) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth());
            // Should have triggered an error.
            $this->assertEventError($events->get(0));
            // Should have tried to parse anyway.
            $this->assertEventEquals('endTag', $result, $events->get(1));
        }

        // BogoComments
        $comments = array(
            '</>' => '</>',
            '</ >' => '</ >',
            '</ a>' => '</ a>',
        );
        foreach ($comments as $test => $result) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth());

            // Should have triggered an error.
            $this->assertEventError($events->get(0));

            // Should have tried to parse anyway.
            $this->assertEventEquals('comment', $result, $events->get(1));
        }
    }

    /**
     * Comments: well-formed ones, and unterminated/malformed ones that must
     * raise an error before the comment event.
     */
    public function testComment()
    {
        $good = array(
            '<!--easy-->' => 'easy',
            '<!-- 1 > 0 -->' => ' 1 > 0 ',
            '<!-- --$i -->' => ' --$i ',
            '<!----$i-->' => '--$i',
            "<!--\nHello World.\na-->" => "\nHello World.\na",
            '<!-- <!-- -->' => ' <!-- ',
        );
        foreach ($good as $test => $expected) {
            $events = $this->parse($test);
            $this->assertEventEquals('comment', $expected, $events->get(0));
        }

        $fail = array(
            '<!-->' => '',
            '<!--Hello' => 'Hello',
            "<!--\0Hello" => UTF8Utils::FFFD . 'Hello',
            '<!--' => '',
        );
        foreach ($fail as $test => $expected) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth());
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('comment', $expected, $events->get(1));
        }
    }

    /**
     * Well-formed CDATA sections yield a 'cdata' event with the inner text.
     */
    public function testCDATASection()
    {
        $good = array(
            '<![CDATA[ This is a test. ]]>' => ' This is a test. ',
            '<![CDATA[CDATA]]>' => 'CDATA',
            '<![CDATA[ ]] > ]]>' => ' ]] > ',
            '<![CDATA[ ]]>' => ' ',
        );
        $this->isAllGood('cdata', 2, $good);
    }

    /**
     * Doctype declarations.
     *
     * Each expectation is the full event data as asserted below:
     * name, identifier type, identifier value, and a trailing boolean that is
     * false for every well-formed case and true for every malformed one.
     */
    public function testDoctype()
    {
        $good = array(
            '<!DOCTYPE html>' => array('html', 0, null, false),
            '<!doctype html>' => array('html', 0, null, false),
            '<!DocType html>' => array('html', 0, null, false),
            "<!DOCTYPE\nhtml>" => array('html', 0, null, false),
            "<!DOCTYPE\fhtml>" => array('html', 0, null, false),
            '<!DOCTYPE html PUBLIC "foo bar">' => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
            "<!DOCTYPE html PUBLIC 'foo bar'>" => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
            '<!DOCTYPE html PUBLIC "foo bar" >' => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
            "<!DOCTYPE html \nPUBLIC\n'foo bar'>" => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
            '<!DOCTYPE html SYSTEM "foo bar">' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', false),
            "<!DOCTYPE html SYSTEM 'foo bar'>" => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', false),
            '<!DOCTYPE html SYSTEM "foo/bar" >' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo/bar', false),
            "<!DOCTYPE html \nSYSTEM\n'foo bar'>" => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', false),
        );
        $this->isAllGood('doctype', 2, $good);

        $bad = array(
            '<!DOCTYPE>' => array(null, EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE >' => array(null, EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo PUB' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo PUB>' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo PUB "Looks good">' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo SYSTME "Looks good"' => array('foo', EventStack::DOCTYPE_NONE, null, true),

            // Can't tell whether these are ids or ID types, since the context is chopped.
            '<!DOCTYPE foo PUBLIC' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo PUBLIC>' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo SYSTEM' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo SYSTEM>' => array('foo', EventStack::DOCTYPE_NONE, null, true),

            '<!DOCTYPE html SYSTEM "foo bar"' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', true),
            '<!DOCTYPE html SYSTEM "foo bar" more stuff>' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', true),
        );
        foreach ($bad as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true));
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('doctype', $expects, $events->get(1));
        }
    }

    /**
     * Processing instructions yield a 'pi' event with the target name and,
     * when present, the instruction body.
     */
    public function testProcessorInstruction()
    {
        $good = array(
            '<?hph ?>' => 'hph',
            '<?hph echo "Hello World"; ?>' => array('hph', 'echo "Hello World"; '),
            "<?hph \necho 'Hello World';\n?>" => array('hph', "echo 'Hello World';\n"),
        );
        $this->isAllGood('pi', 2, $good);
    }

    /**
     * This tests just simple tags.
     */
    public function testSimpleTags()
    {
        $open = array(
            '<foo>' => 'foo',
            '<FOO>' => 'foo',
            '<fOO>' => 'foo',
            '<foo >' => 'foo',
            "<foo\n\n\n\n>" => 'foo',
            '<foo:bar>' => 'foo:bar',
        );
        $this->isAllGood('startTag', 2, $open);

        $selfClose = array(
            '<foo/>' => 'foo',
            '<FOO/>' => 'foo',
            '<foo />' => 'foo',
            "<foo\n\n\n\n/>" => 'foo',
            '<foo:bar/>' => 'foo:bar',
        );
        foreach ($selfClose as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(2, $events->depth(), "Counting events for '$test'" . print_r($events, true));
            $this->assertEventEquals('startTag', $expects, $events->get(0));
            // The third data element must be true for a self-closing tag.
            $event = $events->get(0);
            $this->assertTrue($event['data'][2]);
        }

        // Tags cut off before the closing '>'.
        $bad = array(
            '<foo' => 'foo',
            '<foo ' => 'foo',
            '<foo/' => 'foo',
            '<foo /' => 'foo',
        );

        foreach ($bad as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true));
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('startTag', $expects, $events->get(1));
        }
    }

    /**
     * A "tag" that is really an attribute (the element name is missing)
     * raises three errors and is then reported as a start tag.
     */
    public function testTagsWithAttributeAndMissingName()
    {
        $cases = array(
            '<id="top_featured">' => 'id',
            '<color="white">' => 'color',
            "<class='neaktivni_stranka'>" => 'class',
            '<bgcolor="white">' => 'bgcolor',
            '<class="nom">' => 'class',
        );

        foreach ($cases as $html => $expected) {
            $events = $this->parse($html);
            $this->assertEventError($events->get(0));
            $this->assertEventError($events->get(1));
            $this->assertEventError($events->get(2));
            $this->assertEventEquals('startTag', $expected, $events->get(3));
            $this->assertEventEquals('eof', null, $events->get(4));
        }
    }

    /**
     * A new '<' directly after a tag name ends the first tag with an error.
     */
    public function testTagNotClosedAfterTagName()
    {
        $cases = array(
            '<noscript<img>' => array('noscript', 'img'),
            '<center<a>' => array('center', 'a'),
            '<br<br>' => array('br', 'br'),
        );

        foreach ($cases as $html => $expected) {
            $events = $this->parse($html);
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('startTag', $expected[0], $events->get(1));
            $this->assertEventEquals('startTag', $expected[1], $events->get(2));
            $this->assertEventEquals('eof', null, $events->get(3));
        }

        $events = $this->parse('<span<>02</span>');
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', 'span', $events->get(1));
        $this->assertEventError($events->get(2));
        $this->assertEventEquals('text', '>02', $events->get(3));
        $this->assertEventEquals('endTag', 'span', $events->get(4));
        $this->assertEventEquals('eof', null, $events->get(5));

        $events = $this->parse('<p</p>');
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', 'p', $events->get(1));
        $this->assertEventEquals('endTag', 'p', $events->get(2));
        $this->assertEventEquals('eof', null, $events->get(3));

        $events = $this->parse('<strong><WordPress</strong>');
        $this->assertEventEquals('startTag', 'strong', $events->get(0));
        $this->assertEventError($events->get(1));
        $this->assertEventEquals('startTag', 'wordpress', $events->get(2));
        $this->assertEventEquals('endTag', 'strong', $events->get(3));
        $this->assertEventEquals('eof', null, $events->get(4));

        $events = $this->parse('<src=<a>');
        $this->assertEventError($events->get(0));
        $this->assertEventError($events->get(1));
        $this->assertEventError($events->get(2));
        $this->assertEventEquals('startTag', 'src', $events->get(3));
        $this->assertEventEquals('startTag', 'a', $events->get(4));
        $this->assertEventEquals('eof', null, $events->get(5));

        $events = $this->parse('<br...<a>');
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', 'br', $events->get(1));
        $this->assertEventEquals('eof', null, $events->get(2));
    }

    /**
     * An illegal character inside a tag name raises an error; the name is
     * reported up to (not including) the offending character.
     */
    public function testIllegalTagNames()
    {
        $cases = array(
            '<li">' => 'li',
            '<p">' => 'p',
            // Written as an entity on purpose: with a plain space this would
            // be the legal form already covered by testSimpleTags().
            '<b&nbsp; >' => 'b',
            '<static*all>' => 'static',
            '<h*0720/>' => 'h',
            '<st*ATTRIBUTE />' => 'st',
        );

        foreach ($cases as $html => $expected) {
            $events = $this->parse($html);
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('startTag', $expected, $events->get(1));
        }
    }

    /**
     * Attribute parsing on start tags.
     *
     * Each expectation is the full start-tag event data as asserted below:
     * tag name, attribute map, and a boolean that is true for the
     * self-closing cases.
     *
     * @depends testCharacterReference
     */
    public function testTagAttributes()
    {
        // Opening tags.
        $good = array(
            '<foo bar="baz">' => array('foo', array('bar' => 'baz'), false),
            '<foo bar=" baz ">' => array('foo', array('bar' => ' baz '), false),
            "<foo bar=\"\nbaz\n\">" => array('foo', array('bar' => "\nbaz\n"), false),
            "<foo bar='baz'>" => array('foo', array('bar' => 'baz'), false),
            '<foo bar="A full sentence.">' => array('foo', array('bar' => 'A full sentence.'), false),
            "<foo a='1' b=\"2\">" => array('foo', array('a' => '1', 'b' => '2'), false),
            "<foo ns:bar='baz'>" => array('foo', array('ns:bar' => 'baz'), false),
            // Bare ampersands and real references must each be their own key;
            // spelled identically they would silently overwrite one another.
            "<foo a='blue&red'>" => array('foo', array('a' => 'blue&red'), false),
            "<foo a='blue&amp;red'>" => array('foo', array('a' => 'blue&red'), false),
            "<foo a='blue&&amp;&red'>" => array('foo', array('a' => 'blue&&&red'), false),
            "<foo a='blue&&amp;red'>" => array('foo', array('a' => 'blue&&red'), false),
            "<foo\nbar='baz'\n>" => array('foo', array('bar' => 'baz'), false),
            '<doe a deer>' => array('doe', array('a' => null, 'deer' => null), false),
            '<foo bar=baz>' => array('foo', array('bar' => 'baz'), false),

            // Updated for 8.1.2.3
            '<foo bar = "baz" >' => array('foo', array('bar' => 'baz'), false),

            // The spec allows an unquoted value '/'. This will not be a closing
            // tag.
            '<foo bar=/>' => array('foo', array('bar' => '/'), false),
            '<foo bar=baz/>' => array('foo', array('bar' => 'baz/'), false),
        );
        $this->isAllGood('startTag', 2, $good);

        // Self-closing tags.
        $withEnd = array(
            '<foo bar="baz"/>' => array('foo', array('bar' => 'baz'), true),
            '<foo BAR="baz"/>' => array('foo', array('bar' => 'baz'), true),
            '<foo BAR="BAZ"/>' => array('foo', array('bar' => 'BAZ'), true),
            "<foo a='1' b=\"2\" c=3 d/>" => array('foo', array('a' => '1', 'b' => '2', 'c' => '3', 'd' => null), true),
        );
        $this->isAllGood('startTag', 2, $withEnd);

        // Cause a parse error.
        $bad = array(
            // This will emit an entity lookup failure for &+dark.
            "<foo a='blue&+dark'>" => array('foo', array('a' => 'blue&+dark'), false),
            '<foo bar=>' => array('foo', array('bar' => null), false),
            '<foo bar="oh' => array('foo', array('bar' => 'oh'), false),
            '<foo bar=oh">' => array('foo', array('bar' => 'oh"'), false),

            // these attributes are ignored because of current implementation
            // of method "DOMElement::setAttribute"
            // see issue #23: https://github.com/Masterminds/html5-php/issues/23
            '<foo b"="baz">' => array('foo', array(), false),
            '<foo 2abc="baz">' => array('foo', array(), false),
            '<foo ?="baz">' => array('foo', array(), false),
            '<foo foo?bar="baz">' => array('foo', array(), false),
        );
        foreach ($bad as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true));
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('startTag', $expects, $events->get(1));
        }

        // Cause multiple parse errors.
        $reallyBad = array(
            '<foo ="bar">' => array('foo', array('=' => null, '"bar"' => null), false),
            '<foo////>' => array('foo', array(), true),
            // character "&" in unquoted attribute shouldn't cause an infinite loop
            '<foo bar=index.php?str=1&id=29>' => array('foo', array('bar' => 'index.php?str=1&id=29'), false),
        );
        foreach ($reallyBad as $test => $expects) {
            $events = $this->parse($test);
            // Only the two leading errors are checked; the start-tag payload
            // in $expects is kept for reference but is not asserted.
            $this->assertEventError($events->get(0));
            $this->assertEventError($events->get(1));
        }

        // Regression: Malformed elements should be detected.
        $events = $this->parse('<foo baz="1" <bar></foo>');
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', array('foo', array('baz' => '1'), false), $events->get(1));
        $this->assertEventEquals('startTag', array('bar', array(), false), $events->get(2));
        $this->assertEventEquals('endTag', array('foo'), $events->get(3));
    }

    /**
     * Raw text elements (script): the content is reported verbatim as one
     * text event, with markup-like sequences and references left untouched.
     */
    public function testRawText()
    {
        $good = array(
            '<script>abcd efg hijk lmnop</script> ' => 'abcd efg hijk lmnop',
            '<script><not/><the/><tag></script>' => '<not/><the/><tag>',
            '<script><<<<<<<<</script>' => '<<<<<<<<',
            '<script>hello</script</script>' => 'hello</script',
            "<script>\nhello</script\n</script>" => "\nhello</script\n",
            // A real reference, so the test proves it is NOT decoded here.
            '<script>&amp;</script>' => '&amp;',
            '<script><!--not a comment--></script>' => '<!--not a comment-->',
            '<script><![CDATA[not a comment]]></script>' => '<![CDATA[not a comment]]>',
        );
        foreach ($good as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEventEquals('startTag', 'script', $events->get(0));
            $this->assertEventEquals('text', $expects, $events->get(1));
            $this->assertEventEquals('endTag', 'script', $events->get(2));
        }

        // Unterminated raw text: an error is raised before the text event.
        $bad = array(
            '<script>&amp;</script' => '&amp;</script',
            '<script>Hello world' => 'Hello world',
        );
        foreach ($bad as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(4, $events->depth(), "Counting events for '$test': " . print_r($events, true));
            $this->assertEventEquals('startTag', 'script', $events->get(0));
            $this->assertEventError($events->get(1));
            $this->assertEventEquals('text', $expects, $events->get(2));
        }

        // Testing case sensitivity
        $events = $this->parse('<TITLE>a test</TITLE>');
        $this->assertEventEquals('startTag', 'title', $events->get(0));
        $this->assertEventEquals('text', 'a test', $events->get(1));
        $this->assertEventEquals('endTag', 'title', $events->get(2));

        // Testing end tags with whitespaces
        $events = $this->parse('<title>Whitespaces are tasty</title >');
        $this->assertEventEquals('startTag', 'title', $events->get(0));
        $this->assertEventEquals('text', 'Whitespaces are tasty', $events->get(1));
        $this->assertEventEquals('endTag', 'title', $events->get(2));
    }

    /**
     * RCDATA mode: character references are decoded, but comment-like markup
     * stays part of the text.
     */
    public function testRcdata()
    {
        // The apostrophe is written as a reference: a literal quote here would
        // terminate the PHP string, and the reference is what gets decoded.
        list($tok, $events) = $this->createTokenizer('<title>&#x27;<!-- not a comment --></TITLE>');
        $tok->setTextMode(\Masterminds\HTML5\Elements::TEXT_RCDATA, 'title');
        $tok->parse();
        $this->assertEventEquals('text', "'<!-- not a comment -->", $events->get(1));
    }

    /**
     * Text events interleaved with tags, CDATA, comments and references.
     */
    public function testText()
    {
        $events = $this->parse('a<br>b');
        $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a', $events->get(0));
        $this->assertEventEquals('startTag', 'br', $events->get(1));
        $this->assertEventEquals('text', 'b', $events->get(2));

        $events = $this->parse('<a>Test</a>');
        $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('startTag', 'a', $events->get(0));
        $this->assertEventEquals('text', 'Test', $events->get(1));
        $this->assertEventEquals('endTag', 'a', $events->get(2));

        $events = $this->parse('<p>0</p><p>1</p>');
        $this->assertEquals(7, $events->depth(), 'Events: ' . print_r($events, true));

        $this->assertEventEquals('startTag', 'p', $events->get(0));
        $this->assertEventEquals('text', '0', $events->get(1));
        $this->assertEventEquals('endTag', 'p', $events->get(2));

        $this->assertEventEquals('startTag', 'p', $events->get(3));
        $this->assertEventEquals('text', '1', $events->get(4));
        $this->assertEventEquals('endTag', 'p', $events->get(5));

        $events = $this->parse('a<![CDATA[test]]>b');
        $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a', $events->get(0));
        $this->assertEventEquals('cdata', 'test', $events->get(1));
        $this->assertEventEquals('text', 'b', $events->get(2));

        $events = $this->parse('a<!--test-->b');
        $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a', $events->get(0));
        $this->assertEventEquals('comment', 'test', $events->get(1));
        $this->assertEventEquals('text', 'b', $events->get(2));

        // References inside running text are decoded without splitting the
        // text event. A bare "&b" would instead be an unknown reference, which
        // testCharacterReference() expects to raise an error.
        $events = $this->parse('a&amp;b');
        $this->assertEquals(2, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a&b', $events->get(0));

        $events = $this->parse('a&sup2;b');
        $this->assertEquals(2, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a²b', $events->get(0));
    }

    // ================================================================
    // Utility functions.
    // ================================================================

    /**
     * Builds a tokenizer over the given markup without running it.
     *
     * @param string $string
     *                       Markup to tokenize
     * @param bool   $debug
     *                       Value assigned to the scanner's debug property
     *
     * @return array
     *               Two elements: the Tokenizer, and the EventStack it was
     *               constructed with
     */
    protected function createTokenizer($string, $debug = false)
    {
        $eventHandler = new EventStack();
        $scanner = new Scanner($string);

        $scanner->debug = $debug;

        return array(
            new Tokenizer($scanner, $eventHandler),
            $eventHandler,
        );
    }

    /**
     * Tokenizes the given markup and returns the event stack that was
     * handed to the tokenizer.
     *
     * @param string $string
     *                       Markup to tokenize
     * @param bool   $debug
     *                       Passed through to createTokenizer()
     *
     * @return EventStack
     */
    public function parse($string, $debug = false)
    {
        list($tok, $events) = $this->createTokenizer($string, $debug);
        $tok->parse();

        return $events;
    }
}