Skip to content

Commit

Permalink
use PCRE MARK verb to identify token types
Browse files Browse the repository at this point in the history
  • Loading branch information
thunderer committed Dec 20, 2024
1 parent 8d66350 commit 8654631
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 25 deletions.
2 changes: 1 addition & 1 deletion docker/php-5.x/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ RUN apt update && apt install -y --force-yes libonig-dev libzip-dev
RUN docker-php-ext-install mbstring zip

RUN php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');" \
&& php -r "if (hash_file('sha384', 'composer-setup.php') === 'c31c1e292ad7be5f49291169c0ac8f683499edddcfd4e42232982d0fd193004208a58ff6f353fde0012d35fdd72bc394') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" \
&& php -r "if (hash_file('sha384', 'composer-setup.php') === 'dac665fdc30fdd8ec78b38b9800061b4150413ff2e3b6f88543c636f7cd84f6db9189d43a81e5503cda447da73c7e5b6') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" \
&& php composer-setup.php \
&& php -r "unlink('composer-setup.php');" \
&& mv composer.phar /usr/local/bin/composer
2 changes: 1 addition & 1 deletion docker/php/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ ARG PHP_VERSION=8.0
FROM php:$PHP_VERSION

RUN php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');" \
&& php -r "if (hash_file('sha384', 'composer-setup.php') === '906a84df04cea2aa72f40b5f787e49f22d4c2f19492ac310e8cba5b96ac8b64115ac402c8cd292b8a03482574915d1a8') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" \
&& php -r "if (hash_file('sha384', 'composer-setup.php') === 'dac665fdc30fdd8ec78b38b9800061b4150413ff2e3b6f88543c636f7cd84f6db9189d43a81e5503cda447da73c7e5b6') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" \
&& php composer-setup.php \
&& php -r "unlink('composer-setup.php');" \
&& mv composer.phar /usr/local/bin/composer
Expand Down
36 changes: 13 additions & 23 deletions src/Parser/RegularParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -349,19 +349,9 @@ private function tokenize($text)

$tokens = array();
$position = 0;

foreach($matches as $match) {
switch(true) {
case array_key_exists('close', $match): { $token = $match['close']; $type = self::TOKEN_CLOSE; break; }
case array_key_exists('open', $match): { $token = $match['open']; $type = self::TOKEN_OPEN; break; }
case array_key_exists('separator', $match): { $token = $match['separator']; $type = self::TOKEN_SEPARATOR; break; }
case array_key_exists('delimiter', $match): { $token = $match['delimiter']; $type = self::TOKEN_DELIMITER; break; }
case array_key_exists('marker', $match): { $token = $match['marker']; $type = self::TOKEN_MARKER; break; }
case array_key_exists('ws', $match): { $token = $match['ws']; $type = self::TOKEN_WS; break; }
case array_key_exists('string', $match): { $token = $match['string']; $type = self::TOKEN_STRING; break; }
default: { throw new \RuntimeException('Invalid token.'); }
}
$tokens[] = array($type, $token, $position);
$token = $match[0];
$tokens[] = array((int)$match['MARK'], $token, $position);
$position += mb_strlen($token, 'utf-8');
}

Expand All @@ -373,31 +363,31 @@ private function prepareLexer(SyntaxInterface $syntax)
{
// FIXME: for some reason Psalm does not understand the `@psalm-var callable() $var` annotation
/** @psalm-suppress MissingClosureParamType, MissingClosureReturnType */
$group = function($text, $group) {
return '(?<'.(string)$group.'>'.preg_replace('/(.)/us', '\\\\$0', (string)$text).')';
$group = function($text) {
return preg_replace('/(.)/us', '\\\\$0', (string)$text);
};
/** @psalm-suppress MissingClosureParamType, MissingClosureReturnType */
$quote = function($text) {
return preg_replace('/(.)/us', '\\\\$0', (string)$text);
};

$rules = array(
'(?<string>\\\\.|(?:(?!'.implode('|', array(
'\\\\.(*:'.self::TOKEN_STRING.')|(?:(?!'.implode('|', array(
$quote($syntax->getOpeningTag()),
$quote($syntax->getClosingTag()),
$quote($syntax->getClosingTagMarker()),
$quote($syntax->getParameterValueSeparator()),
$quote($syntax->getParameterValueDelimiter()),
'\s+',
)).').)+)',
'(?<ws>\s+)',
$group($syntax->getClosingTagMarker(), 'marker'),
$group($syntax->getParameterValueDelimiter(), 'delimiter'),
$group($syntax->getParameterValueSeparator(), 'separator'),
$group($syntax->getOpeningTag(), 'open'),
$group($syntax->getClosingTag(), 'close'),
)).').)+(*:'.self::TOKEN_STRING.')',
'\s+(*:'.self::TOKEN_WS.')',
$group($syntax->getClosingTagMarker()).'(*:'.self::TOKEN_MARKER.')',
$group($syntax->getParameterValueDelimiter()).'(*:'.self::TOKEN_DELIMITER.')',
$group($syntax->getParameterValueSeparator()).'(*:'.self::TOKEN_SEPARATOR.')',
$group($syntax->getOpeningTag()).'(*:'.self::TOKEN_OPEN.')',
$group($syntax->getClosingTag()).'(*:'.self::TOKEN_CLOSE.')',
);

return '~('.implode('|', $rules).')~us';
return '~(?|'.implode('|', $rules).')~us';
}
}
37 changes: 37 additions & 0 deletions tests/ParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -318,4 +318,41 @@ public function testInstances()
static::assertInstanceOf('Thunder\Shortcode\Parser\WordPressParser', new WordpressParser());
static::assertInstanceOf('Thunder\Shortcode\Parser\RegularParser', new RegularParser());
}

/**
 * Parses a generated shortcode text with RegularParser and asserts that the
 * parse call took less than 7 milliseconds of wall-clock time.
 *
 * @dataProvider provideBenchmarks
 *
 * @param int $level forwarded to benchmark() as the number of tag levels
 * @param int $num forwarded to benchmark() as the repeat count per level
 */
public function testGeneratedNesting($level, $num)
{
    // Build the input before starting the clock so only parse() is measured.
    $input = $this->benchmark($level, $num);
    $parser = new RegularParser();

    $startedAt = microtime(true);
    $parser->parse($input);
    $finishedAt = microtime(true);

    // microtime(true) yields seconds as float; convert the delta to ms.
    $elapsedMs = ($finishedAt - $startedAt) * 1000;

    self::assertLessThan(7, $elapsedMs);
}

/**
 * Generates a text made of opening shortcode tags for levels 1..$level
 * followed by the matching closing tags in reverse level order.
 *
 * Each level contributes `[sN]N` repeated $num times to the opening part and
 * `N[/sN]` repeated $num times to the closing part, e.g. benchmark(2, 1)
 * returns `[s1]1[s2]22[/s2]1[/s1]`.
 *
 * @param int $level highest tag level to generate; values below 1 yield ''
 * @param int $num how many times each level's tag is repeated
 *
 * @return string generated shortcode text
 */
private function benchmark($level, $num)
{
    // Start from an empty string so the result is defined even when no
    // level is generated.
    $text = '';

    // Append (not assign) so every level's opening tags are kept, not just
    // the ones produced by the final iteration.
    for($i = 1; $i <= $level; $i++) {
        $text .= str_repeat('[s'.$i.']'.$i, $num);
    }
    // Closing tags run from the innermost level back out to level 1.
    for($i = $level; $i >= 1; $i--) {
        $text .= str_repeat($i.'[/s'.$i.']', $num);
    }

    return $text;
}

/**
 * Data provider yielding every [level, num] pair for levels 10, 20, ..., 100
 * combined with repeat counts 1 through 10 (100 datasets in total).
 *
 * @return array list of two-element arrays: array(level, num)
 */
public static function provideBenchmarks()
{
    $datasets = array();

    for($level = 10; $level <= 100; $level += 10) {
        for($num = 1; $num <= 10; $num++) {
            $datasets[] = array($level, $num);
        }
    }

    return $datasets;
}
}

0 comments on commit 8654631

Please sign in to comment.