1313final class RegularParser implements ParserInterface
1414{
1515 private $ lexerRegex ;
16+ private $ nameRegex ;
1617 private $ tokens ;
1718 private $ tokensCount ;
1819 private $ position ;
19- /** @var array [] */
20+ /** @var int [] */
2021 private $ backtracks ;
22+ private $ lastBacktrack ;
2123
2224 const TOKEN_OPEN = 1 ;
2325 const TOKEN_CLOSE = 2 ;
@@ -29,7 +31,8 @@ final class RegularParser implements ParserInterface
2931
/**
 * Prepares the two regular expressions used by the parser.
 *
 * The lexer regex is built from the given syntax by prepareLexer(); when no
 * syntax is passed, a new CommonSyntax instance is used. The name regex is
 * built once here and stored in $this->nameRegex.
 *
 * @param SyntaxInterface|null $syntax syntax definition, or null for CommonSyntax
 */
public function __construct(SyntaxInterface $syntax = null)
{
    $this->lexerRegex = $this->prepareLexer($syntax ?: new CommonSyntax());
    // Anchored on both ends so the pattern must match the entire name;
    // "u" enables UTF-8 mode, "s" lets the dot match newlines.
    $this->nameRegex = '~^'.RegexBuilderUtility::buildNameRegex().'$~us';
}
3437
3538 /**
@@ -39,10 +42,12 @@ public function __construct(SyntaxInterface $syntax = null)
3942 */
4043 public function parse ($ text )
4144 {
45+ $ nestingLevel = ini_set ('xdebug.max_nesting_level ' , -1 );
4246 $ this ->tokens = $ this ->tokenize ($ text );
4347 $ this ->backtracks = array ();
48+ $ this ->lastBacktrack = 0 ;
4449 $ this ->position = 0 ;
45- $ this ->tokensCount = count ($ this ->tokens );
50+ $ this ->tokensCount = \ count ($ this ->tokens );
4651
4752 $ shortcodes = array ();
4853 while ($ this ->position < $ this ->tokensCount ) {
@@ -52,12 +57,13 @@ public function parse($text)
5257 $ names = array ();
5358 $ this ->beginBacktrack ();
5459 $ matches = $ this ->shortcode ($ names );
55- if (is_array ($ matches )) {
60+ if (\ is_array ($ matches )) {
5661 foreach ($ matches as $ shortcode ) {
5762 $ shortcodes [] = $ shortcode ;
5863 }
5964 }
6065 }
66+ ini_set ('xdebug.max_nesting_level ' , $ nestingLevel );
6167
6268 return $ shortcodes ;
6369 }
@@ -71,71 +77,49 @@ private function getObject($name, $parameters, $bbCode, $offset, $content, $text
7177
7278 private function shortcode (array &$ names )
7379 {
74- $ name = null ;
75- $ offset = null ;
76-
77- $ setName = function (array $ token ) use (&$ name ) { $ name = $ token [1 ]; };
78- $ setOffset = function (array $ token ) use (&$ offset ) { $ offset = $ token [2 ]; };
79-
80- if (!$ this ->match (self ::TOKEN_OPEN , $ setOffset , true )) { return false ; }
81- if (!$ this ->match (self ::TOKEN_STRING , $ setName , false )) { return false ; }
80+ if (!$ this ->match (self ::TOKEN_OPEN , false )) { return false ; }
81+ $ offset = $ this ->tokens [$ this ->position - 1 ][2 ];
82+ $ this ->match (self ::TOKEN_WS , false );
83+ if ('' === $ name = $ this ->match (self ::TOKEN_STRING , false )) { return false ; }
8284 if ($ this ->lookahead (self ::TOKEN_STRING )) { return false ; }
83- if (!preg_match_all ('~^ ' .RegexBuilderUtility::buildNameRegex ().'$~us ' , $ name , $ matches )) { return false ; }
84- $ this ->match (self ::TOKEN_WS );
85- if (false === ($ bbCode = $ this ->bbCode ())) { return false ; }
85+ if (1 !== preg_match ($ this ->nameRegex , $ name , $ matches )) { return false ; }
86+ $ this ->match (self ::TOKEN_WS , false );
87+ // bbCode
88+ $ bbCode = $ this ->match (self ::TOKEN_SEPARATOR , true ) ? $ this ->value () : null ;
89+ if (false === $ bbCode ) { return false ; }
90+ // parameters
8691 if (false === ($ parameters = $ this ->parameters ())) { return false ; }
8792
8893 // self-closing
89- if ($ this ->match (self ::TOKEN_MARKER , null , true )) {
90- if (!$ this ->match (self ::TOKEN_CLOSE )) { return false ; }
94+ if ($ this ->match (self ::TOKEN_MARKER , true )) {
95+ if (!$ this ->match (self ::TOKEN_CLOSE , false )) { return false ; }
9196
9297 return array ($ this ->getObject ($ name , $ parameters , $ bbCode , $ offset , null , $ this ->getBacktrack ()));
9398 }
9499
95100 // just-closed or with-content
96- if (!$ this ->match (self ::TOKEN_CLOSE )) { return false ; }
101+ if (!$ this ->match (self ::TOKEN_CLOSE , false )) { return false ; }
97102 $ this ->beginBacktrack ();
98103 $ names [] = $ name ;
99- list ($ content , $ shortcodes , $ closingName ) = $ this ->content ($ names );
100- if (null !== $ closingName && $ closingName !== $ name ) {
101- array_pop ($ names );
102- array_pop ($ this ->backtracks );
103- array_pop ($ this ->backtracks );
104-
105- return $ closingName ;
106- }
107- if (false === $ content || $ closingName !== $ name ) {
108- $ this ->backtrack (false );
109- $ text = $ this ->backtrack (false );
110-
111- return array_merge (array ($ this ->getObject ($ name , $ parameters , $ bbCode , $ offset , null , $ text )), $ shortcodes );
112- }
113- $ content = $ this ->getBacktrack ();
114- if (!$ this ->close ($ names )) { return false ; }
115-
116- return array ($ this ->getObject ($ name , $ parameters , $ bbCode , $ offset , $ content , $ this ->getBacktrack ()));
117- }
118104
119- private function content (array &$ names )
120- {
121- $ content = null ;
105+ // begin inlined content()
106+ $ content = '' ;
122107 $ shortcodes = array ();
123108 $ closingName = null ;
124- $ appendContent = function (array $ token ) use (&$ content ) { $ content .= $ token [1 ]; };
125109
126110 while ($ this ->position < $ this ->tokensCount ) {
127111 while ($ this ->position < $ this ->tokensCount && false === $ this ->lookahead (self ::TOKEN_OPEN )) {
128- $ this ->match (null , $ appendContent , true );
112+ $ content .= $ this ->match (null , true );
129113 }
130114
131115 $ this ->beginBacktrack ();
132- $ matchedShortcodes = $ this ->shortcode ($ names );
133- if (is_string ($ matchedShortcodes )) {
134- $ closingName = $ matchedShortcodes ;
116+ $ contentMatchedShortcodes = $ this ->shortcode ($ names );
117+ if (\ is_string ($ contentMatchedShortcodes )) {
118+ $ closingName = $ contentMatchedShortcodes ;
135119 break ;
136120 }
137- if (is_array ($ matchedShortcodes )) {
138- foreach ($ matchedShortcodes as $ matchedShortcode ) {
121+ if (\ is_array ($ contentMatchedShortcodes )) {
122+ foreach ($ contentMatchedShortcodes as $ matchedShortcode ) {
139123 $ shortcodes [] = $ matchedShortcode ;
140124 }
141125 continue ;
@@ -152,44 +136,51 @@ private function content(array &$names)
152136 $ closingName = null ;
153137 $ this ->backtrack ();
154138
155- $ this ->match (null , $ appendContent );
139+ $ content .= $ this ->match (null , false );
156140 }
141+ $ content = $ this ->position < $ this ->tokensCount ? $ content : false ;
142+ // end inlined content()
157143
158- return array ($ this ->position < $ this ->tokensCount ? $ content : false , $ shortcodes , $ closingName );
159- }
144+ if (null !== $ closingName && $ closingName !== $ name ) {
145+ array_pop ($ names );
146+ array_pop ($ this ->backtracks );
147+ array_pop ($ this ->backtracks );
160148
161- private function close (array &$ names )
162- {
163- $ closingName = null ;
164- $ setName = function (array $ token ) use (&$ closingName ) { $ closingName = $ token [1 ]; };
149+ return $ closingName ;
150+ }
151+ if (false === $ content || $ closingName !== $ name ) {
152+ $ this ->backtrack (false );
153+ $ text = $ this ->backtrack (false );
165154
166- if (! $ this ->match ( self :: TOKEN_OPEN , null , true )) { return false ; }
167- if (! $ this -> match ( self :: TOKEN_MARKER , null , true )) { return false ; }
168- if (! $ this ->match ( self :: TOKEN_STRING , $ setName , true )) { return false ; }
169- if (!$ this ->match ( self :: TOKEN_CLOSE )) { return false ; }
155+ return array_merge ( array ( $ this ->getObject ( $ name , $ parameters , $ bbCode , $ offset , null , $ text )), $ shortcodes );
156+ }
157+ $ content = $ this ->getBacktrack ();
158+ if (!$ this ->close ( $ names )) { return false ; }
170159
171- return in_array ( $ closingName , $ names , true ) ? $ closingName : false ;
160+ return array ( $ this -> getObject ( $ name , $ parameters , $ bbCode , $ offset , $ content , $ this -> getBacktrack ())) ;
172161 }
173162
174- private function bbCode ( )
/**
 * Consumes a closing tag: opening token, marker, name string, closing token.
 *
 * match() returns the matched token text, or an empty string when nothing
 * matched. The results are therefore compared strictly against '' instead
 * of being cast to boolean: the text "0" is a non-empty token but is falsy
 * in PHP, so a loose check would wrongly reject it.
 *
 * NOTE(review): the returned name can itself be falsy (e.g. "0"), so callers
 * should test the result with `false ===` rather than a boolean cast.
 *
 * @param array $names names collected by the caller; read only, not modified here
 *
 * @return string|false the closing name when it is present in $names, false otherwise
 */
private function close(array &$names)
{
    if ('' === $this->match(self::TOKEN_OPEN, true)) { return false; }
    if ('' === $this->match(self::TOKEN_MARKER, true)) { return false; }
    if ('' === $closingName = $this->match(self::TOKEN_STRING, true)) { return false; }
    if ('' === $this->match(self::TOKEN_CLOSE, false)) { return false; }

    return \in_array($closingName, $names, true) ? $closingName : false;
}
178172
179173 private function parameters ()
180174 {
181175 $ parameters = array ();
182- $ setName = function (array $ token ) use (&$ name ) { $ name = $ token [1 ]; };
183176
184177 while (true ) {
185- $ name = null ;
186-
187- $ this ->match (self ::TOKEN_WS );
178+ $ this ->match (self ::TOKEN_WS , false );
188179 if ($ this ->lookahead (self ::TOKEN_MARKER ) || $ this ->lookahead (self ::TOKEN_CLOSE )) { break ; }
189- if (!$ this ->match (self ::TOKEN_STRING , $ setName , true )) { return false ; }
190- if (!$ this ->match (self ::TOKEN_SEPARATOR , null , true )) { $ parameters [$ name ] = null ; continue ; }
180+ if (!$ name = $ this ->match (self ::TOKEN_STRING , true )) { return false ; }
181+ if (!$ this ->match (self ::TOKEN_SEPARATOR , true )) { $ parameters [$ name ] = null ; continue ; }
191182 if (false === ($ value = $ this ->value ())) { return false ; }
192- $ this ->match (self ::TOKEN_WS );
183+ $ this ->match (self ::TOKEN_WS , false );
193184
194185 $ parameters [$ name ] = $ value ;
195186 }
@@ -200,19 +191,19 @@ private function parameters()
200191 private function value ()
201192 {
202193 $ value = '' ;
203- $ appendValue = function (array $ token ) use (&$ value ) { $ value .= $ token [1 ]; };
204194
205- if ($ this ->match (self ::TOKEN_DELIMITER )) {
195+ if ($ this ->match (self ::TOKEN_DELIMITER , false )) {
206196 while ($ this ->position < $ this ->tokensCount && false === $ this ->lookahead (self ::TOKEN_DELIMITER )) {
207- $ this ->match (null , $ appendValue );
197+ $ value .= $ this ->match (null , false );
208198 }
209199
210- return $ this ->match (self ::TOKEN_DELIMITER ) ? $ value : false ;
200+ return $ this ->match (self ::TOKEN_DELIMITER , false ) ? $ value : false ;
211201 }
212202
213- if ($ this ->match (self ::TOKEN_STRING , $ appendValue )) {
214- while ($ this ->match (self ::TOKEN_STRING , $ appendValue )) {
215- continue ;
203+ if ($ tmp = $ this ->match (self ::TOKEN_STRING , false )) {
204+ $ value .= $ tmp ;
205+ while ($ tmp = $ this ->match (self ::TOKEN_STRING , false )) {
206+ $ value .= $ tmp ;
216207 }
217208
218209 return $ value ;
@@ -225,85 +216,83 @@ private function value()
225216
/**
 * Opens a new backtrack frame at the current token position.
 *
 * The position is pushed onto the backtracks stack and also stored as the
 * most recent backtrack marker.
 */
private function beginBacktrack()
{
    $start = $this->position;

    $this->lastBacktrack = $start;
    $this->backtracks[] = $start;
}
230222
/**
 * Pops the latest backtrack frame and returns the text consumed since it.
 *
 * @return string concatenated text of the tokens from the popped start
 *                position up to, but not including, the current position
 */
private function getBacktrack()
{
    $index = array_pop($this->backtracks);
    $text = '';

    while ($index < $this->position) {
        // Index 1 of a token holds its text.
        $text .= $this->tokens[$index][1];
        $index++;
    }

    return $text;
}
236233
/**
 * Pops the latest backtrack frame and returns the text it covered.
 *
 * The covered range runs from the popped start position up to, but not
 * including, the previous value of $this->lastBacktrack. Afterwards
 * $this->lastBacktrack is set to the popped start position.
 *
 * @param bool $modifyPosition when true, the parser position is rewound to
 *                             the popped start position
 *
 * @return string concatenated text of the tokens in the covered range
 */
private function backtrack($modifyPosition = true)
{
    $start = array_pop($this->backtracks);
    $end = $this->lastBacktrack;
    $this->lastBacktrack = $start;

    if ($modifyPosition) {
        $this->position = $start;
    }

    $text = '';
    $index = $start;
    while ($index < $end) {
        $text .= $this->tokens[$index][1];
        $index++;
    }

    return $text;
}
255249
/**
 * Checks the type of the token at the current position without consuming it.
 *
 * @param int $type one of the TOKEN_* constants
 *
 * @return bool true when a token exists at the current position and its
 *              type is identical to $type
 */
private function lookahead($type)
{
    if ($this->position >= $this->tokensCount) {
        return false;
    }

    return $this->tokens[$this->position][0] === $type;
}
260254
/**
 * Consumes the token at the current position when it has the expected type.
 *
 * @param int|null $type expected TOKEN_* type; an empty value accepts any token
 * @param bool     $ws   when true, one directly following whitespace token
 *                       is consumed as well
 *
 * @return string text of the matched token, or an empty string when the end
 *                of input was reached or the token type did not match
 */
private function match($type, $ws)
{
    if ($this->position >= $this->tokensCount) {
        return '';
    }

    $current = $this->tokens[$this->position];
    $typeRequired = !empty($type);
    if ($typeRequired && $current[0] !== $type) {
        return '';
    }

    $this->position++;

    if ($ws) {
        // Skip a single trailing whitespace token; its text is not returned.
        $hasNext = $this->position < $this->tokensCount;
        if ($hasNext && $this->tokens[$this->position][0] === self::TOKEN_WS) {
            $this->position++;
        }
    }

    return $current[1];
}
289273
290274 /* --- LEXER ----------------------------------------------------------- */
291275
292276 private function tokenize ($ text )
293277 {
294- preg_match_all ($ this ->lexerRegex , $ text , $ matches , PREG_SET_ORDER | PREG_OFFSET_CAPTURE );
278+ $ count = preg_match_all ($ this ->lexerRegex , $ text , $ matches , PREG_SET_ORDER | PREG_OFFSET_CAPTURE );
279+ if (false === $ count || preg_last_error () !== PREG_NO_ERROR ) {
280+ throw new \RuntimeException (sprintf ('PCRE failure `%s`. ' , preg_last_error ()));
281+ }
282+
295283 $ tokens = array ();
296284 $ position = 0 ;
297285
298286 foreach ($ matches as $ match ) {
299287 switch (true ) {
300- case -1 !== $ match ['open ' ][1 ]: { $ token = $ match ['open ' ][0 ]; $ type = self ::TOKEN_OPEN ; break ; }
301- case -1 !== $ match ['close ' ][1 ]: { $ token = $ match ['close ' ][0 ]; $ type = self ::TOKEN_CLOSE ; break ; }
288+ case -1 !== $ match ['string ' ][1 ]: { $ token = $ match ['string ' ][0 ]; $ type = self ::TOKEN_STRING ; break ; }
289+ case -1 !== $ match ['ws ' ][1 ]: { $ token = $ match ['ws ' ][0 ]; $ type = self ::TOKEN_WS ; break ; }
302290 case -1 !== $ match ['marker ' ][1 ]: { $ token = $ match ['marker ' ][0 ]; $ type = self ::TOKEN_MARKER ; break ; }
303- case -1 !== $ match ['separator ' ][1 ]: { $ token = $ match ['separator ' ][0 ]; $ type = self ::TOKEN_SEPARATOR ; break ; }
304291 case -1 !== $ match ['delimiter ' ][1 ]: { $ token = $ match ['delimiter ' ][0 ]; $ type = self ::TOKEN_DELIMITER ; break ; }
305- case -1 !== $ match ['ws ' ][1 ]: { $ token = $ match ['ws ' ][0 ]; $ type = self ::TOKEN_WS ; break ; }
306- default : { $ token = $ match ['string ' ][0 ]; $ type = self ::TOKEN_STRING ; }
292+ case -1 !== $ match ['separator ' ][1 ]: { $ token = $ match ['separator ' ][0 ]; $ type = self ::TOKEN_SEPARATOR ; break ; }
293+ case -1 !== $ match ['open ' ][1 ]: { $ token = $ match ['open ' ][0 ]; $ type = self ::TOKEN_OPEN ; break ; }
294+ case -1 !== $ match ['close ' ][1 ]: { $ token = $ match ['close ' ][0 ]; $ type = self ::TOKEN_CLOSE ; break ; }
295+ default : { throw new \RuntimeException (sprintf ('Invalid token. ' )); }
307296 }
308297 $ tokens [] = array ($ type , $ token , $ position );
309298 $ position += mb_strlen ($ token , 'utf-8 ' );
@@ -312,7 +301,7 @@ private function tokenize($text)
312301 return $ tokens ;
313302 }
314303
315- private function getTokenizerRegex (SyntaxInterface $ syntax )
304+ private function prepareLexer (SyntaxInterface $ syntax )
316305 {
317306 $ group = function ($ text , $ group ) {
318307 return '(?< ' .$ group .'> ' .preg_replace ('/(.)/us ' , '\\\\$0 ' , $ text ).') ' ;
@@ -322,12 +311,6 @@ private function getTokenizerRegex(SyntaxInterface $syntax)
322311 };
323312
324313 $ rules = array (
325- $ group ($ syntax ->getOpeningTag (), 'open ' ),
326- $ group ($ syntax ->getClosingTag (), 'close ' ),
327- $ group ($ syntax ->getClosingTagMarker (), 'marker ' ),
328- $ group ($ syntax ->getParameterValueSeparator (), 'separator ' ),
329- $ group ($ syntax ->getParameterValueDelimiter (), 'delimiter ' ),
330- '(?<ws>\s+) ' ,
331314 '(?<string> \\\\.|(?:(?! ' .implode ('| ' , array (
332315 $ quote ($ syntax ->getOpeningTag ()),
333316 $ quote ($ syntax ->getClosingTag ()),
@@ -336,6 +319,12 @@ private function getTokenizerRegex(SyntaxInterface $syntax)
336319 $ quote ($ syntax ->getParameterValueDelimiter ()),
337320 '\s+ ' ,
338321 )).').)+) ' ,
322+ '(?<ws>\s+) ' ,
323+ $ group ($ syntax ->getClosingTagMarker (), 'marker ' ),
324+ $ group ($ syntax ->getParameterValueDelimiter (), 'delimiter ' ),
325+ $ group ($ syntax ->getParameterValueSeparator (), 'separator ' ),
326+ $ group ($ syntax ->getOpeningTag (), 'open ' ),
327+ $ group ($ syntax ->getClosingTag (), 'close ' ),
339328 );
340329
341330 return '~( ' .implode ('| ' , $ rules ).')~us ' ;
0 commit comments