@@ -37,6 +37,7 @@ public abstract class PythonLexerBase : Lexer
3737 private Stack < int > indentLengthStack ;
3838 // A list where tokens are waiting to be loaded into the token stream
3939 private LinkedList < IToken > pendingTokens ;
40+
4041 // last pending token types
4142 private int previousPendingTokenType ;
4243 private int lastPendingTokenTypeFromDefaultChannel ;
@@ -47,11 +48,11 @@ public abstract class PythonLexerBase : Lexer
4748 private bool wasSpaceIndentation ;
4849 private bool wasTabIndentation ;
4950 private bool wasIndentationMixedWithSpacesAndTabs ;
50- private const int INVALID_LENGTH = - 1 ;
5151
52- private CommonToken curToken ; // current (under processing) token
53- private IToken ffgToken ; // following (look ahead) token
52+ private IToken curToken ; // current (under processing) token
53+ private IToken ffgToken ; // following (look ahead) token
5454
55+ private const int INVALID_LENGTH = - 1 ;
5556 private const string ERR_TXT = " ERROR: " ;
5657
5758 protected PythonLexerBase ( ICharStream input ) : base ( input )
@@ -64,6 +65,20 @@ protected PythonLexerBase(ICharStream input, TextWriter output, TextWriter error
6465 this . Init ( ) ;
6566 }
6667
// Returns the next token for the token stream: CheckNextToken() runs first,
// then the head of the pendingTokens linked list is removed and returned.
// NOTE(review): pendingTokens.First.Value is read without a null check, so this relies on
// CheckNextToken() always leaving at least one pending token - confirm against its full body.
68+ public override IToken NextToken ( ) // keeps reading the input stream until EOF is returned
69+ {
70+ this . CheckNextToken ( ) ;
71+ IToken firstPendingToken = this . pendingTokens . First . Value ;
72+ this . pendingTokens . RemoveFirst ( ) ;
73+ return firstPendingToken ; // hand the queued token over to the token stream
74+ }
75+
// Resets the lexer: Init() re-initializes this class's own bookkeeping first
// (e.g. indentLengthStack and ffgToken), then base.Reset() resets the base Lexer.
76+ public override void Reset ( )
77+ {
78+ this . Init ( ) ;
79+ base . Reset ( ) ;
80+ }
81+
6782 private void Init ( )
6883 {
6984 this . indentLengthStack = new Stack < int > ( ) ;
@@ -78,14 +93,6 @@ private void Init()
7893 this . ffgToken = null ! ;
7994 }
8095
// Diff note: these '-' lines are the previous position of NextToken() (after Init()).
// The method is re-added with the same statements by the '+' lines numbered 68-74, before Init().
81- public override IToken NextToken ( ) // keeps reading the input stream until EOF is returned
82- {
83- this . CheckNextToken ( ) ;
84- IToken firstPendingToken = this . pendingTokens . First . Value ;
85- this . pendingTokens . RemoveFirst ( ) ;
86- return firstPendingToken ; // hand the queued token over to the token stream
87- }
88-
8996 private void CheckNextToken ( )
9097 {
9198 if ( this . previousPendingTokenType != TokenConstants . EOF )
@@ -113,9 +120,6 @@ private void CheckNextToken()
113120 case PythonLexer . NEWLINE :
114121 this . HandleNEWLINEtoken ( ) ;
115122 break ;
116- case PythonLexer . STRING :
117- this . HandleSTRINGtoken ( ) ;
118- break ;
119123 case PythonLexer . ERRORTOKEN :
120124 this . ReportLexerError ( "token recognition error at: '" + this . curToken . Text + "'" ) ;
121125 this . AddPendingToken ( this . curToken ) ;
@@ -133,12 +137,12 @@ private void CheckNextToken()
// Advances the one-token look-ahead window (curToken / ffgToken).
// curToken becomes the previous ffgToken, or a fresh base.NextToken() while ffgToken is still null
// (the value Init() assigns to it).
// ffgToken then becomes the next token from base.NextToken(); once curToken is EOF no further token
// is requested and ffgToken stays on that same EOF token.
// Diff note: the '-' lines are the earlier version, which wrapped curToken in a new CommonToken copy;
// the '+' lines store the token returned by the base lexer as-is (curToken is now declared as IToken).
133137 private void SetCurrentAndFollowingTokens ( )
134138 {
135139 this . curToken = this . ffgToken == null ?
136- new CommonToken ( base . NextToken ( ) ) :
137- new CommonToken ( this . ffgToken ) ;
140+ base . NextToken ( ) :
141+ this . ffgToken ;
138142
139143 this . ffgToken = this . curToken . Type == TokenConstants . EOF ?
140- this . curToken :
141- base . NextToken ( ) ;
144+ this . curToken :
145+ base . NextToken ( ) ;
142146 }
143147
144148 // initialize the _indentLengths
@@ -196,7 +200,7 @@ private void HandleNEWLINEtoken()
196200 }
197201 else
198202 {
199- CommonToken nlToken = new CommonToken ( this . curToken ) ; // save the current NEWLINE token
203+ IToken nlToken = new CommonToken ( this . curToken ) ; // save the current NEWLINE token
200204 bool isLookingAhead = this . ffgToken . Type == PythonLexer . WS ;
201205 if ( isLookingAhead )
202206 {
@@ -205,12 +209,12 @@ private void HandleNEWLINEtoken()
205209
206210 switch ( this . ffgToken . Type )
207211 {
208- case PythonLexer . NEWLINE : // We're before a blank line
209- case PythonLexer . COMMENT : // We're before a comment
212+ case PythonLexer . NEWLINE : // We're before a blank line
213+ case PythonLexer . COMMENT : // We're before a comment
210214 this . HideAndAddPendingToken ( nlToken ) ;
211215 if ( isLookingAhead )
212216 {
213- this . AddPendingToken ( this . curToken ) ; // WS token
217+ this . AddPendingToken ( this . curToken ) ; // WS token
214218 }
215219 break ;
216220 default :
@@ -243,7 +247,6 @@ private void HandleNEWLINEtoken()
243247
244248 private void InsertIndentOrDedentToken ( int indentLength )
245249 {
246- //*** https://docs.python.org/3/reference/lexical_analysis.html#indentation
247250 int prevIndentLength = this . indentLengthStack . Peek ( ) ;
248251 if ( indentLength > prevIndentLength )
249252 {
@@ -268,25 +271,6 @@ private void InsertIndentOrDedentToken(int indentLength)
268271 }
269272 }
270273
// Diff note: this whole method is deleted by the diff (all lines are '-'), together with the
// "case PythonLexer . STRING" branch that called it from CheckNextToken().
// What it did: strip every backslash + (optional CR) + LF sequence from the current STRING token text.
// If nothing was stripped, the token was queued unchanged; otherwise the modified token was queued,
// followed by a hidden-channel copy that still carried the original text.
271- private void HandleSTRINGtoken ( )
272- {
273- // remove the \<newline> escape sequences from the string literal
274- // https://docs.python.org/3.11/reference/lexical_analysis.html#string-and-bytes-literals
275- string line_joinFreeStringLiteral = Regex . Replace ( this . curToken . Text , @"\\\r?\n" , "" ) ;
276- if ( this . curToken . Text . Length == line_joinFreeStringLiteral . Length )
277- {
278- this . AddPendingToken ( this . curToken ) ;
279- }
280- else
281- {
282- CommonToken originalSTRINGtoken = new CommonToken ( this . curToken ) ; // backup the original token
283- this . curToken . Text = line_joinFreeStringLiteral ;
284- this . AddPendingToken ( this . curToken ) ; // add the modified token with inline string literal
285- this . HideAndAddPendingToken ( originalSTRINGtoken ) ; // add the original token with a hidden channel
286- // this inserted hidden token makes it possible to restore the original string literal with the \<newline> escape sequences
287- }
288- }
289-
290274 private void InsertTrailingTokens ( )
291275 {
292276 switch ( this . lastPendingTokenTypeFromDefaultChannel )
@@ -311,42 +295,43 @@ private void HandleEOFtoken()
311295 this . AddPendingToken ( this . curToken ) ;
312296 }
313297
// Queues a hidden-channel version of the given token.
// '+' version: accepts any IToken, builds a new CommonToken from it, sets Channel to
// TokenConstants.HiddenChannel on that new token (not on the argument) and passes it to AddPendingToken().
// '-' version (removed): took a CommonToken and changed its Channel in place before queuing it.
314- private void HideAndAddPendingToken ( CommonToken cToken )
298+ private void HideAndAddPendingToken ( IToken tkn )
315299 {
316- cToken . Channel = TokenConstants . HiddenChannel ;
317- this . AddPendingToken ( cToken ) ;
300+ CommonToken ctkn = new CommonToken ( tkn ) ;
301+ ctkn . Channel = TokenConstants . HiddenChannel ;
302+ this . AddPendingToken ( ctkn ) ;
318303 }
319304
// Builds a new token from a sample token and queues it via AddPendingToken().
// The new CommonToken is constructed from sampleToken, then its Type and Channel are overwritten with
// the given values and StopIndex is set to sampleToken.StartIndex - 1
// (stop lies before start - presumably to mark an inserted token that covers no input text; confirm).
// Text: when text is null, "<" + the symbolic name of the token type + ">" is used; otherwise text itself.
// Diff note: the '-'/'+' pairs differ only in naming (type -> ttype, baseToken -> sampleToken, cToken -> ctkn).
320- private void CreateAndAddPendingToken ( int type , int channel , string text , IToken baseToken )
305+ private void CreateAndAddPendingToken ( int ttype , int channel , string text , IToken sampleToken )
321306 {
322- CommonToken cToken = new CommonToken ( baseToken ) ;
323- cToken . Type = type ;
324- cToken . Channel = channel ;
325- cToken . StopIndex = baseToken . StartIndex - 1 ;
307+ CommonToken ctkn = new CommonToken ( sampleToken ) ;
308+ ctkn . Type = ttype ;
309+ ctkn . Channel = channel ;
310+ ctkn . StopIndex = sampleToken . StartIndex - 1 ;
326311
327- cToken . Text = text == null
328- ? "<" + Vocabulary . GetSymbolicName ( type ) + ">"
312+ ctkn . Text = text == null
313+ ? "<" + Vocabulary . GetSymbolicName ( ttype ) + ">"
329314 : text ;
330315
331- this . AddPendingToken ( cToken ) ;
316+ this . AddPendingToken ( ctkn ) ;
332317 }
333318
// Appends a token to the end of pendingTokens and records its type.
// previousPendingTokenType is updated for every token; lastPendingTokenTypeFromDefaultChannel is
// updated only when the token's Channel equals TokenConstants.DefaultChannel.
// Diff note: the '-'/'+' pairs differ only in the parameter name (token -> tkn).
334- private void AddPendingToken ( IToken token )
319+ private void AddPendingToken ( IToken tkn )
335320 {
336321 // remember the last pending token type, because NextToken() may already have emptied the pendingTokens linked list
337- this . previousPendingTokenType = token . Type ;
338- if ( token . Channel == TokenConstants . DefaultChannel )
322+ this . previousPendingTokenType = tkn . Type ;
323+ if ( tkn . Channel == TokenConstants . DefaultChannel )
339324 {
340325 this . lastPendingTokenTypeFromDefaultChannel = this . previousPendingTokenType ;
341326 }
342- this . pendingTokens . AddLast ( token ) ;
327+ this . pendingTokens . AddLast ( tkn ) ;
343328 }
344329
345- private int GetIndentationLength ( string textWS ) // the textWS may contain spaces, tabs or form feeds
330+ private int GetIndentationLength ( string indentText ) // the indentText may contain spaces, tabs or form feeds
346331 {
347332 const int TAB_LENGTH = 8 ; // the standard number of spaces to replace a tab with spaces
348333 int length = 0 ;
349- foreach ( char ch in textWS )
334+ foreach ( char ch in indentText )
350335 {
351336 switch ( ch )
352337 {
@@ -369,7 +354,7 @@ private int GetIndentationLength(string textWS) // the textWS may contain spaces
369354 if ( ! this . wasIndentationMixedWithSpacesAndTabs )
370355 {
371356 this . wasIndentationMixedWithSpacesAndTabs = true ;
372- return PythonLexerBase . INVALID_LENGTH ; // only for the first inconsistent indent
357+ length = PythonLexerBase . INVALID_LENGTH ; // only for the first inconsistent indent
373358 }
374359 }
375360 return length ;
@@ -387,10 +372,4 @@ private void ReportError(string errMsg)
387372 // the ERRORTOKEN will raise an error in the parser
388373 this . CreateAndAddPendingToken ( PythonLexer . ERRORTOKEN , TokenConstants . DefaultChannel , PythonLexerBase . ERR_TXT + errMsg , this . ffgToken ) ;
389374 }
390-
// Diff note: these '-' lines are the previous position of Reset() (end of the class).
// The method is re-added with the same statements by the '+' lines numbered 76-80, next to NextToken().
391- public override void Reset ( )
392- {
393- this . Init ( ) ;
394- base . Reset ( ) ;
395- }
396375}
0 commit comments