Skip to content

Commit 9e3ada6

Browse files
committed
added TypeScript target and some modifications
1 parent 881f4a2 commit 9e3ada6

11 files changed

Lines changed: 940 additions & 644 deletions

File tree

PythonLexer.g4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ lexer grammar PythonLexer;
3131
options { superClass=PythonLexerBase; }
3232
tokens {
3333
// the following tokens are only for compatibility with the PythonLexerBase class
34-
LSQB, RSQB, LBRACE, RBRACE, TYPE_COMMENT, FSTRING_START, FSTRING_MIDDLE, FSTRING_END
34+
LSQB, RSQB, LBRACE, RBRACE, FSTRING_START, FSTRING_MIDDLE, FSTRING_END
3535
3636
// https://docs.python.org/3.8/reference/lexical_analysis.html#indentation
3737
, INDENT, DEDENT

port_CSharp/PythonLexerBase.cs

Lines changed: 45 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ public abstract class PythonLexerBase : Lexer
3737
private Stack<int> indentLengthStack;
3838
// A list where tokens are waiting to be loaded into the token stream
3939
private LinkedList<IToken> pendingTokens;
40+
4041
// last pending token types
4142
private int previousPendingTokenType;
4243
private int lastPendingTokenTypeFromDefaultChannel;
@@ -47,11 +48,11 @@ public abstract class PythonLexerBase : Lexer
4748
private bool wasSpaceIndentation;
4849
private bool wasTabIndentation;
4950
private bool wasIndentationMixedWithSpacesAndTabs;
50-
private const int INVALID_LENGTH = -1;
5151

52-
private CommonToken curToken; // current (under processing) token
53-
private IToken ffgToken; // following (look ahead) token
52+
private IToken curToken; // current (under processing) token
53+
private IToken ffgToken; // following (look ahead) token
5454

55+
private const int INVALID_LENGTH = -1;
5556
private const string ERR_TXT = " ERROR: ";
5657

5758
protected PythonLexerBase(ICharStream input) : base(input)
@@ -64,6 +65,20 @@ protected PythonLexerBase(ICharStream input, TextWriter output, TextWriter error
6465
this.Init();
6566
}
6667

68+
public override IToken NextToken() // reading the input stream until a return EOF
69+
{
70+
this.CheckNextToken();
71+
IToken firstPendingToken = this.pendingTokens.First.Value;
72+
this.pendingTokens.RemoveFirst();
73+
return firstPendingToken; // add the queued token to the token stream
74+
}
75+
76+
public override void Reset()
77+
{
78+
this.Init();
79+
base.Reset();
80+
}
81+
6782
private void Init()
6883
{
6984
this.indentLengthStack = new Stack<int>();
@@ -78,14 +93,6 @@ private void Init()
7893
this.ffgToken = null!;
7994
}
8095

81-
public override IToken NextToken() // reading the input stream until a return EOF
82-
{
83-
this.CheckNextToken();
84-
IToken firstPendingToken = this.pendingTokens.First.Value;
85-
this.pendingTokens.RemoveFirst();
86-
return firstPendingToken; // add the queued token to the token stream
87-
}
88-
8996
private void CheckNextToken()
9097
{
9198
if (this.previousPendingTokenType != TokenConstants.EOF)
@@ -113,9 +120,6 @@ private void CheckNextToken()
113120
case PythonLexer.NEWLINE:
114121
this.HandleNEWLINEtoken();
115122
break;
116-
case PythonLexer.STRING:
117-
this.HandleSTRINGtoken();
118-
break;
119123
case PythonLexer.ERRORTOKEN:
120124
this.ReportLexerError("token recognition error at: '" + this.curToken.Text + "'");
121125
this.AddPendingToken(this.curToken);
@@ -133,12 +137,12 @@ private void CheckNextToken()
133137
private void SetCurrentAndFollowingTokens()
134138
{
135139
this.curToken = this.ffgToken == null ?
136-
new CommonToken(base.NextToken()) :
137-
new CommonToken(this.ffgToken);
140+
base.NextToken() :
141+
this.ffgToken;
138142

139143
this.ffgToken = this.curToken.Type == TokenConstants.EOF ?
140-
this.curToken :
141-
base.NextToken();
144+
this.curToken :
145+
base.NextToken();
142146
}
143147

144148
// initialize the indentLengthStack
@@ -196,7 +200,7 @@ private void HandleNEWLINEtoken()
196200
}
197201
else
198202
{
199-
CommonToken nlToken = new CommonToken(this.curToken); // save the current NEWLINE token
203+
IToken nlToken = new CommonToken(this.curToken); // save the current NEWLINE token
200204
bool isLookingAhead = this.ffgToken.Type == PythonLexer.WS;
201205
if (isLookingAhead)
202206
{
@@ -205,12 +209,12 @@ private void HandleNEWLINEtoken()
205209

206210
switch (this.ffgToken.Type)
207211
{
208-
case PythonLexer.NEWLINE: // We're before a blank line
209-
case PythonLexer.COMMENT: // We're before a comment
212+
case PythonLexer.NEWLINE: // We're before a blank line
213+
case PythonLexer.COMMENT: // We're before a comment
210214
this.HideAndAddPendingToken(nlToken);
211215
if (isLookingAhead)
212216
{
213-
this.AddPendingToken(this.curToken); // WS token
217+
this.AddPendingToken(this.curToken); // WS token
214218
}
215219
break;
216220
default:
@@ -243,7 +247,6 @@ private void HandleNEWLINEtoken()
243247

244248
private void InsertIndentOrDedentToken(int indentLength)
245249
{
246-
//*** https://docs.python.org/3/reference/lexical_analysis.html#indentation
247250
int prevIndentLength = this.indentLengthStack.Peek();
248251
if (indentLength > prevIndentLength)
249252
{
@@ -268,25 +271,6 @@ private void InsertIndentOrDedentToken(int indentLength)
268271
}
269272
}
270273

271-
private void HandleSTRINGtoken()
272-
{
273-
// remove the \<newline> escape sequences from the string literal
274-
// https://docs.python.org/3.11/reference/lexical_analysis.html#string-and-bytes-literals
275-
string line_joinFreeStringLiteral = Regex.Replace(this.curToken.Text, @"\\\r?\n", "");
276-
if (this.curToken.Text.Length == line_joinFreeStringLiteral.Length)
277-
{
278-
this.AddPendingToken(this.curToken);
279-
}
280-
else
281-
{
282-
CommonToken originalSTRINGtoken = new CommonToken(this.curToken); // backup the original token
283-
this.curToken.Text = line_joinFreeStringLiteral;
284-
this.AddPendingToken(this.curToken); // add the modified token with inline string literal
285-
this.HideAndAddPendingToken(originalSTRINGtoken); // add the original token with a hidden channel
286-
// this inserted hidden token allows to restore the original string literal with the \<newline> escape sequences
287-
}
288-
}
289-
290274
private void InsertTrailingTokens()
291275
{
292276
switch (this.lastPendingTokenTypeFromDefaultChannel)
@@ -311,42 +295,43 @@ private void HandleEOFtoken()
311295
this.AddPendingToken(this.curToken);
312296
}
313297

314-
private void HideAndAddPendingToken(CommonToken cToken)
298+
private void HideAndAddPendingToken(IToken tkn)
315299
{
316-
cToken.Channel = TokenConstants.HiddenChannel;
317-
this.AddPendingToken(cToken);
300+
CommonToken ctkn = new CommonToken(tkn);
301+
ctkn.Channel = TokenConstants.HiddenChannel;
302+
this.AddPendingToken(ctkn);
318303
}
319304

320-
private void CreateAndAddPendingToken(int type, int channel, string text, IToken baseToken)
305+
private void CreateAndAddPendingToken(int ttype, int channel, string text, IToken sampleToken)
321306
{
322-
CommonToken cToken = new CommonToken(baseToken);
323-
cToken.Type = type;
324-
cToken.Channel = channel;
325-
cToken.StopIndex = baseToken.StartIndex - 1;
307+
CommonToken ctkn = new CommonToken(sampleToken);
308+
ctkn.Type = ttype;
309+
ctkn.Channel = channel;
310+
ctkn.StopIndex = sampleToken.StartIndex - 1;
326311

327-
cToken.Text = text == null
328-
? "<" + Vocabulary.GetSymbolicName(type) + ">"
312+
ctkn.Text = text == null
313+
? "<" + Vocabulary.GetSymbolicName(ttype) + ">"
329314
: text;
330315

331-
this.AddPendingToken(cToken);
316+
this.AddPendingToken(ctkn);
332317
}
333318

334-
private void AddPendingToken(IToken token)
319+
private void AddPendingToken(IToken tkn)
335320
{
336321
// save the last pending token type here, because NextToken() may already have emptied the pendingTokens linked list
337-
this.previousPendingTokenType = token.Type;
338-
if (token.Channel == TokenConstants.DefaultChannel)
322+
this.previousPendingTokenType = tkn.Type;
323+
if (tkn.Channel == TokenConstants.DefaultChannel)
339324
{
340325
this.lastPendingTokenTypeFromDefaultChannel = this.previousPendingTokenType;
341326
}
342-
this.pendingTokens.AddLast(token);
327+
this.pendingTokens.AddLast(tkn);
343328
}
344329

345-
private int GetIndentationLength(string textWS) // the textWS may contain spaces, tabs or form feeds
330+
private int GetIndentationLength(string indentText) // the indentText may contain spaces, tabs or form feeds
346331
{
347332
const int TAB_LENGTH = 8; // the standard number of spaces to replace a tab with spaces
348333
int length = 0;
349-
foreach (char ch in textWS)
334+
foreach (char ch in indentText)
350335
{
351336
switch (ch)
352337
{
@@ -369,7 +354,7 @@ private int GetIndentationLength(string textWS) // the textWS may contain spaces
369354
if (!this.wasIndentationMixedWithSpacesAndTabs)
370355
{
371356
this.wasIndentationMixedWithSpacesAndTabs = true;
372-
return PythonLexerBase.INVALID_LENGTH; // only for the first inconsistent indent
357+
length = PythonLexerBase.INVALID_LENGTH; // only for the first inconsistent indent
373358
}
374359
}
375360
return length;
@@ -387,10 +372,4 @@ private void ReportError(string errMsg)
387372
// the ERRORTOKEN will raise an error in the parser
388373
this.CreateAndAddPendingToken(PythonLexer.ERRORTOKEN, TokenConstants.DefaultChannel, PythonLexerBase.ERR_TXT + errMsg, this.ffgToken);
389374
}
390-
391-
public override void Reset()
392-
{
393-
this.Init();
394-
base.Reset();
395-
}
396375
}

port_CSharp/csgrun4py.cs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@ static void Main(string[] args)
2525

2626
private static string GetTokenMetaDataWithTokenName(PythonParser parser, IToken token)
2727
{
28-
String metaData = token.ToString()!; // original format: [@TokenIndex,StartIndex:StopIndex='Text',<TokenType>,channel=Channel,Line:Column]
29-
int lesserPos = metaData.LastIndexOf(",<");
30-
int greaterPos = metaData.LastIndexOf(">,");
31-
return metaData.Substring(0, lesserPos + 2) // modified format: [@TokenIndex,StartIndex:StopIndex='Text',<TokenName>,channel=Channel,Line:Column]
32-
+ parser.Vocabulary.GetSymbolicName(token.Type)
33-
+ metaData.Substring(greaterPos);
28+
String tokenText = token.ToString();
29+
String tokenName = token.Type == TokenConstants.EOF ? "EOF" : parser.Vocabulary.GetDisplayName(token.Type);
30+
String channelText = token.Channel == TokenConstants.DefaultChannel ? "" : "channel=" + token.Channel + ",";
3431

32+
// original format: [@TokenIndex,StartIndex:StopIndex='Text',<TokenType>,channel=Channel,Line:Column]
33+
// modified format: [@TokenIndex,StartIndex:StopIndex='Text',<TokenName>,channel=Channel,Line:Column] (the "channel=Channel," part is omitted for tokens on the default channel)
34+
return "[@" + token.TokenIndex + "," + token.StartIndex + ":" + token.StopIndex + "='" + tokenText + "',<" + tokenName + ">," + channelText + token.Line + ":" + token.Column + "]";
3535
}
3636
}
3737
}

0 commit comments

Comments
 (0)