Skip to content

Commit

Permalink
Initial work on tokenizer enhancements.
Browse files Browse the repository at this point in the history
  • Loading branch information
c4xuxo committed Dec 2, 2022
1 parent 6c6e2de commit cc108ca
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 25 deletions.
2 changes: 2 additions & 0 deletions src/parser/statementParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,13 @@ enum STORAGE_MODIFIERS {
STATIC = 'static',
NEW = 'new',
LITERAL = 'literal',
FINAL = 'final',
}

/**
 * Access-modifier keywords.
 *
 * A string enum: each member's value is the lowercase keyword text, so a
 * member can be compared directly against a token's string value.
 */
enum ACCESS_MODIFIERS {
  /** The `public` keyword. */
  PUBLIC = 'public',

  /** The `private` keyword. */
  PRIVATE = 'private',

  /** The `protected` keyword. */
  PROTECTED = 'protected',
}

enum STATEMENT_KEYWORD {
Expand Down
140 changes: 115 additions & 25 deletions src/parser/tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ export class Token {
return this.type === Type.Alphanumeric;
}
isNumeric() {
return this.type === Type.Numeric;
return this.type === Type.Integer;
}
isLineComment() {
return this.type === Type.LineComment;
Expand Down Expand Up @@ -268,7 +268,7 @@ class Tokenizer {

parseCharacter(char: string): boolean {
if (this.tokenType === Type.Alphanumeric) {
if (this.charType === Type.Alphanumeric || this.charType === Type.Numeric) {
if (this.charType === Type.Alphanumeric || this.charType === Type.Integer) {
this.tokenValue = this.tokenValue + char;
this.parsed = true;
this.documentColumn++;
Expand All @@ -277,8 +277,8 @@ class Tokenizer {
this.finalizeToken(this.charType);
return true;
}
} else if (this.tokenType === Type.Numeric) {
if (this.charType === Type.Numeric) {
} else if (this.tokenType === Type.Integer) {
if (this.charType === Type.Integer) {
this.tokenValue = this.tokenValue + char;
this.parsed = true;
this.documentColumn++;
Expand Down Expand Up @@ -400,13 +400,13 @@ class Tokenizer {
this.documentColumn = 0;
this.finalizeToken(0);
return true;
} else if (this.tokenType > 10) { // all other token types
} else if (this.tokenType === -1) { // undefined
this.tokenValue = this.tokenValue + char;
this.parsed = true;
this.documentColumn++;
this.finalizeToken(0);
return true;
} else if (this.tokenType === -1) { // undefined
} else if (this.tokenType >= 0) { // all other token types
this.tokenValue = this.tokenValue + char;
this.parsed = true;
this.documentColumn++;
Expand All @@ -415,7 +415,6 @@ class Tokenizer {
}
return false;
}

finalizeToken(newType: number): void {
this.token = new Token(this.tokenType, this.tokenValue, this.tokenPosition);
this.tokenType = newType;
Expand All @@ -426,19 +425,27 @@ class Tokenizer {

function getType(c: string): Type {
const charCode: number = c.charCodeAt(0);

if (charCode === 12313) {
return Type.Alphanumeric;
if (charCode === 9) {
return Type.Tab;
} else if (charCode === 10) {
return Type.LineFeed;
}
// Find a better way to incorporate the %
if (charCode >= 65 && charCode <= 90 || charCode >= 97 && charCode <= 122 || charCode === 37) {
return Type.Alphanumeric;
} else if (charCode >= 48 && charCode <= 57) {
return Type.Numeric;
return Type.Integer;
} else if (charCode === 34) {
return Type.DoubleQuotes;
} else if (charCode === 47) {
return Type.Slash;
} else if (charCode === 9) {
return Type.Tab;
} else if (charCode === 10) {
return Type.NewLine;
return Type.LineFeed;
} else if (charCode === 32) {
return Type.Space;
} else if (charCode === 33) {
Expand Down Expand Up @@ -508,24 +515,107 @@ function getType(c: string): Type {
}

export const enum Type {
Alphanumeric = 1,
Numeric = 2,
LineComment = 3,
BlockComment = 4,
String = 5,
LineCommentInit = 6,
BlockCommentInit = 7,
BlockCommentTerm = 8,
DoubleQuotes = 9,
Slash = 10,

Tab = 11,
NewLine = 13,
// Special case for an undefined token.
Undefined = -1,

// Other negative numbers are reserved for composite tokens.

// TODO: define the regular expression for Alphanumeric.
/*
Expression: RegExp('') >> [%A-Za-z][A-Za-z0-9]*, where A-Z stands for any alphabetic character defined in Unicode
Examples:
- %, %1, %A, %a, az12, a12, é12, ú13, josé
*/
//
Alphanumeric = -2,

/*
Expression: RegExp('^[0-9]*\.?[0-9]+$')
Examples:
- 0.5
- .1
- 10
- 10.56
*/
Number = -3,

/*
Expression: RegExp('')
Examples:
- //
*/
LineCommentInitPSL = -10,

/*
Expression: RegExp('') TODO up to end of line
Examples:
- // Line comment
- //Line comment
*/
LineCommentPSL = -4,

/*
Expression: RegExp('') TODO up to end of line
Examples:
- ;Line comment
- ; Line comment
*/
LineCommentMUMPS = -5,

/*
Expression: RegExp('') TODO
Examples:
- /*
*/
BlockCommentInit = -6,

/*
Expression: RegExp('') TODO
Examples:
- *\/
Note: '/' is escaped in the example so that it does not terminate
this block comment.
*/
BlockCommentTerm = -7,

/*
Expression: Everything between BlockCommentInit and BlockCommentTerm
Examples:
- /* My block comment *\/
- /*
My block comment
*\/
Note: '/' is escaped in the examples so that it does not terminate
this block comment.
*/
BlockComment = -8,

/*
Expression: RegExp('')
Examples:
- "Anything between double quotes"
*/
String = -9,

/*
Expression: RegExp('')
Examples:
- LineFeed
- CarriageReturn
- CarriageReturn LineFeed
*/
EndOfLine = -12,

// Non-negative numbers are reserved for unicode codepoints.
Tab = 9,
LineFeed = 10,
CarriageReturn = 13,
Space = 32,
ExclamationMark = 33,
DoubleQuotes = 34,
NumberSign = 35,
DollarSign = 36,
// PercentSign = 37,
PercentSign = 37,
Ampersand = 38,
SingleQuote = 39,
OpenParen = 40,
Expand All @@ -535,6 +625,7 @@ export const enum Type {
Comma = 44,
MinusSign = 45,
Period = 46,
ForwardSlash = 47,
Colon = 58,
SemiColon = 59,
LessThan = 60,
Expand All @@ -553,5 +644,4 @@ export const enum Type {
CloseBrace = 125,
Tilde = 126,

Undefined = -1,
}
}

0 comments on commit cc108ca

Please sign in to comment.