diff --git a/src/Main.java b/src/Main.java index 8dc04fd..952f037 100644 --- a/src/Main.java +++ b/src/Main.java @@ -1,7 +1,11 @@ import java.util.*; import java.io.*; -//import static Lexer.Token; +import common.*; +import lexer.*; +import parser.*; + +import static lexer.Lexer.Token; public class Main{ @@ -27,11 +31,11 @@ public static void main(String[] args){ Lexer lex = new Lexer(r); - List tokens = new ArrayList<>(); + List tokens = new ArrayList<>(); while(!(lex.isDone() || lex.errorOccurred())){ try{ - Lexer.Token t = lex.yylex(); + Token t = lex.yylex(); if(!(t == null && lex.isDone())) tokens.add(t); } @@ -43,9 +47,9 @@ public static void main(String[] args){ } } - Iterator itr = tokens.iterator(); + Iterator itr = tokens.iterator(); - Lexer.Token t = null; + Token t = null; int line = 0; if(itr.hasNext()){ diff --git a/src/common/TokenTypeID.java b/src/common/TokenTypeID.java new file mode 100644 index 0000000..1f41919 --- /dev/null +++ b/src/common/TokenTypeID.java @@ -0,0 +1,56 @@ +package common; + +public interface TokenTypeID{ + public static final int _BOOLEAN = 49; /*Don't think 0 can be used as it may be used by parser for EOF. Replacing w/ next value (49) + because I DO NOT want to change each of the other constants. If anyone else wants to do so + (make _BOOLEAN=1 and the remaining constants counting up) feel free. 
-R*/ + public static final int _BREAK = 1; + public static final int _CLASS = 2; + public static final int _DOUBLE = 3; + public static final int _ELSE = 4; + public static final int _EXTENDS = 5; + public static final int _FOR = 6; + public static final int _IF = 7; + public static final int _IMPLEMENTS = 8; + public static final int _INT = 9; + public static final int _INTERFACE = 10; + public static final int _NEW = 11; + public static final int _NEWARRAY = 12; + public static final int _NULL = 13; + public static final int _PRINTLN = 14; + public static final int _READLN = 15; + public static final int _RETURN = 16; + public static final int _STRING = 17; + public static final int _VOID = 18; + public static final int _WHILE = 19; + public static final int _PLUS = 20; + public static final int _MINUS = 21; + public static final int _MULTIPLICATION = 22; + public static final int _DIVISION = 23; + public static final int _MOD = 24; + public static final int _LESS = 25; + public static final int _LESSEQUAL = 26; + public static final int _GREATER = 27; + public static final int _GREATEREQUAL = 28; + public static final int _EQUAL = 29; + public static final int _NOTEQUAL = 30; + public static final int _AND = 31; + public static final int _OR = 32; + public static final int _NOT = 33; + public static final int _ASSIGNOP = 34; + public static final int _SEMICOLON = 35; + public static final int _COMMA = 36; + public static final int _PERIOD = 37; + public static final int _LEFTPAREN = 38; + public static final int _RIGHTPAREN = 39; + public static final int _LEFTBRACKET = 40; + public static final int _RIGHTBRACKET = 41; + public static final int _LEFTBRACE = 42; + public static final int _RIGHTBRACE = 43; + public static final int _INTCONSTANT = 44; + public static final int _DOUBLECONSTANT = 45; + public static final int _STRINGCONSTANT = 46; + public static final int _BOOLEANCONSTANT = 47; + public static final int _ID = 48; + public static final int _ERROR = 
-1; +} \ No newline at end of file diff --git a/src/Trie.java b/src/common/Trie.java similarity index 99% rename from src/Trie.java rename to src/common/Trie.java index 29d86c6..f2fa244 100644 --- a/src/Trie.java +++ b/src/common/Trie.java @@ -1,3 +1,5 @@ +package common; + import java.util.*; import java.util.regex.Pattern; diff --git a/src/Lexer.java b/src/lexer/Lexer.java similarity index 86% rename from src/Lexer.java rename to src/lexer/Lexer.java index afc1279..5afcab6 100644 --- a/src/Lexer.java +++ b/src/lexer/Lexer.java @@ -2,12 +2,16 @@ // Generated by JFlex 1.8.2 http://jflex.de/ // source: toy.flex +package lexer; + import java.util.*; +import common.*; + // See https://github.com/jflex-de/jflex/issues/222 @SuppressWarnings("FallThrough") -public class Lexer { +public class Lexer implements TokenTypeID { /** This character denotes the end of file. */ public static final int YYEOF = -1; @@ -421,57 +425,6 @@ private static int zzUnpackAttribute(String packed, int offset, int [] result) { /* user code: */ // Class for tokens public static class Token{ - public static final int _BOOLEAN = 0; - public static final int _BREAK = 1; - public static final int _CLASS = 2; - public static final int _DOUBLE = 3; - public static final int _ELSE = 4; - public static final int _EXTENDS = 5; - public static final int _FOR = 6; - public static final int _IF = 7; - public static final int _IMPLEMENTS = 8; - public static final int _INT = 9; - public static final int _INTERFACE = 10; - public static final int _NEW = 11; - public static final int _NEWARRAY = 12; - public static final int _NULL = 13; - public static final int _PRINTLN = 14; - public static final int _READLN = 15; - public static final int _RETURN = 16; - public static final int _STRING = 17; - public static final int _VOID = 18; - public static final int _WHILE = 19; - public static final int _PLUS = 20; - public static final int _MINUS = 21; - public static final int _MULTIPLICATION = 22; - public static 
final int _DIVISION = 23; - public static final int _MOD = 24; - public static final int _LESS = 25; - public static final int _LESSEQUAL = 26; - public static final int _GREATER = 27; - public static final int _GREATEREQUAL = 28; - public static final int _EQUAL = 29; - public static final int _NOTEQUAL = 30; - public static final int _AND = 31; - public static final int _OR = 32; - public static final int _NOT = 33; - public static final int _ASSIGNOP = 34; - public static final int _SEMICOLON = 35; - public static final int _COMMA = 36; - public static final int _PERIOD = 37; - public static final int _LEFTPAREN = 38; - public static final int _RIGHTPAREN = 39; - public static final int _LEFTBRACKET = 40; - public static final int _RIGHTBRACKET = 41; - public static final int _LEFTBRACE = 42; - public static final int _RIGHTBRACE = 43; - public static final int _INTCONSTANT = 44; - public static final int _DOUBLECONSTANT = 45; - public static final int _STRINGCONSTANT = 46; - public static final int _BOOLEANCONSTANT = 47; - public static final int _ID = 48; - public static final int _ERROR = -1; - private final int type, line; private final String value; @@ -997,7 +950,7 @@ else if (zzAtEOF) { switch (zzAction < 0 ? 
zzAction : ZZ_ACTION[zzAction]) { case 1: { error = true; - return Token.build(Token._ERROR, yytext(), currentLine); + return Token.build(_ERROR, yytext(), currentLine); } // fall through case 62: break; @@ -1012,7 +965,7 @@ else if (zzAtEOF) { // fall through case 64: break; case 4: - { return Token.build(Token._NOT, currentLine); + { return Token.build(_NOT, currentLine); } // fall through case 65: break; @@ -1022,99 +975,99 @@ else if (zzAtEOF) { // fall through case 66: break; case 6: - { return Token.build(Token._MOD, currentLine); + { return Token.build(_MOD, currentLine); } // fall through case 67: break; case 7: - { return Token.build(Token._LEFTPAREN, currentLine); + { return Token.build(_LEFTPAREN, currentLine); } // fall through case 68: break; case 8: - { return Token.build(Token._RIGHTPAREN, currentLine); + { return Token.build(_RIGHTPAREN, currentLine); } // fall through case 69: break; case 9: - { return Token.build(Token._MULTIPLICATION, currentLine); + { return Token.build(_MULTIPLICATION, currentLine); } // fall through case 70: break; case 10: - { return Token.build(Token._PLUS, currentLine); + { return Token.build(_PLUS, currentLine); } // fall through case 71: break; case 11: - { return Token.build(Token._COMMA, currentLine); + { return Token.build(_COMMA, currentLine); } // fall through case 72: break; case 12: - { return Token.build(Token._MINUS, currentLine); + { return Token.build(_MINUS, currentLine); } // fall through case 73: break; case 13: - { return Token.build(Token._PERIOD, currentLine); + { return Token.build(_PERIOD, currentLine); } // fall through case 74: break; case 14: - { return Token.build(Token._DIVISION, currentLine); + { return Token.build(_DIVISION, currentLine); } // fall through case 75: break; case 15: - { return Token.build(Token._INTCONSTANT, yytext(), currentLine); + { return Token.build(_INTCONSTANT, yytext(), currentLine); } // fall through case 76: break; case 16: - { return Token.build(Token._SEMICOLON, 
currentLine); + { return Token.build(_SEMICOLON, currentLine); } // fall through case 77: break; case 17: - { return Token.build(Token._LESS, currentLine); + { return Token.build(_LESS, currentLine); } // fall through case 78: break; case 18: - { return Token.build(Token._EQUAL, currentLine); + { return Token.build(_EQUAL, currentLine); } // fall through case 79: break; case 19: - { return Token.build(Token._GREATER, currentLine); + { return Token.build(_GREATER, currentLine); } // fall through case 80: break; case 20: { String s = yytext(); symbolTable.reserve(s); - return Token.build(Token._ID, s, currentLine); + return Token.build(_ID, s, currentLine); } // fall through case 81: break; case 21: - { return Token.build(Token._LEFTBRACKET, currentLine); + { return Token.build(_LEFTBRACKET, currentLine); } // fall through case 82: break; case 22: - { return Token.build(Token._RIGHTBRACKET, currentLine); + { return Token.build(_RIGHTBRACKET, currentLine); } // fall through case 83: break; case 23: - { return Token.build(Token._LEFTBRACE, currentLine); + { return Token.build(_LEFTBRACE, currentLine); } // fall through case 84: break; case 24: - { return Token.build(Token._RIGHTBRACE, currentLine); + { return Token.build(_RIGHTBRACE, currentLine); } // fall through case 85: break; @@ -1127,7 +1080,7 @@ else if (zzAtEOF) { { String s = sLiteral.toString(); sLiteral = new StringBuilder(); yybegin(YYINITIAL); - return Token.build(Token._STRINGCONSTANT, s, currentLine); + return Token.build(_STRINGCONSTANT, s, currentLine); } // fall through case 87: break; @@ -1137,37 +1090,37 @@ else if (zzAtEOF) { // fall through case 88: break; case 28: - { return Token.build(Token._NOTEQUAL, currentLine); + { return Token.build(_NOTEQUAL, currentLine); } // fall through case 89: break; case 29: - { return Token.build(Token._AND, currentLine); + { return Token.build(_AND, currentLine); } // fall through case 90: break; case 30: - { return Token.build(Token._DOUBLECONSTANT, yytext(), 
currentLine); + { return Token.build(_DOUBLECONSTANT, yytext(), currentLine); } // fall through case 91: break; case 31: - { return Token.build(Token._LESSEQUAL, currentLine); + { return Token.build(_LESSEQUAL, currentLine); } // fall through case 92: break; case 32: - { return Token.build(Token._GREATEREQUAL, currentLine); + { return Token.build(_GREATEREQUAL, currentLine); } // fall through case 93: break; case 33: - { return Token.build(Token._IF, currentLine); + { return Token.build(_IF, currentLine); } // fall through case 94: break; case 34: - { return Token.build(Token._OR, currentLine); + { return Token.build(_OR, currentLine); } // fall through case 95: break; @@ -1200,112 +1153,112 @@ else if (zzAtEOF) { case 40: { String s = yytext(); s = Integer.decode(s).toString(); - return Token.build(Token._INTCONSTANT, s, currentLine); + return Token.build(_INTCONSTANT, s, currentLine); } // fall through case 101: break; case 41: - { return Token.build(Token._FOR, currentLine); + { return Token.build(_FOR, currentLine); } // fall through case 102: break; case 42: - { return Token.build(Token._INT, currentLine); + { return Token.build(_INT, currentLine); } // fall through case 103: break; case 43: - { return Token.build(Token._NEW, currentLine); + { return Token.build(_NEW, currentLine); } // fall through case 104: break; case 44: - { return Token.build(Token._ELSE, currentLine); + { return Token.build(_ELSE, currentLine); } // fall through case 105: break; case 45: - { return Token.build(Token._NULL, currentLine); + { return Token.build(_NULL, currentLine); } // fall through case 106: break; case 46: - { return Token.build(Token._BOOLEANCONSTANT, "true", currentLine); + { return Token.build(_BOOLEANCONSTANT, "true", currentLine); } // fall through case 107: break; case 47: - { return Token.build(Token._VOID, currentLine); + { return Token.build(_VOID, currentLine); } // fall through case 108: break; case 48: - { return Token.build(Token._BREAK, currentLine); + { 
return Token.build(_BREAK, currentLine); } // fall through case 109: break; case 49: - { return Token.build(Token._CLASS, currentLine); + { return Token.build(_CLASS, currentLine); } // fall through case 110: break; case 50: - { return Token.build(Token._BOOLEANCONSTANT, "false", currentLine); + { return Token.build(_BOOLEANCONSTANT, "false", currentLine); } // fall through case 111: break; case 51: - { return Token.build(Token._WHILE, currentLine); + { return Token.build(_WHILE, currentLine); } // fall through case 112: break; case 52: - { return Token.build(Token._DOUBLE, currentLine); + { return Token.build(_DOUBLE, currentLine); } // fall through case 113: break; case 53: - { return Token.build(Token._READLN, currentLine); + { return Token.build(_READLN, currentLine); } // fall through case 114: break; case 54: - { return Token.build(Token._RETURN, currentLine); + { return Token.build(_RETURN, currentLine); } // fall through case 115: break; case 55: - { return Token.build(Token._STRING, currentLine); + { return Token.build(_STRING, currentLine); } // fall through case 116: break; case 56: - { return Token.build(Token._BOOLEAN, currentLine); + { return Token.build(_BOOLEAN, currentLine); } // fall through case 117: break; case 57: - { return Token.build(Token._EXTENDS, currentLine); + { return Token.build(_EXTENDS, currentLine); } // fall through case 118: break; case 58: - { return Token.build(Token._PRINTLN, currentLine); + { return Token.build(_PRINTLN, currentLine); } // fall through case 119: break; case 59: - { return Token.build(Token._NEWARRAY, currentLine); + { return Token.build(_NEWARRAY, currentLine); } // fall through case 120: break; case 60: - { return Token.build(Token._INTERFACE, currentLine); + { return Token.build(_INTERFACE, currentLine); } // fall through case 121: break; case 61: - { return Token.build(Token._IMPLEMENTS, currentLine); + { return Token.build(_IMPLEMENTS, currentLine); } // fall through case 122: break; diff --git 
a/src/lexer/toy.flex b/src/lexer/toy.flex new file mode 100644 index 0000000..1f45dd9 --- /dev/null +++ b/src/lexer/toy.flex @@ -0,0 +1,309 @@ +package lexer; + +import java.util.*; + +import common.*; + +%% +%class Lexer +%type Token +%eofclose +%public +%implements TokenTypeID + +%{ +// Class for tokens +public static class Token{ + private final int type, line; + private final String value; + + private Token(int type, String value, int line) { + this.type = type; + this.value = value; + this.line = line; + } + + public static Token build(final int type, final String value, final int line) { + final Token token = new Token(type, value, line); + return token; + } + + public static Token build(final int type, final int line){ + return build(type, null, line); + } + + public String getValue() { + return value; + } + + public int getLineNumber(){ + return line; + } + + public String toString() { + switch(type){ + case _BOOLEAN: + return "boolean"; + case _BREAK: + return "break"; + case _CLASS: + return "class"; + case _DOUBLE: + return "double"; + case _ELSE: + return "else"; + case _EXTENDS: + return "extends"; + case _FOR: + return "for"; + case _IF: + return "if"; + case _IMPLEMENTS: + return "implements"; + case _INT: + return "int"; + case _INTERFACE: + return "interface"; + case _NEW: + return "new"; + case _NEWARRAY: + return "newarray"; + case _NULL: + return "null"; + case _PRINTLN: + return "println"; + case _READLN: + return "readln"; + case _RETURN: + return "return"; + case _STRING: + return "string"; + case _VOID: + return "void"; + case _WHILE: + return "while"; + case _PLUS: + return "plus"; + case _MINUS: + return "minus"; + case _MULTIPLICATION: + return "multiplication"; + case _DIVISION: + return "division"; + case _MOD: + return "mod"; + case _LESS: + return "less"; + case _LESSEQUAL: + return "lessequal"; + case _GREATER: + return "greater"; + case _GREATEREQUAL: + return "greaterequal"; + case _EQUAL: + return "equal"; + case _NOTEQUAL: + 
return "notequal"; + case _AND: + return "and"; + case _OR: + return "or"; + case _NOT: + return "not"; + case _ASSIGNOP: + return "assignop"; + case _SEMICOLON: + return "semicolon"; + case _COMMA: + return "comma"; + case _PERIOD: + return "period"; + case _LEFTPAREN: + return "leftparen"; + case _RIGHTPAREN: + return "rightparen"; + case _LEFTBRACKET: + return "leftbracket"; + case _RIGHTBRACKET: + return "rightbracket"; + case _LEFTBRACE: + return "leftbrace"; + case _RIGHTBRACE: + return "rightbrace"; + case _INTCONSTANT: + return "intconstant"; + case _DOUBLECONSTANT: + return "doubleconstant"; + case _STRINGCONSTANT: + return "stringconstant"; + case _BOOLEANCONSTANT: + return "booleanconstant"; + case _ID: + return "id"; + case _ERROR: + return "error"; + default: + return "unknown"; + } + } +} + +public Trie symbolTable = new Trie<>(); //String for now, can change later + +private boolean done = false, error = false; + +private StringBuilder sLiteral = new StringBuilder(); + +int currentLine = 1; + +public boolean isDone(){ + return done; +} + +public boolean errorOccurred(){ + return error; +} + +%} + +%init{ + symbolTable.reserve("boolean"); + symbolTable.reserve("break"); + symbolTable.reserve("class"); + symbolTable.reserve("double"); + symbolTable.reserve("else"); + symbolTable.reserve("extends"); + symbolTable.reserve("false"); + symbolTable.reserve("for"); + symbolTable.reserve("if"); + symbolTable.reserve("implements"); + symbolTable.reserve("int"); + symbolTable.reserve("interface"); + symbolTable.reserve("new"); + symbolTable.reserve("newarray"); + symbolTable.reserve("null"); + symbolTable.reserve("println"); + symbolTable.reserve("readln"); + symbolTable.reserve("return"); + symbolTable.reserve("string"); + symbolTable.reserve("true"); + symbolTable.reserve("void"); + symbolTable.reserve("while"); +%init} + +%eof{ + done = true; +%eof} + +DIGIT=[0-9] +HEX=[0-9]|[A-Fa-f] +NL=\r|\n|\r\n +WS= {NL}|[" "\t\f] + +DECLITERAL={DIGIT}+ 
+HEXLITERAL=0[xX]{HEX}+ + +DBLLITERAL={DIGIT}+"."({DIGIT}*((E|e)("+"|"-")?{DIGIT}+)?) + +IDENT=[A-Za-z][0-9A-Za-z_]* + +OCTAL=[0-7] +OCTESCAPE=\\[0-3]?{OCTAL}?{OCTAL} + +SLCOMMENT="//".* +MLCOMMENT="/*" ~"*/" + +COMMENT={SLCOMMENT}|{MLCOMMENT} + +%state STRINGLITERAL + +%% + + { + "boolean" {return Token.build(_BOOLEAN, currentLine);} + "break" {return Token.build(_BREAK, currentLine);} + "class" {return Token.build(_CLASS, currentLine);} + "double" {return Token.build(_DOUBLE, currentLine);} + "else" {return Token.build(_ELSE, currentLine);} + "extends" {return Token.build(_EXTENDS, currentLine);} + "false" {return Token.build(_BOOLEANCONSTANT, "false", currentLine);} + "for" {return Token.build(_FOR, currentLine);} + "if" {return Token.build(_IF, currentLine);} + "implements" {return Token.build(_IMPLEMENTS, currentLine);} + "int" {return Token.build(_INT, currentLine);} + "interface" {return Token.build(_INTERFACE, currentLine);} + "new" {return Token.build(_NEW, currentLine);} + "newarray" {return Token.build(_NEWARRAY, currentLine);} + "null" {return Token.build(_NULL, currentLine);} + "println" {return Token.build(_PRINTLN, currentLine);} + "readln" {return Token.build(_READLN, currentLine);} + "return" {return Token.build(_RETURN, currentLine);} + "string" {return Token.build(_STRING, currentLine);} + "true" {return Token.build(_BOOLEANCONSTANT, "true", currentLine);} + "void" {return Token.build(_VOID, currentLine);} + "while" {return Token.build(_WHILE, currentLine);} + "+" {return Token.build(_PLUS, currentLine);} + "-" {return Token.build(_MINUS, currentLine);} + "*" {return Token.build(_MULTIPLICATION, currentLine);} + "/" {return Token.build(_DIVISION, currentLine);} + "%" {return Token.build(_MOD, currentLine);} + "<" {return Token.build(_LESS, currentLine);} + "<=" {return Token.build(_LESSEQUAL, currentLine);} + ">" {return Token.build(_GREATER, currentLine);} + ">=" {return Token.build(_GREATEREQUAL, currentLine);} + "==" {return 
Token.build(_EQUAL, currentLine);} + "!=" {return Token.build(_NOTEQUAL, currentLine);} + "&&" {return Token.build(_AND, currentLine);} + "||" {return Token.build(_OR, currentLine);} + "!" {return Token.build(_NOT, currentLine);} + "=" {return Token.build(_ASSIGNOP, currentLine);} /*assignment operator; the "==" rule above is the one that yields _EQUAL*/ + ";" {return Token.build(_SEMICOLON, currentLine);} + "," {return Token.build(_COMMA, currentLine);} + "." {return Token.build(_PERIOD, currentLine);} + "(" {return Token.build(_LEFTPAREN, currentLine);} + ")" {return Token.build(_RIGHTPAREN, currentLine);} + "[" {return Token.build(_LEFTBRACKET, currentLine);} + "]" {return Token.build(_RIGHTBRACKET, currentLine);} + "{" {return Token.build(_LEFTBRACE, currentLine);} + "}" {return Token.build(_RIGHTBRACE, currentLine);} + + {NL} {currentLine++;} + + {IDENT} {String s = yytext(); + symbolTable.reserve(s); + return Token.build(_ID, s, currentLine);} + + {DECLITERAL} {return Token.build(_INTCONSTANT, yytext(), currentLine);} + + {HEXLITERAL} {String s = yytext(); + s = Integer.decode(s).toString(); + return Token.build(_INTCONSTANT, s, currentLine);} + + {DBLLITERAL} {return Token.build(_DOUBLECONSTANT, yytext(), currentLine);} + + \" {yybegin(STRINGLITERAL);} + + {COMMENT} {} + {WS} {} +} + + { + \" {String s = sLiteral.toString(); + sLiteral = new StringBuilder(); + yybegin(YYINITIAL); + return Token.build(_STRINGCONSTANT, s, currentLine);} + + [^\n\r\"\\]+ {sLiteral.append(yytext());} /*not sure about this regex*/ + + \\n {sLiteral.append('\n');} + \\r {sLiteral.append('\r');} + \\t {sLiteral.append('\t');} + \\ {sLiteral.append('\\');} /*NOTE(review): this pattern matches a single backslash character, not a two-character backslash escape - confirm that is intended*/ + \\\" {sLiteral.append('\"');} + + {OCTESCAPE} {char c = (char)Integer.parseInt(yytext().substring(1), 8); + sLiteral.append(c);} +} + +[^] {error = true; + return Token.build(_ERROR, yytext(), currentLine);} \ No newline at end of file diff --git a/src/parser/TokenScanner.java b/src/parser/TokenScanner.java new file mode 100644 index 0000000..1327843 --- /dev/null +++ b/src/parser/TokenScanner.java @@ 
-0,0 +1,17 @@ +package parser; + +import java.util.*; + +import common.*; + +import static lexer.Lexer.Token; + +public class TokenScanner implements TokenTypeID{ + private List stream; + private Iterator iterator; + + public TokenScanner(List tokens){ + stream = Collections.unmodifiableList(tokens); + iterator = stream.iterator(); + } +} \ No newline at end of file diff --git a/src/toy.flex b/src/toy.flex deleted file mode 100644 index bb193e6..0000000 --- a/src/toy.flex +++ /dev/null @@ -1,355 +0,0 @@ -import java.util.*; - -%% -%class Lexer -%type Token -%eofclose -%public - -%{ -// Class for tokens -public static class Token{ - public static final int _BOOLEAN = 0; - public static final int _BREAK = 1; - public static final int _CLASS = 2; - public static final int _DOUBLE = 3; - public static final int _ELSE = 4; - public static final int _EXTENDS = 5; - public static final int _FOR = 6; - public static final int _IF = 7; - public static final int _IMPLEMENTS = 8; - public static final int _INT = 9; - public static final int _INTERFACE = 10; - public static final int _NEW = 11; - public static final int _NEWARRAY = 12; - public static final int _NULL = 13; - public static final int _PRINTLN = 14; - public static final int _READLN = 15; - public static final int _RETURN = 16; - public static final int _STRING = 17; - public static final int _VOID = 18; - public static final int _WHILE = 19; - public static final int _PLUS = 20; - public static final int _MINUS = 21; - public static final int _MULTIPLICATION = 22; - public static final int _DIVISION = 23; - public static final int _MOD = 24; - public static final int _LESS = 25; - public static final int _LESSEQUAL = 26; - public static final int _GREATER = 27; - public static final int _GREATEREQUAL = 28; - public static final int _EQUAL = 29; - public static final int _NOTEQUAL = 30; - public static final int _AND = 31; - public static final int _OR = 32; - public static final int _NOT = 33; - public static 
final int _ASSIGNOP = 34; - public static final int _SEMICOLON = 35; - public static final int _COMMA = 36; - public static final int _PERIOD = 37; - public static final int _LEFTPAREN = 38; - public static final int _RIGHTPAREN = 39; - public static final int _LEFTBRACKET = 40; - public static final int _RIGHTBRACKET = 41; - public static final int _LEFTBRACE = 42; - public static final int _RIGHTBRACE = 43; - public static final int _INTCONSTANT = 44; - public static final int _DOUBLECONSTANT = 45; - public static final int _STRINGCONSTANT = 46; - public static final int _BOOLEANCONSTANT = 47; - public static final int _ID = 48; - public static final int _ERROR = -1; - - private final int type, line; - private final String value; - - private Token(int type, String value, int line) { - this.type = type; - this.value = value; - this.line = line; - } - - public static Token build(final int type, final String value, final int line) { - final Token token = new Token(type, value, line); - return token; - } - - public static Token build(final int type, final int line){ - return build(type, null, line); - } - - public String getValue() { - return value; - } - - public int getLineNumber(){ - return line; - } - - public String toString() { - switch(type){ - case _BOOLEAN: - return "boolean"; - case _BREAK: - return "break"; - case _CLASS: - return "class"; - case _DOUBLE: - return "double"; - case _ELSE: - return "else"; - case _EXTENDS: - return "extends"; - case _FOR: - return "for"; - case _IF: - return "if"; - case _IMPLEMENTS: - return "implements"; - case _INT: - return "int"; - case _INTERFACE: - return "interface"; - case _NEW: - return "new"; - case _NEWARRAY: - return "newarray"; - case _NULL: - return "null"; - case _PRINTLN: - return "println"; - case _READLN: - return "readln"; - case _RETURN: - return "return"; - case _STRING: - return "string"; - case _VOID: - return "void"; - case _WHILE: - return "while"; - case _PLUS: - return "plus"; - case _MINUS: - 
return "minus"; - case _MULTIPLICATION: - return "multiplication"; - case _DIVISION: - return "division"; - case _MOD: - return "mod"; - case _LESS: - return "less"; - case _LESSEQUAL: - return "lessequal"; - case _GREATER: - return "greater"; - case _GREATEREQUAL: - return "greaterequal"; - case _EQUAL: - return "equal"; - case _NOTEQUAL: - return "notequal"; - case _AND: - return "and"; - case _OR: - return "or"; - case _NOT: - return "not"; - case _ASSIGNOP: - return "assignop"; - case _SEMICOLON: - return "semicolon"; - case _COMMA: - return "comma"; - case _PERIOD: - return "period"; - case _LEFTPAREN: - return "leftparen"; - case _RIGHTPAREN: - return "rightparen"; - case _LEFTBRACKET: - return "leftbracket"; - case _RIGHTBRACKET: - return "rightbracket"; - case _LEFTBRACE: - return "leftbrace"; - case _RIGHTBRACE: - return "rightbrace"; - case _INTCONSTANT: - return "intconstant"; - case _DOUBLECONSTANT: - return "doubleconstant"; - case _STRINGCONSTANT: - return "stringconstant"; - case _BOOLEANCONSTANT: - return "booleanconstant"; - case _ID: - return "id"; - case _ERROR: - return "error"; - default: - return "unknown"; - } - } -} - -public Trie symbolTable = new Trie<>(); //String for now, can change later - -private boolean done = false, error = false; - -private StringBuilder sLiteral = new StringBuilder(); - -int currentLine = 1; - -public boolean isDone(){ - return done; -} - -public boolean errorOccurred(){ - return error; -} - -%} - -%init{ - symbolTable.reserve("boolean"); - symbolTable.reserve("break"); - symbolTable.reserve("class"); - symbolTable.reserve("double"); - symbolTable.reserve("else"); - symbolTable.reserve("extends"); - symbolTable.reserve("false"); - symbolTable.reserve("for"); - symbolTable.reserve("if"); - symbolTable.reserve("implements"); - symbolTable.reserve("int"); - symbolTable.reserve("interface"); - symbolTable.reserve("new"); - symbolTable.reserve("newarray"); - symbolTable.reserve("null"); - 
symbolTable.reserve("println"); - symbolTable.reserve("readln"); - symbolTable.reserve("return"); - symbolTable.reserve("string"); - symbolTable.reserve("true"); - symbolTable.reserve("void"); - symbolTable.reserve("while"); -%init} - -%eof{ - done = true; -%eof} - -DIGIT=[0-9] -HEX=[0-9]|[A-Fa-f] -NL=\r|\n|\r\n -WS= {NL}|[" "\t\f] - -DECLITERAL={DIGIT}+ -HEXLITERAL=0[xX]{HEX}+ - -DBLLITERAL={DIGIT}+"."({DIGIT}*((E|e)("+"|"-")?{DIGIT}+)?) - -IDENT=[A-Za-z][0-9A-Za-z_]* - -OCTAL=[0-7] -OCTESCAPE=\\[0-3]?{OCTAL}?{OCTAL} - -SLCOMMENT="//".* -MLCOMMENT="/*" ~"*/" - -COMMENT={SLCOMMENT}|{MLCOMMENT} - -%state STRINGLITERAL - -%% - - { - "boolean" {return Token.build(Token._BOOLEAN, currentLine);} - "break" {return Token.build(Token._BREAK, currentLine);} - "class" {return Token.build(Token._CLASS, currentLine);} - "double" {return Token.build(Token._DOUBLE, currentLine);} - "else" {return Token.build(Token._ELSE, currentLine);} - "extends" {return Token.build(Token._EXTENDS, currentLine);} - "false" {return Token.build(Token._BOOLEANCONSTANT, "false", currentLine);} - "for" {return Token.build(Token._FOR, currentLine);} - "if" {return Token.build(Token._IF, currentLine);} - "implements" {return Token.build(Token._IMPLEMENTS, currentLine);} - "int" {return Token.build(Token._INT, currentLine);} - "interface" {return Token.build(Token._INTERFACE, currentLine);} - "new" {return Token.build(Token._NEW, currentLine);} - "newarray" {return Token.build(Token._NEWARRAY, currentLine);} - "null" {return Token.build(Token._NULL, currentLine);} - "println" {return Token.build(Token._PRINTLN, currentLine);} - "readln" {return Token.build(Token._READLN, currentLine);} - "return" {return Token.build(Token._RETURN, currentLine);} - "string" {return Token.build(Token._STRING, currentLine);} - "true" {return Token.build(Token._BOOLEANCONSTANT, "true", currentLine);} - "void" {return Token.build(Token._VOID, currentLine);} - "while" {return Token.build(Token._WHILE, currentLine);} - "+" 
{return Token.build(Token._PLUS, currentLine);} - "-" {return Token.build(Token._MINUS, currentLine);} - "*" {return Token.build(Token._MULTIPLICATION, currentLine);} - "/" {return Token.build(Token._DIVISION, currentLine);} - "%" {return Token.build(Token._MOD, currentLine);} - "<" {return Token.build(Token._LESS, currentLine);} - "<=" {return Token.build(Token._LESSEQUAL, currentLine);} - ">" {return Token.build(Token._GREATER, currentLine);} - ">=" {return Token.build(Token._GREATEREQUAL, currentLine);} - "==" {return Token.build(Token._EQUAL, currentLine);} - "!=" {return Token.build(Token._NOTEQUAL, currentLine);} - "&&" {return Token.build(Token._AND, currentLine);} - "||" {return Token.build(Token._OR, currentLine);} - "!" {return Token.build(Token._NOT, currentLine);} - "=" {return Token.build(Token._EQUAL, currentLine);} - ";" {return Token.build(Token._SEMICOLON, currentLine);} - "," {return Token.build(Token._COMMA, currentLine);} - "." {return Token.build(Token._PERIOD, currentLine);} - "(" {return Token.build(Token._LEFTPAREN, currentLine);} - ")" {return Token.build(Token._RIGHTPAREN, currentLine);} - "[" {return Token.build(Token._LEFTBRACKET, currentLine);} - "]" {return Token.build(Token._RIGHTBRACKET, currentLine);} - "{" {return Token.build(Token._LEFTBRACE, currentLine);} - "}" {return Token.build(Token._RIGHTBRACE, currentLine);} - - {NL} {currentLine++;} - - {IDENT} {String s = yytext(); - symbolTable.reserve(s); - return Token.build(Token._ID, s, currentLine);} - - {DECLITERAL} {return Token.build(Token._INTCONSTANT, yytext(), currentLine);} - - {HEXLITERAL} {String s = yytext(); - s = Integer.decode(s).toString(); - return Token.build(Token._INTCONSTANT, s, currentLine);} - - {DBLLITERAL} {return Token.build(Token._DOUBLECONSTANT, yytext(), currentLine);} - - \" {yybegin(STRINGLITERAL);} - - {COMMENT} {} - {WS} {} -} - - { - \" {String s = sLiteral.toString(); - sLiteral = new StringBuilder(); - yybegin(YYINITIAL); - return 
Token.build(Token._STRINGCONSTANT, s, currentLine);} - - [^\n\r\"\\]+ {sLiteral.append(yytext());} /*not sure about this regex*/ - - \\n {sLiteral.append('\n');} - \\r {sLiteral.append('\r');} - \\t {sLiteral.append('\t');} - \\ {sLiteral.append('\\');} - \\\" {sLiteral.append('\"');} - - {OCTESCAPE} {char c = (char)Integer.parseInt(yytext().substring(1), 8); - sLiteral.append(c);} -} - -[^] {error = true; - return Token.build(Token._ERROR, yytext(), currentLine);} \ No newline at end of file