import java.io.*;

/**
 * Scanner is responsible for slurping up a source file, iterating
 * through it and keeping track of the position of the current character.
 */
class Scanner
{
    public char chr;                     // The current character.
    public int loc;                      // Zero-based current location.
    private byte[] sourceText;           // The whole input file plus a trailing EOT sentinel.
    public static final byte EOT = 4;    // ASCII eot, used to mark end of file.

    /* ------------------------------------------------------------------------- */

    /**
     * Opens, reads, and stores the input file, then positions the scanner on
     * its first character (or on the EOT sentinel if the file is empty).
     *
     * @param filename path of the source file to read
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    public Scanner(String filename) throws FileNotFoundException, IOException
    {
        ByteArrayOutputStream contents = new ByteArrayOutputStream();
        FileInputStream fis = new FileInputStream(filename);
        try
        {
            // available() is only an estimate and a single read() may return
            // fewer bytes than requested, so keep reading until end of stream.
            byte[] buffer = new byte[4096];
            int count;
            while ((count = fis.read(buffer)) != -1)
            {
                contents.write(buffer, 0, count);
            }
        }
        finally
        {
            fis.close();                 // Release the handle even if a read failed.
        }
        contents.write(EOT);             // Stuff a sentinel at end.
        sourceText = contents.toByteArray();

        loc = 0;
        chr = charAt(0);
    }

    /* ------------------------------------------------------------------------- */

    /**
     * Our main interface with the world: moves to the next character and
     * publishes it in {@link #chr}. Once the EOT sentinel has been reached the
     * scanner stays there, so further calls are harmless.
     */
    public void advance()
    {
        if (loc < sourceText.length - 1)
        {
            loc++;
        }
        chr = charAt(loc);
    }

    /* ------------------------------------------------------------------------- */

    // Converts the byte at the given index to a char. The mask keeps bytes
    // >= 0x80 in the range 0x80..0xFF instead of sign-extending them to
    // 0xFF80..0xFFFF.
    private char charAt(int index)
    {
        return (char) (sourceText[index] & 0xFF);
    }
}

/* ========================================================================= */

/**
 * Token is a "smart" token class. It contains all the detailed knowledge
 * about the lexical structure of the language:
 * what the keywords and symbols are. When the Lexer is done loading in
 * the characters, it calls the Token's methods to decide what kind of
 * token it is and set its type field.
 */
class Token
{
    public String type;                  // Later we will use enumerations.
    public StringBuffer spelling;        // Our accumulated characters.
    public static final int TOKEN_SIZE = 16;  // Maximum spelling length the Lexer accepts.

    /** Creates an empty token whose type is "Uninit". */
    public Token()
    {
        this.type = "Uninit";
        this.spelling = new StringBuffer(TOKEN_SIZE);
    }

    /* ------------------------------------------------------------------------- */

    /**
     * Classifies a single-character token. Knowledge of the punctuation
     * characters is restricted to this method; anything unrecognized
     * becomes "Err".
     */
    public void finishChar()
    {
        String s = spelling.toString();

        if (s.equals("("))      { type = "L-paren"; }
        else if (s.equals(")")) { type = "R-paren"; }
        else if (s.equals("{")) { type = "L-brace"; }
        else if (s.equals("}")) { type = "R-brace"; }
        else                    { type = "Err"; }
    }

    /* ------------------------------------------------------------------------- */

    /** Marks this token as an integer literal ("Num"). */
    public void finishNum()
    {
        type = "Num";
    }

    /** Marks this token as the end-of-text token ("Eot"). */
    public void finishEot()
    {
        type = "Eot";
    }

    /* ------------------------------------------------------------------------- */

    /**
     * Classifies a word as a reserved word, a boolean literal, or an
     * identifier. Knowledge of the reserved words is restricted to this method.
     */
    public void finishWord()
    {
        String s = spelling.toString();

        if (s.equals("class"))        { type = "Class"; }
        else if (s.equals("int"))     { type = "Int"; }
        else if (s.equals("static"))  { type = "Static"; }
        else if (s.equals("boolean")) { type = "Boolean"; }
        // Bool is the token for boolean literals, so both spellings qualify.
        else if (s.equals("true") || s.equals("false")) { type = "Bool"; }
        else                          { type = "Id"; }
    }

    /* ------------------------------------------------------------------------- */

    /**
     * Returns the quoted spelling for tokens whose text matters (identifiers,
     * numbers, boolean literals) and the bare type name for everything else.
     */
    public String toString()
    {
        if (type.equals("Id") || type.equals("Num") || type.equals("Bool"))
        {
            return "\"" + spelling + "\"";
        }
        return type;
    }
}

/* ========================================================================= */

/**
 * Lexer uses a Scanner to read characters from a text file, and loads them into
 * a Token. It only knows that words start with letters, numbers start with
 * digits, and symbols are always one character. It lets Token contain all the
 * detailed knowledge about the language's low-level structure.
 */
class Lexer
{
    public Token token;                  // When advance() exits, the just-read token.
    private Scanner scanner;             // Feeds us the input file.

    /* ------------------------------------------------------------------------- */

    /**
     * Creates a lexer over the given scanner and reads the first token.
     *
     * @param s the scanner supplying characters
     * @throws Exception if the first token is longer than Token.TOKEN_SIZE
     */
    public Lexer(Scanner s) throws Exception
    {
        scanner = s;
        advance();
    }

    /* ------------------------------------------------------------------------- */

    // Grab another character from the Scanner and append it to the current
    // token, refusing to grow the spelling beyond Token.TOKEN_SIZE characters.
    private void takeIt() throws Exception
    {
        if (token.spelling.length() == Token.TOKEN_SIZE)
        {
            throw new Exception("Token too long.");
        }
        token.spelling.append(scanner.chr);
        scanner.advance();
    }

    /* ------------------------------------------------------------------------- */

    /**
     * Our main interface to the world. Fills the next token, then tells it
     * to identify itself. Sets our field "token" which the world can read.
     *
     * @throws Exception if the token is longer than Token.TOKEN_SIZE
     */
    public void advance() throws Exception
    {
        while (Character.isWhitespace(scanner.chr))
        {
            scanner.advance();
        }

        token = new Token();             // The old one becomes garbage.

        if (Character.isDigit(scanner.chr))            // Integer literal.
        {
            while (Character.isDigit(scanner.chr))
            {
                takeIt();
            }
            token.finishNum();
        }
        else if (Character.isLetter(scanner.chr))      // Identifier or reserved word.
        {
            while (Character.isLetterOrDigit(scanner.chr))
            {
                takeIt();
            }
            token.finishWord();
        }
        else if (scanner.chr == Scanner.EOT)           // End of text.
        {
            token.finishEot();                         // ...but don't take it.
        }
        else                                           // Single character.
        {
            takeIt();
            token.finishChar();
        }
    }
}

/* ========================================================================= */

/**
 * Driver: lexes the file named by the first command-line argument and prints
 * one token per line.
 */
public class P1
{
    /**
     * @param args args[0] is the path of the source file to lex
     * @throws Exception if the file cannot be read or a token is too long
     */
    public static void main(String[] args) throws Exception
    {
        if (args.length < 1)
        {
            System.err.println("Usage: java P1 <source-file>");
            System.exit(1);
        }

        Scanner scanner = new Scanner(args[0]);    // Read the input file.
        Lexer lexer = new Lexer(scanner);          // Create the Lexer.

        // The first token is always printed, so an empty file prints "Eot";
        // otherwise the loop stops before printing the end-of-text token.
        do
        {
            System.out.println(lexer.token);
            lexer.advance();
        }
        while (!"Eot".equals(lexer.token.type));   // equals(), not reference comparison.
    }
}