# ------------------------------------------------------------
# Tokenizer for the TRIPLA parser
# ------------------------------------------------------------
"""PLY-based tokenizer for the TRIPLA language.

Exposes ``tokens`` (the token-name list required by ``ply.yacc``) and a
ready-built ``lexer`` instance.

Rule precedence follows PLY's conventions: rules written as functions are
tried first, in definition order; rules written as plain strings are tried
afterwards, sorted by decreasing length of their regular expression.
"""
import ply.lex as lex

# Keywords. They are matched by the ID rule first and then re-typed, so that
# identifiers which merely start with a keyword (e.g. ``iffy``) stay IDs.
reserved = {
    'let': 'LET',
    'in': 'IN',
    'if': 'IF',
    'then': 'THEN',
    'else': 'ELSE',
    'while': 'WHILE',
    'do': 'DO',
    'true': 'TRUE',
    'false': 'FALSE',
}

# List of token names.
tokens = [
    'ID',
    'CONST',
    'AOP',
    'COMP',
    'EQOP',
    'LOP',
    'ASSIGN',
    'LPAREN',
    'RPAREN',
    'LBRACE',
    'RBRACE',
    'COMMA',
    'SEMICOLON',
] + list(reserved.values())

# Simple tokens
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_SEMICOLON = r';'
# '=' is the shortest pattern, so PLY tries it after EQOP and '==' is never
# split into two ASSIGN tokens.
t_ASSIGN = r'='

# Arithmetic operators
t_AOP = r'\+|\-|\*|/'

# Comparison operators (two-character alternatives listed first so that '<='
# is not read as '<' followed by '=')
t_COMP = r'<=|>=|<|>'

# Equality operators. This pattern must NOT contain '||' or '&&': being the
# longer regex it would be tried before t_LOP and swallow every logical
# operator as an EQOP.
t_EQOP = r'==|!='

# Logical operators
t_LOP = r'\|\||&&'


# NOTE: for the rule functions below the docstring IS the regular expression
# PLY uses for the token, so they are documented with comments instead.

# IDs: identifiers; reserved words are re-typed to their keyword token.
def t_ID(t):
    r'[A-Za-z_][A-Za-z0-9_]*'
    t.type = reserved.get(t.value, 'ID')
    return t


# Constants: non-negative decimal integers without leading zeros; the token
# value is converted to int.
def t_CONST(t):
    r'0|[1-9][0-9]*'
    t.value = int(t.value)
    return t


# Linebreaks: produce no token, only keep the line counter up to date.
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)


# Ignore whitespace
t_ignore = ' \t'


# Single-line comment: discarded (returning None emits no token). Defined as
# a function so it is tried before the string rule t_AOP, which matches '/'.
def t_comment_single(t):
    r'//.*'


# Multi-line comment: discarded, but embedded newlines are still counted.
# The closing part is '\*+/' so that comments ending in several stars, such
# as '/***/' or '/* text **/', are recognised too.
def t_comment_multi(t):
    r'/\*([^*]|\*+[^*/])*\*+/'
    t.lexer.lineno += t.value.count('\n')


# Error handling: report the offending character and resume after it.
def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)


# Build the lexer
lexer = lex.lex()