# Construction-of-Compilers/Project-02/triplalex.py

# ------------------------------------------------------------
# Tokenizer for the TRIPLA parser
# ------------------------------------------------------------

import ply.lex as lex

# Keywords of the language mapped to their token types. The token type of
# every keyword is simply its upper-cased spelling.
reserved = {
    keyword: keyword.upper()
    for keyword in (
        'let', 'in',
        'if', 'then', 'else',
        'while', 'do',
        'true', 'false',
    )
}

# Every token type the lexer can emit: the general-purpose ones listed
# here, followed by one token type per reserved keyword.
tokens = [
    'ID', 'CONST',
    'AOP', 'COMP', 'EQOP', 'LOP',
    'ASSIGN',
    'LPAREN', 'RPAREN',
    'LBRACE', 'RBRACE',
    'COMMA', 'SEMICOLON',
]
tokens.extend(reserved.values())

# Punctuation and the assignment sign: one fixed spelling per token.
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_SEMICOLON = r';'
t_ASSIGN = r'='

# Arithmetic operators: + - * /
t_AOP = r'\+|\-|\*|/'

# Comparison operators. The two-character forms come first in the
# alternation so that '<=' is not matched as '<' followed by a stray '='.
t_COMP = r'<=|>=|<|>'

# Equality operators: only '==' and '!='. The logical operators '||' and
# '&&' must NOT be listed here: PLY tries string-defined rules in order of
# decreasing pattern length, so a longer EQOP pattern that also contained
# them would win every time and LOP could never be produced.
# This pattern is still longer than t_ASSIGN, so '==' is tried before '='.
t_EQOP = r'==|!='

# Logical operators: || and &&
t_LOP = r'\|\||&&'

# Identifiers and keywords share one rule: a letter or underscore followed
# by letters, digits or underscores. The function's docstring is the token
# regex, so it must stay exactly as written.
def t_ID(t):
    r'[A-Za-z_][A-Za-z0-9_]*'
    # A lexeme found in the keyword table takes that keyword's token type;
    # anything else is an ordinary identifier.
    if t.value in reserved:
        t.type = reserved[t.value]
    else:
        t.type = 'ID'
    return t

# Integer constants: a lone 0, or a non-zero digit followed by any number
# of digits. The function's docstring is the token regex.
def t_CONST(t):
    r'0|[1-9][0-9]*'
    # Replace the matched text with its integer value before handing the
    # token on.
    numeric_value = int(t.value)
    t.value = numeric_value
    return t

# Line breaks: returns nothing and only advances the lexer's line counter.
# The function's docstring is the token regex.
def t_newline(t):
    r'\n+'
    # The regex matches only '\n' characters, so the length of the lexeme
    # is the number of line breaks consumed.
    consumed_breaks = len(t.value)
    t.lexer.lineno = t.lexer.lineno + consumed_breaks

# Whitespace to ignore between tokens: the space and the tab character.
t_ignore = " \t"

# Single-line comment: '//' and everything after it up to, but not
# including, the end of the line. The function's docstring is the token regex.
def t_comment_single(t):
    r'//.*'
    # Nothing to do with the matched text; the rule returns no token.
    return None

# Multi-line comment: '/*' ... '*/', possibly spanning several lines.
# The function's docstring is the token regex.
def t_comment_multi(t):
    r'/\*([^*]|\*+[^*/])*\*+/'
    # The closing delimiter is '\*+/' rather than '\*/' so that comments
    # ending in a run of stars, such as '/***/' or '/** doc **/', are
    # recognised as comments too.
    # The rule returns no token; it only advances the line counter by the
    # number of line breaks inside the comment.
    t.lexer.lineno += t.value.count('\n')

# Error handling: invoked with a token whose value starts at the character
# that could not be matched.
def t_error(t):
    # Report only the first offending character, then step over exactly
    # that one character.
    offending_char = t.value[0]
    print("Illegal character '%s'" % offending_char)
    t.lexer.skip(1)

# Build the lexer
# lex.lex() is called without arguments; per the PLY documentation it then
# collects the `tokens` list and the t_* rules from the calling module.
# The resulting lexer object is exposed as the module-level name `lexer`.
lexer = lex.lex()