# ------------------------------------------------------------
# Tokenizer for the TRIPLA parser
# ------------------------------------------------------------
|
|
|
|
import ply.lex as lex
|
|
|
|
# Reserved words of the language.  Every keyword maps to a token type that
# is simply the keyword written in upper case.
reserved = {
    keyword: keyword.upper()
    for keyword in (
        'let', 'in',
        'if', 'then', 'else',
        'while', 'do',
        'true', 'false',
    )
}
|
|
|
|
# List of token names.
|
|
# Token names known to the lexer: identifiers and constants, the operator
# classes, punctuation, and finally one token type per reserved word.
tokens = [
    'ID', 'CONST',
    'AOP', 'COMP', 'EQOP', 'LOP',
    'ASSIGN',
    'LPAREN', 'RPAREN',
    'LBRACE', 'RBRACE',
    'COMMA', 'SEMICOLON',
]
tokens.extend(reserved.values())
|
|
|
|
# Simple tokens
|
|
# Punctuation: each rule matches exactly one literal character.
t_LPAREN, t_RPAREN = r'\(', r'\)'
t_LBRACE, t_RBRACE = r'\{', r'\}'
t_COMMA, t_SEMICOLON, t_ASSIGN = r',', r';', r'='

# Arithmetic operators: + - * /
t_AOP = r'\+|\-|\*|/'

# Comparison operators.  The two-character alternatives come first so that
# "<=" and ">=" are not matched as "<" or ">" alone.
t_COMP = r'<=|>=|<|>'
|
|
|
|
# Equality operators
|
|
# '||' and '&&' must NOT be listed in this rule.  PLY tries string-defined
# rules in order of decreasing regex length, so a longer EQOP pattern that
# also contained the logical operators would always win and the LOP token
# would never be produced.
t_EQOP = r'==|!='

# Logical operators: || and &&
t_LOP = r'\|\||&&'
|
|
|
|
# IDs
|
|
def t_ID(t):
    r'[A-Za-z_][A-Za-z0-9_]*'
    # The docstring above is the rule's regular expression and must stay the
    # first statement of the function.
    # A lexeme that is a reserved word gets that keyword's token type;
    # anything else is an ordinary identifier.
    if t.value in reserved:
        t.type = reserved[t.value]
    else:
        t.type = 'ID'
    return t
|
|
|
|
# Constants
|
|
def t_CONST(t):
    r'0|[1-9][0-9]*'
    # The docstring above is the rule's regular expression: a lone zero or a
    # digit sequence without a leading zero.
    # Replace the matched text by its integer value before handing the
    # token on.
    number = int(t.value)
    t.value = number
    return t
|
|
|
|
# Linebreaks
|
|
def t_newline(t):
    r'\n+'
    # The docstring above is the rule's regular expression: a run of one or
    # more newline characters.
    # Advance the lexer's line counter by the length of the matched run.
    # No token is returned.
    newline_count = len(t.value)
    t.lexer.lineno = t.lexer.lineno + newline_count
|
|
|
|
# Ignore whitespace
|
|
# Characters skipped between tokens: space and tab (newlines have their own
# rule, t_newline, so that line numbers can be tracked).
t_ignore = ' \t'
|
|
|
|
# Single-line comment
|
|
def t_comment_single(t):
    r'//.*'
    # The docstring above is the rule's regular expression: "//" followed by
    # the rest of the line ('.' does not match a newline).
    # Nothing is returned, so the comment text never becomes a token.
    return None
|
|
|
|
# Multi-line comment
|
|
def t_comment_multi(t):
    r'/\*([^*]|\*+[^*/])*\*+/'
    # The docstring above is the rule's regular expression and must stay the
    # first statement of the function.
    #
    # Pattern: "/*", then any mix of non-star characters or star runs that
    # are followed by something other than '*' or '/', then a closing run of
    # one or more stars and "/".  The closing part is `\*+/` rather than
    # `\*/` so that comments ending in several stars, such as "/***/" or
    # "/* text **/", are recognised as well.
    #
    # A block comment may span several lines, so keep the line counter in
    # sync.  Nothing is returned: the comment never becomes a token.
    t.lexer.lineno += t.value.count('\n')
|
|
|
|
# Error handling
|
|
def t_error(t):
    # Report the first character of the unmatched input, then skip exactly
    # that one character so lexing can continue.
    offending_char = t.value[0]
    message = "Illegal character '%s'" % offending_char
    print(message)
    t.lexer.skip(1)
|
|
|
|
# Build the lexer
|
|
# Module-level lexer instance, created at import time by calling PLY's
# lex.lex() with no arguments.
lexer = lex.lex()
|