Files
Construction-of-Compilers/Project-02/triplalex.py
Jan-Niclas Loosen cb0c7ac2e0 Fix parsing error
2025-11-20 22:09:31 +01:00

93 lines
1.6 KiB
Python

# ------------------------------------------------------------
# Tokenizer for the TRIPLA parser
# ------------------------------------------------------------
import ply.lex as lex
# Reserved words of the language, mapped to their token type. The token type
# is simply the keyword in upper case.
reserved = {
    keyword: keyword.upper()
    for keyword in (
        'let', 'in',
        'if', 'then', 'else',
        'while', 'do',
        'true', 'false',
    )
}

# List of token names. This is always required.
# The keyword token types are appended after the regular tokens.
tokens = [
    'ID',
    'CONST',
    'AOP',
    'COMP',
    'EQOP',
    'LOP',
    'ASSIGN',
    'LPAREN',
    'RPAREN',
    'LBRACE',
    'RBRACE',
    'COMMA',
    'SEMICOLON',
    *reserved.values(),
]
# Simple tokens (punctuation and assignment)
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_SEMICOLON = r';'
t_ASSIGN = r'='

# Arithmetic operators
t_AOP = r'\+|\-|\*|/'

# Comparison operators. The two-character forms come first in the alternation
# so that '<=' / '>=' are not split into '<' / '>' followed by '='.
t_COMP = r'<=|>=|<|>'

# Equality operators. Only '==' and '!=' belong here. The pattern previously
# also contained '||' and '&&'; because PLY tries string-defined rules in order
# of decreasing pattern length, that longer pattern always won and the LOP
# token below could never be produced.
# NOTE(review): if the parser currently relies on '||' / '&&' arriving as EQOP,
# it needs a matching LOP rule -- confirm against the grammar.
t_EQOP = r'==|!='

# Logical operators
t_LOP = r'\|\||&&'
# Identifiers and keywords
def t_ID(t):
    r'[A-Za-z_][A-Za-z0-9_]*'
    # The docstring above is the rule's regex. Keywords match the same pattern
    # as identifiers, so the token type is decided afterwards: a lexeme listed
    # in `reserved` gets its keyword type, anything else is a plain ID.
    lexeme = t.value
    t.type = reserved[lexeme] if lexeme in reserved else 'ID'
    return t
# Integer constants
def t_CONST(t):
    r'0|[1-9][0-9]*'
    # The docstring above is the rule's regex: a single '0' or a digit string
    # without a leading zero. The matched text is replaced by its integer value.
    digits = t.value
    t.value = int(digits)
    return t
# Line breaks
def t_newline(t):
    r'\n+'
    # The docstring above is the rule's regex. One match covers a whole run of
    # newlines, so the line counter advances by the length of the match.
    # Nothing is returned, so no token is produced.
    consumed = len(t.value)
    t.lexer.lineno += consumed
# Ignore whitespace: spaces and tabs are skipped. Newlines are not listed
# here; the t_newline rule consumes them so the line counter can be updated.
t_ignore = ' \t'
# Single-line comment
def t_comment_single(t):
    r'//.*'
    # The docstring above is the rule's regex: '//' up to (not including) the
    # end of the line. Returning None means the comment yields no token.
    return None
# Multi-line comment
def t_comment_multi(t):
    r'/\*([^*]|\*+[^*/])*\*+/'
    # The docstring above is the rule's regex, so documentation lives in
    # comments only.
    #
    # Pattern: '/*', then any mix of non-star characters or star runs that are
    # not followed by '/', then the closing delimiter. The delimiter is
    # written as '\*+/' (one or more stars, then '/') so that comments ending
    # in several stars -- e.g. '/***/' or '/* text **/' -- are recognised;
    # with a plain '\*/' terminator those inputs were not matched as comments.
    #
    # A comment may span several lines, so the line counter is advanced by the
    # number of newlines it contains. Nothing is returned: the comment yields
    # no token.
    t.lexer.lineno += t.value.count('\n')
# Error handling
def t_error(t):
    # Called with a token whose value starts at the offending character.
    # Report that character, then skip exactly one character of input.
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)
# Build the module-level lexer instance (created when this module is imported).
# NOTE(review): lex.lex() is called with no arguments; per the PLY docs it then
# picks up `tokens` and the t_* rules from the calling module -- confirm.
lexer = lex.lex()