Finish main task, start refactoring

This commit is contained in:
Jan-Niclas Loosen
2025-11-20 15:10:38 +01:00
parent 622ecef369
commit eb362896fd
8 changed files with 1415 additions and 208 deletions

View File

@@ -1,18 +1,52 @@
# This is a sample Python script for testing your TRIPLA parser.
# In PyCharm press Umschalt+F10 to execute it.
# (c) Stephan Diehl / Updated for AST display by ChatGPT
import triplayacc as yacc
import triplalex as lex
import syntax
from graphviz import Source
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
def test_parser(name):
source = "\n".join(open(name).readlines())
ast = yacc.parser.parse(source) # ,debug=True)
print("AST:")
def display_ast_graph(dotfile="ast.dot"):
    """Render the DOT text stored in *dotfile* to ``ast.png`` and display it.

    The file is read as text, handed to Graphviz's ``Source`` and rendered
    as PNG under the base name ``ast``. The resulting ``ast.png`` is then
    loaded and shown in a matplotlib figure with the axes switched off.
    """
    # Read the DOT description and wrap it for Graphviz.
    with open(dotfile) as dot_handle:
        graph = Source(dot_handle.read())

    # Render to ast.png.
    graph.render("ast", format="png", cleanup=True)

    # Load the rendered image and show it without axes.
    rendered = mpimg.imread("ast.png")
    plt.imshow(rendered)
    plt.axis('off')
    plt.show()
def test_parser(filepath):
# Load program
with open(filepath) as f:
source = f.read()
# Parse input
ast = yacc.parser.parse(source)
# Print plain-text version of the AST (optional)
print("AST object:")
print(ast)
# Press the green button in the gutter to run the script.
if __name__ == '__main__':
test_parser('whileprograms/complex.while')
# Export DOT file
syntax.export_dot(ast, "ast.dot")
# See PyCharm help at https://www.jetbrains.com/help/pycharm/
# Display AST diagram
print("Rendering AST diagram with matplotlib...")
display_ast_graph("ast.dot")
if __name__ == '__main__':
test_parser('triplaprograms/or.tripla')

File diff suppressed because it is too large Load Diff

View File

@@ -6,9 +6,9 @@ _tabversion = '3.10'
_lr_method = 'LALR'
_lr_signature = 'AOP ASSIGN COMMA CONST DO ELSE FALSE ID IF IN LBRACE LET LOP LPAREN RBRACE RELOP RPAREN SEMICOLON THEN TRUE WHILEexpression : CONSTexpression : WHILE expression DO LBRACE expression RBRACE'
_lr_signature = 'EleftCOMPleftEQOPleftAOPAOP ASSIGN COMMA COMP CONST DO ELSE EQOP FALSE ID IF IN LBRACE LET LOP LPAREN RBRACE RPAREN SEMICOLON THEN TRUE WHILEE : LET D IN EE : IDE : ID LPAREN A RPARENE : E AOP EE : LPAREN E RPARENE : CONSTE : ID ASSIGN EE : E SEMICOLON EE : IF LPAREN B RPAREN THEN E ELSE EE : WHILE LPAREN B RPAREN DO LBRACE E RBRACEA : EA : A COMMA ED : ID LPAREN V RPAREN LBRACE E RBRACED : D DV : IDV : V COMMA IDB : E EQOP EB : E COMP EB : B EQOP BB : B LOP BB : TRUEB : FALSEB : LPAREN B RPAREN'
_lr_action_items = {'CONST':([0,3,6,],[2,2,2,]),'WHILE':([0,3,6,],[3,3,3,]),'$end':([1,2,8,],[0,-1,-2,]),'DO':([2,4,8,],[-1,5,-2,]),'RBRACE':([2,7,8,],[-1,8,-2,]),'LBRACE':([5,],[6,]),}
_lr_action_items = {'LET':([0,4,8,9,12,13,15,16,20,26,36,40,41,42,43,49,55,58,60,],[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,]),'ID':([0,2,4,8,9,10,12,13,15,16,19,20,21,26,36,40,41,42,43,46,49,55,58,60,62,],[3,11,3,3,3,11,3,3,3,3,11,3,33,3,3,3,3,3,3,56,3,3,3,3,-13,]),'LPAREN':([0,3,4,6,7,8,9,11,12,13,15,16,20,26,36,40,41,42,43,49,55,58,60,],[4,12,4,15,16,4,4,21,4,4,26,26,4,26,4,26,26,4,4,4,4,4,4,]),'CONST':([0,4,8,9,12,13,15,16,20,26,36,40,41,42,43,49,55,58,60,],[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,]),'IF':([0,4,8,9,12,13,15,16,20,26,36,40,41,42,43,49,55,58,60,],[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,]),'WHILE':([0,4,8,9,12,13,15,16,20,26,36,40,41,42,43,49,55,58,60,],[7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,]),'$end':([1,3,5,17,18,24,25,32,35,63,64,],[0,-2,-6,-4,-8,-7,-5,-1,-3,-9,-10,]),'AOP':([1,3,5,14,17,18,23,24,25,28,32,35,38,47,52,53,57,59,61,63,64,],[8,-2,-6,8,-4,8,8,8,-5,8,8,-3,8,8,8,8,8,8,8,8,-10,]),'SEMICOLON':([1,3,5,14,17,18,23,24,25,28,32,35,38,47,52,53,57,59,61,63,64,],[9,-2,-6,9,-4,9,9,9,-5,9,9,-3,9,9,9,9,9,9,9,9,-10,]),'RPAREN':([3,5,14,17,18,22,23,24,25,27,29,30,31,32,33,34,35,37,38,47,48,50,51,52,53,56,63,64,],[-2,-6,25,-4,-8,35,-11,-7,-5,39,-21,-22,44,-1,-15,45,-3,48,25,-12,-23,-19,-20,-17,-18,-16,-9,-10,]),'COMMA':([3,5,17,18,22,23,24,25,32,33,34,35,47,56,63,64,],[-2,-6,-4,-8,36,-11,-7,-5,-1,-15,46,-3,-12,-16,-9,-10,]),'EQOP':([3,5,17,18,24,25,27,28,29,30,31,32,35,37,38,48,50,51,52,53,63,64,],[-2,-6,-4,-8,-7,-5,40,42,-21,-22,40,-1,-3,40,42,-23,-19,40,-17,-18,-9,-10,]),'COMP':([3,5,17,18,24,25,28,32,35,38,63,64,],[-2,-6,-4,-8,-7,-5,43,-1,-3,43,-9,-10,]),'LOP':([3,5,17,18,24,25,27,29,30,31,32,35,37,48,50,51,52,53,63,64,],[-2,-6,-4,-8,-7,-5,41,-21,-22,41,-1,-3,41,-23,-19,41,-17,-18,-9,-10,]),'ELSE':([3,5,17,18,24,25,32,35,57,63,64,],[-2,-6,-4,-8,-7,-5,-1,-3,60,-9,-10,]),'RBRACE':([3,5,17,18,24,25,32,35,59,61,63,64,],[-2,-6,-4,-8,-7,-5,-1,-3,62,64,-9,-10,]),'ASSIGN':([3,],[13,]),'IN':([10,19,62,],[20,-14,-13,]),'TRUE':([15,16,26,40,41,],[29,29,29,29,29,]
),'FALSE':([15,16,26,40,41,],[30,30,30,30,30,]),'THEN':([39,],[49,]),'DO':([44,],[54,]),'LBRACE':([45,54,],[55,58,]),}
_lr_action = {}
for _k, _v in _lr_action_items.items():
@@ -17,7 +17,7 @@ for _k, _v in _lr_action_items.items():
_lr_action[_x][_k] = _y
del _lr_action_items
_lr_goto_items = {'expression':([0,3,6,],[1,4,7,]),}
_lr_goto_items = {'E':([0,4,8,9,12,13,15,16,20,26,36,40,41,42,43,49,55,58,60,],[1,14,17,18,23,24,28,28,32,38,47,28,28,52,53,57,59,61,63,]),'D':([2,10,19,],[10,19,19,]),'A':([12,],[22,]),'B':([15,16,26,40,41,],[27,31,37,50,51,]),'V':([21,],[34,]),}
_lr_goto = {}
for _k, _v in _lr_goto_items.items():
@@ -26,7 +26,28 @@ for _k, _v in _lr_goto_items.items():
_lr_goto[_x][_k] = _y
del _lr_goto_items
_lr_productions = [
("S' -> expression","S'",1,None,None,None),
('expression -> CONST','expression',1,'p_expression_const','triplayacc.py',28),
('expression -> WHILE expression DO LBRACE expression RBRACE','expression',6,'p_expression_while','triplayacc.py',33),
("S' -> E","S'",1,None,None,None),
('E -> LET D IN E','E',4,'p_E_let','triplayacc.py',23),
('E -> ID','E',1,'p_E_id','triplayacc.py',27),
('E -> ID LPAREN A RPAREN','E',4,'p_E_call','triplayacc.py',31),
('E -> E AOP E','E',3,'p_E_aop','triplayacc.py',36),
('E -> LPAREN E RPAREN','E',3,'p_E_paren','triplayacc.py',40),
('E -> CONST','E',1,'p_E_const','triplayacc.py',45),
('E -> ID ASSIGN E','E',3,'p_E_assign','triplayacc.py',49),
('E -> E SEMICOLON E','E',3,'p_E_seq','triplayacc.py',53),
('E -> IF LPAREN B RPAREN THEN E ELSE E','E',8,'p_E_if','triplayacc.py',57),
('E -> WHILE LPAREN B RPAREN DO LBRACE E RBRACE','E',8,'p_E_while','triplayacc.py',61),
('A -> E','A',1,'p_A_single','triplayacc.py',69),
('A -> A COMMA E','A',3,'p_A_multiple','triplayacc.py',73),
('D -> ID LPAREN V RPAREN LBRACE E RBRACE','D',7,'p_D_single','triplayacc.py',81),
('D -> D D','D',2,'p_D_concat','triplayacc.py',85),
('V -> ID','V',1,'p_V_single','triplayacc.py',93),
('V -> V COMMA ID','V',3,'p_V_multiple','triplayacc.py',97),
('B -> E EQOP E','B',3,'p_B_eqop_E','triplayacc.py',105),
('B -> E COMP E','B',3,'p_B_comp','triplayacc.py',109),
('B -> B EQOP B','B',3,'p_B_eqop_B','triplayacc.py',113),
('B -> B LOP B','B',3,'p_B_lop','triplayacc.py',117),
('B -> TRUE','B',1,'p_B_true','triplayacc.py',121),
('B -> FALSE','B',1,'p_B_false','triplayacc.py',125),
('B -> LPAREN B RPAREN','B',3,'p_B_paren','triplayacc.py',129),
]

View File

@@ -1,79 +1,140 @@
# (c) Stephan Diehl, University of Trier, Germany, 2025
class EXPRESSION:
ppcount=0
pp_count = 0
def __init__(self):
self.pp=EXPRESSION.ppcount
EXPRESSION.ppcount=EXPRESSION.ppcount+1
self.pp = EXPRESSION.pp_count
EXPRESSION.pp_count += 1
def copy(self):
@staticmethod
def copy():
return EXPRESSION()
def allNodes(self):
ret = [self]
for node in (self.__getattribute__(a) for a in self.__dict__.keys()):
if isinstance(node, EXPRESSION):
ret = ret + node.allNodes()
ret += node.allNodes()
if isinstance(node, list):
for n in node:
if isinstance(n, EXPRESSION):
ret = ret + n.allNodes()
ret += n.allNodes()
return ret
def children(self):
    """Collect the direct child nodes of this node as (edge name, node) pairs.

    Every instance attribute except ``pp`` is inspected. An attribute that
    is itself an EXPRESSION is reported under its attribute name; for a
    list attribute, each EXPRESSION element is reported as ``name[index]``.
    Anything else is ignored.
    """
    pairs = []
    for attr, val in vars(self).items():
        if attr == "pp":
            continue
        if isinstance(val, EXPRESSION):
            pairs.append((attr, val))
            continue
        if isinstance(val, list):
            pairs.extend(
                (f"{attr}[{idx}]", item)
                for idx, item in enumerate(val)
                if isinstance(item, EXPRESSION)
            )
    return pairs
def to_dot(self, out):
    """Write this node and, recursively, its subtree as DOT statements.

    Args:
        out: Writable text stream; one node statement is written for this
            node, followed by one edge statement per child and the
            child's own output.

    The node label is the class name, followed by the value of each of
    the attributes ``operator``, ``f_name``, ``name`` and ``value`` that
    the node has, each in parentheses.
    """
    label = type(self).__name__
    # operator: AOP/EQOP/COMP/LOP, f_name: CALL/DECL, name: ID, value: CONST
    for attr in ("operator", "f_name", "name", "value"):
        if hasattr(self, attr):
            label += f"({getattr(self, attr)})"

    # The label is emitted inside a double-quoted DOT string, so backslashes
    # and double quotes must be escaped to keep the output well-formed.
    escaped = label.replace("\\", "\\\\").replace('"', '\\"')
    out.write(f' node{self.pp} [label="{escaped}"];\n')

    for edge_name, child in self.children():
        out.write(f' node{self.pp} -> node{child.pp} [label="{edge_name}"];\n')
        child.to_dot(out)
class LET(EXPRESSION):
def __init__(self, declarations, body):
super().__init__()
self.declarations = declarations
self.body = body
def __str__(self): return "let " \
+','.join([ str(decl) for decl in self.declarations ]) \
+ " in " + str(self.body)
def __str__(self):
return "let " + ", ".join(str(d) for d in self.declarations) + " in " + str(self.body)
class DECL(EXPRESSION):
def __init__(self, fname, params, body):
self.fname=fname
def __init__(self, f_name, params, body):
super().__init__()
self.f_name = f_name
self.params = params
self.body = body
def __str__(self): return self.fname+"(" \
+','.join([ str(param) for param in self.params ]) \
+"){ "+str(self.body)+" }"
def __str__(self):
return f"{self.f_name}(" + ",".join(str(p) for p in self.params) + ") { " + str(self.body) + " }"
class CALL(EXPRESSION):
def __init__(self, fname, arguments):
def __init__(self, f_name, arguments):
super().__init__()
self.fname=fname
self.f_name = f_name
self.arguments = arguments
def __str__(self): return self.fname+"(" \
+','.join([ str(arg) for arg in self.arguments ]) +")"
def __str__(self):
return self.f_name + "(" + ",".join(str(a) for a in self.arguments) + ")"
class VAR(EXPRESSION):
class ID(EXPRESSION):
def __init__(self, name):
super().__init__()
self.name = name
def __str__(self): return self.name
class BINOP(EXPRESSION):
def __init__(self,operator,arg1,arg2):
super().__init__()
self.operator=operator
self.arg1=arg1
self.arg2=arg2
def __str__(self): return "("+str(self.arg1)+self.operator+str(self.arg2)+")"
def __str__(self):
return self.name
class CONST(EXPRESSION):
def __init__(self, value):
super().__init__()
self.value = value
def __str__(self): return str(self.value)
def __str__(self):
return str(self.value)
class _BinaryOperation(EXPRESSION):
    """Shared implementation for the binary operator nodes below.

    Stores the operator and its two operands, and prints as
    ``(arg1 operator arg2)``.
    """

    def __init__(self, operator, arg1, arg2):
        super().__init__()
        self.operator = operator
        self.arg1 = arg1
        self.arg2 = arg2

    def __str__(self):
        return f"({self.arg1!s} {self.operator!s} {self.arg2!s})"


class AOP(_BinaryOperation):
    """Binary node built by the parser for the AOP token."""


class EQOP(_BinaryOperation):
    """Binary node built by the parser for the EQOP token."""


class COMP(_BinaryOperation):
    """Binary node built by the parser for the COMP token."""


class LOP(_BinaryOperation):
    """Binary node built by the parser for the LOP token."""
class ASSIGN(EXPRESSION):
def __init__(self, variable, expression):
@@ -81,7 +142,8 @@ class ASSIGN(EXPRESSION):
self.variable = variable
self.expression = expression
def __str__(self): return self.variable.name+"="+str(self.expression)
def __str__(self):
return self.variable.name + " = " + str(self.expression)
class SEQ(EXPRESSION):
def __init__(self, exp1, exp2):
@@ -89,7 +151,8 @@ class SEQ(EXPRESSION):
self.exp1 = exp1
self.exp2 = exp2
def __str__(self): return str(self.exp1)+";"+str(self.exp2)
def __str__(self):
return str(self.exp1) + "; " + str(self.exp2)
class IF(EXPRESSION):
def __init__(self, condition, exp1, exp2):
@@ -98,16 +161,8 @@ class IF(EXPRESSION):
self.exp1 = exp1
self.exp2 = exp2
def __str__(self): return "if "+str(self.condition)+" then { " \
+ str(self.exp1)+" } else { "+str(self.exp2)+" } "
class DO(EXPRESSION):
def __init__(self,body,condition):
super().__init__()
self.body=body
self.condition=condition
def __str__(self): return "do { "+str(self.body)+" } while "+str(self.condition)
def __str__(self):
return "if (" + str(self.condition) + ") then { " + str(self.exp1) + " } else { " + str(self.exp2) + " }"
class WHILE(EXPRESSION):
def __init__(self, condition, body):
@@ -115,18 +170,21 @@ class WHILE(EXPRESSION):
self.condition = condition
self.body = body
def __str__(self): return "while "+str(self.condition)+" do { "+str(self.body)+" }"
# see https://stackoverflow.com/questions/51753937/python-pretty-print-nested-objects
def __str__(self):
return "while (" + str(self.condition) + ") do { " + str(self.body) + " }"
def pretty_print(clas, indent=0):
print(' ' * indent + type(clas).__name__ + ':')
indent += 4
for k, v in clas.__dict__.items():
if '__dict__' in dir(v):
if isinstance(v, EXPRESSION):
pretty_print(v, indent)
else:
print(' ' * indent + k + ': ' + str(v))
print(' ' * indent + f"{k}: {v}")
def export_dot(ast, filename="ast.dot"):
    """Write the tree rooted at *ast* to *filename* as a DOT digraph.

    The file receives the ``digraph AST`` header with a box-shaped default
    node style, then whatever ``ast.to_dot`` writes, then the closing brace.
    """
    with open(filename, "w") as dot_file:
        # Header and default node style in a single write.
        dot_file.write("digraph AST {\n" + " node [shape=box];\n")
        ast.to_dot(dot_file)
        dot_file.write("}\n")

View File

@@ -16,12 +16,13 @@ reserved = {
'false': 'FALSE'
}
# List of token names. This is always required
# List of token names. This is always required
tokens = [
'ID',
'CONST',
'AOP',
'RELOP',
'COMP',
'EQOP',
'LOP',
'ASSIGN',
'LPAREN', 'RPAREN',
@@ -43,10 +44,13 @@ t_ASSIGN = r'='
t_AOP = r'\+|\-|\*|/'
# Comparison operators
t_RELOP = r'<=|>=|==|!=|<|>'
t_COMP = r'<=|>=|<|>'
# Equality operators.
# Only '==' and '!=' belong here. PLY tries string-defined token rules in
# order of decreasing regex length, so a pattern that also lists '||' and
# '&&' is tried before t_LOP and the LOP token is never produced.
t_EQOP = r'==|!='
# Logical operators
t_LOP = r'\|\||&&|==|!='
t_LOP = r'\|\||&&'
# IDs
def t_ID(t):

View File

@@ -1,45 +1,140 @@
# ------------------------------------------------------------
# triplayacc.py
#
# Yacc grammar of the TRIPLA language
''' Here an initial grammar
E -> while E do { E }
| CONST
CONST: Positive, integer numbers = 0 | [1-9][0-9]*
'''
# Note: For LALR(1) left recursion is preferred
# Grammar of the TRIPLA language
# ------------------------------------------------------------
import ply.yacc as yacc
import syntax as ast
# Get the token map from the lexer. This is required.
from triplalex import tokens
# Operator precedence
precedence = (
('left', 'COMP'),
('left', 'EQOP'),
('left', 'AOP'),
)
def p_expression_const(p):
'expression : CONST'
start = 'E'
# ------------------------------------------------------------
# Rules for E
# ------------------------------------------------------------
def p_E_let(p):
    'E : LET D IN E'
    # A single declaration arrives as a bare DECL node (p_D_single), several
    # arrive as a list (p_D_concat). LET iterates over its declarations, so
    # always hand it a list.
    declarations = p[2] if isinstance(p[2], list) else [p[2]]
    p[0] = ast.LET(declarations, p[4])
def p_E_id(p):
'E : ID'
p[0] = ast.ID(p[1])
def p_E_call(p):
'E : ID LPAREN A RPAREN'
# E : ID(A)
p[0] = ast.CALL(p[1], p[3])
def p_E_aop(p):
'E : E AOP E'
p[0] = ast.AOP(p[2], p[1], p[3])
def p_E_paren(p):
'E : LPAREN E RPAREN'
# E : (E)
p[0] = p[2]
def p_E_const(p):
'E : CONST'
p[0] = ast.CONST(p[1])
def p_E_assign(p):
'E : ID ASSIGN E'
p[0] = ast.ASSIGN(ast.ID(p[1]), p[3])
def p_expression_while(p):
'expression : WHILE expression DO LBRACE expression RBRACE'
p[0] = ast.WHILE(p[2],p[5])
def p_E_seq(p):
'E : E SEMICOLON E'
p[0] = ast.SEQ(p[1], p[3])
#def p_empty(p):
# 'empty :'
# pass
def p_E_if(p):
'E : IF LPAREN B RPAREN THEN E ELSE E'
p[0] = ast.IF(p[3], p[6], p[8])
# Error rule for syntax errors
def p_E_while(p):
'E : WHILE LPAREN B RPAREN DO LBRACE E RBRACE'
p[0] = ast.WHILE(p[3], p[7])
# ------------------------------------------------------------
# Rules for A
# ------------------------------------------------------------
def p_A_single(p):
    'A : E'
    # Start the argument list with its first expression.
    first_argument = p[1]
    p[0] = [first_argument]
def p_A_multiple(p):
    'A : A COMMA E'
    # Append the new expression to a fresh copy of the arguments parsed so far.
    earlier_arguments, next_argument = p[1], p[3]
    p[0] = earlier_arguments + [next_argument]
# ------------------------------------------------------------
# Rules for D
# ------------------------------------------------------------
def p_D_single(p):
'D : ID LPAREN V RPAREN LBRACE E RBRACE'
p[0] = ast.DECL(p[1], p[3], p[6])
def p_D_concat(p):
    'D : D D'
    # Either side may be a single declaration or an already-built list of
    # declarations (the rule is ambiguous and may nest on the right), so
    # flatten both sides into one list instead of nesting lists.
    merged = []
    for part in (p[1], p[2]):
        if isinstance(part, list):
            merged.extend(part)
        else:
            merged.append(part)
    p[0] = merged
# ------------------------------------------------------------
# Rules for V
# ------------------------------------------------------------
def p_V_single(p):
    'V : ID'
    # Start the parameter list with its first identifier.
    first_parameter = p[1]
    p[0] = [first_parameter]
def p_V_multiple(p):
    'V : V COMMA ID'
    # Append the new identifier to a fresh copy of the parameters parsed so far.
    earlier_parameters, next_parameter = p[1], p[3]
    p[0] = earlier_parameters + [next_parameter]
# ------------------------------------------------------------
# Rules for B
# ------------------------------------------------------------
def p_B_eqop_E(p):
'B : E EQOP E'
p[0] = ast.EQOP(p[2], p[1], p[3])
def p_B_comp(p):
'B : E COMP E'
p[0] = ast.COMP(p[2], p[1], p[3])
def p_B_eqop_B(p):
'B : B EQOP B'
p[0] = ast.EQOP(p[2], p[1], p[3])
def p_B_lop(p):
'B : B LOP B'
p[0] = ast.LOP(p[2], p[1], p[3])
def p_B_true(p):
'B : TRUE'
p[0] = ast.CONST(True)
def p_B_false(p):
'B : FALSE'
p[0] = ast.CONST(False)
def p_B_paren(p):
'B : LPAREN B RPAREN'
# B : (B)
p[0] = p[2]
# Error handling
def p_error(p):
print("Syntax error in input!")
if p:
print("Syntax error at token:", p.type, "value:", p.value)
else:
print("Syntax error at EOF")
# Build the parser
parser = yacc.yacc() # debug=True
parser = yacc.yacc()

View File

@@ -1,2 +0,0 @@
while while 1 do { 2 }
do { while 3 do { 4 } }

View File

@@ -1 +0,0 @@
while 1 do { 2 }