Remove edges in constructor

This commit is contained in:
Jan-Niclas Loosen
2026-01-22 20:26:41 +01:00
parent 3abe8581b5
commit 51028555de
77 changed files with 3964 additions and 309 deletions

View File

@@ -4,32 +4,127 @@ from .CFG_Node import *
class CFG:
def __init__(self, in_node: CFG_Node, out_node: CFG_Node, ast=None):
    """
    Create a CFG from its two boundary nodes.

    Args:
        in_node: Entry node; graph traversals start here.
        out_node: Node stored as ``self.out_node``.
        ast: Optional AST kept on the instance. When it is not None the
            graph is filtered once, here, via ``_filter_graph()``.
    """
    self.in_node = in_node
    self.out_node = out_node
    self.ast = ast
    # Filtering only happens when an AST was supplied; without one the
    # graph is left exactly as it was handed in.
    if ast is not None:
        self._filter_graph()
def _filter_graph(self):
    """
    Prune removable nodes from the CFG and reconnect the edges around them.

    Meant to run once from the constructor, so that to_dot() does not
    have to modify the graph.
    """
    # Gather every node reachable from the entry node
    reachable = set()
    self._collect_nodes(self.in_node, reachable)

    # Decide up front which nodes go (the list is fixed before any
    # rewiring starts), then bypass them one at a time
    doomed = list(filter(self._should_remove_node, reachable))
    for victim in doomed:
        self._remove_node_and_rewire(victim)
def _collect_nodes(self, node, node_set):
    """
    Add ``node`` and everything reachable through ``children`` to ``node_set``.

    Args:
        node: Node to start from.
        node_set: Set that is extended in place; nodes already in it are
            treated as visited and are not expanded again.

    An explicit worklist is used instead of recursion so that a long
    chain of nodes cannot exhaust the interpreter's recursion limit.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        if current in node_set:
            continue
        node_set.add(current)
        pending.extend(current.children)
def _should_remove_node(self, node):
    """Return True when the node should be dropped from the graph."""
    has_label = hasattr(node, 'label')

    # Unlabelled nodes: the label is None or the literal string "None"
    if has_label and (node.label is None or node.label == "None"):
        # A node carrying an AST node is kept (it gets its label from the AST)
        if node.ast_node is not None:
            return False
        # START nodes are kept even though their label is None;
        # every other unlabelled node without an AST node is removed
        is_start = hasattr(node, 'dot_label') and node.dot_label() == "START"
        return not is_start

    # Labelled nodes are only candidates when they render as END
    renders_as_end = hasattr(node, 'dot_label') and node.dot_label() == "END"
    if not renders_as_end:
        return False

    # Function-specific END nodes have a label containing both '(' and ')';
    # those stay, a bare (global) END goes
    keep = has_label and node.label and '(' in node.label and ')' in node.label
    return not keep
def _remove_node_and_rewire(self, node):
    """Detach a node and connect each of its parents to what lay beyond it."""
    # Snapshot the successors before any edge is touched
    successors = list(node.children)

    for parent in list(node.parents):
        # Ignore parents that no longer point at this node
        if node not in parent.children:
            continue

        # Diamond nodes contribute only their first two children, in order
        # (the T/F branches) and without de-duplication; any other node
        # contributes all of its children, each target at most once.
        is_branch = hasattr(node, 'dot_shape') and node.dot_shape() == "diamond"
        candidates = successors[:2] if is_branch else successors

        targets = []
        for successor in candidates:
            target = self._find_first_non_empty_child(successor)
            if not target:
                continue
            if is_branch or target not in targets:
                targets.append(target)

        # Swap the edge parent -> node for edges parent -> target
        parent.remove_child(node, propagate=False)
        for target in targets:
            parent.add_child(target, propagate=False)

    # Finally drop the node's own edge lists
    node.parents.clear()
    node.children.clear()
def _find_first_non_empty_child(self, node, visited=None):
    """
    Return the first node at or below ``node`` that is not removable.

    The search is depth-first; children are tried in ascending ``id``
    order. ``node`` itself is returned when ``_should_remove_node``
    rejects it.

    Args:
        node: Node to start the search from.
        visited: Removable nodes already expanded during this search.
            Callers normally omit it; it exists so that a cycle made up
            only of removable nodes ends the search instead of
            recursing forever.

    Returns:
        The first non-removable node found, or None if there is none.
    """
    if not self._should_remove_node(node):
        return node

    if visited is None:
        visited = set()
    # A removable node seen before either sits on the current path (a
    # cycle) or was already fully explored without success; either way
    # expanding it again cannot produce a result.
    if node in visited:
        return None
    visited.add(node)

    # Recursively check children
    for child in sorted(node.children, key=lambda n: n.id):
        result = self._find_first_non_empty_child(child, visited)
        if result is not None:
            return result
    return None
def to_dot(self) -> str:
"""
Convert the CFG to DOT format.
This method should ONLY handle formatting, not graph modifications.
All graph filtering and modifications should be done in the constructor.
"""
visited = set()
visited_nodes = []
lines = ["digraph CFG {"]
lines.append(' node [fontname="Helvetica"];')
def node_label(node: CFG_Node) -> str | None | Any:
# Skip empty nodes (nodes with no meaningful content)
if hasattr(node, 'label') and node.label == "None":
return None
# Skip global START/END nodes (those without function names)
if hasattr(node, 'dot_label'):
if node.dot_label() in ["START", "END"]:
# Keep function-specific START/END nodes, skip global ones
if hasattr(node, 'label') and node.label and '(' in node.label and ')' in node.label:
# This is a function START/END node, keep it
pass
else:
# This is a global START/END node, skip it
return None
# Use custom label if available
if hasattr(node, 'label') and node.label:
# Remove node ID from label for certain node types
@@ -64,34 +159,16 @@ class CFG:
styles.append('color=green')
return ', '.join(styles) if styles else ''
def find_first_non_empty_child(node: CFG_Node):
if node_label(node) is not None:
return node
# Recursively check children
for child in sorted(node.children, key=lambda n: n.id):
result = find_first_non_empty_child(child)
if result is not None:
return result
return None
def visit(node: CFG_Node):
if node.id in visited:
return
label = node_label(node)
visited_nodes.append(node) # Track all visited nodes
# Skip nodes that should not be included in the output
if label is None:
visited.add(node.id)
# Still need to visit children to maintain connectivity
for child in sorted(node.children, key=lambda n: n.id):
visit(child)
return
visited.add(node.id)
label = node_label(node)
if label is None:
# This shouldn't happen if the constructor did its job properly
return
shape = node_shape(node)
style = node_style(node)
@@ -101,74 +178,8 @@ class CFG:
f' n{node.id} [label="{label}", shape={shape}{style_str}];'
)
# Add edges to children
for i, child in enumerate(sorted(node.children, key=lambda n: n.id)):
# Skip edges to nodes that should not be included
child_label = node_label(child)
if child_label is None:
# For diamond nodes, we need to find the actual target nodes
# that the empty node connects to
if hasattr(node, 'dot_shape') and node.dot_shape() == "diamond":
# Find the first non-empty descendant of this empty node
actual_target = find_first_non_empty_child(child)
if actual_target is not None:
target_label = node_label(actual_target)
if target_label is not None:
# Add edge from diamond to actual target
edge_label = ""
if i == 0:
edge_label = ' [label="T"]'
elif i == 1:
edge_label = ' [label="F"]'
lines.append(f" n{node.id} -> n{actual_target.id}{edge_label};")
visit(actual_target)
continue
# For regular nodes that connect to empty join nodes,
# we need to find where the join node connects to
if child_label is None and len(child.children) > 0:
# This might be a join node - find where it connects to
join_targets = []
for grandchild in sorted(child.children, key=lambda n: n.id):
grandchild_label = node_label(grandchild)
if grandchild_label is not None:
join_targets.append(grandchild)
# If we found targets, connect directly to them
if join_targets:
for target in join_targets:
lines.append(f" n{node.id} -> n{target.id};")
visit(target)
continue
# Special handling for RETURN nodes that connect to empty cont nodes
# This is especially important for recursive function calls
if (label and (label.startswith("RET ") or label.startswith("CALL ")) and
child_label is None and len(child.children) > 0):
# This is a RETURN/CALL node connecting to an empty cont node
# Recursively find all non-empty targets that the cont node connects to
def find_all_targets(n):
"""Recursively find all non-empty targets"""
targets = []
if node_label(n) is not None:
targets.append(n)
else:
for grandchild in sorted(n.children, key=lambda n: n.id):
targets.extend(find_all_targets(grandchild))
return targets
cont_targets = find_all_targets(child)
# Connect the RETURN/CALL node directly to the cont node's targets
if cont_targets:
for target in cont_targets:
lines.append(f" n{node.id} -> n{target.id};")
visit(target)
continue
# Visit the child but don't create an edge
visit(child)
continue
# Add edge labels for diamond nodes (conditional branches)
edge_label = ""
if hasattr(node, 'dot_shape') and node.dot_shape() == "diamond":
@@ -179,26 +190,6 @@ class CFG:
lines.append(f" n{node.id} -> n{child.id}{edge_label};")
visit(child)
# Add special edges for recursive calls in function g
# This handles the specific case where RET g(y) should connect to the x variable
if label and label.startswith("RET g(y)"):
# Find the FINAL x variable node that leads to function end
final_x_node = None
for target_node in visited_nodes:
target_label = node_label(target_node)
if target_label == "x" and target_node.id != node.id:
# Check if this x node connects to END g(x)
for child in target_node.children:
child_label = node_label(child)
if child_label and child_label.startswith("END g(x)"):
final_x_node = target_node
break
if final_x_node:
break
if final_x_node:
lines.append(f" n{node.id} -> n{final_x_node.id};")
# Start the CFG traversal from the entry node
visit(self.in_node)