Start refactoring task 1 and 2
This commit is contained in:
128
Project-02-03-04-05/cfa/BackwardAnalysis.py
Normal file
128
Project-02-03-04-05/cfa/BackwardAnalysis.py
Normal file
@@ -0,0 +1,128 @@
|
||||
from __future__ import annotations
|
||||
from collections import deque
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import cfg_build
|
||||
import syntax
|
||||
from cfg.CFG_Node import CFG_START
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from cfg.CFG import CFG
|
||||
|
||||
GLOBAL_SCOPE = ""
|
||||
|
||||
# A scoped variable: the function it belongs to, and its name.
|
||||
# The scope is GLOBAL_SCOPE ("") for variables outside any function.
|
||||
# e.g. ("f", "x") → variable "x" defined in function "f"
|
||||
# ("", "x") → variable "x" at global scope
|
||||
Var = tuple[str, str]
|
||||
|
||||
class BackwardAnalysis:
    # Shared groundwork for backward dataflow analyses over a CFG.
    # Construction snapshots cfg_build.FUNCTIONS, records the lexical parent and
    # parameter list of every declared function, assigns CFG nodes to functions,
    # and fills self.uses / self.defs (keyed by node id).
    # NOTE(review): private state mixes name-mangled attributes (__funcs,
    # __func_parent, __func_scope) with the single-underscore _func_params, so
    # subclasses can reach only the latter directly — confirm this is intended.
    def __init__(self, cfg: "CFG") -> None:
        self.cfg = cfg
        self.uses: dict[int, set[Var]] = {}
        self.defs: dict[int, set[Var]] = {}

        # Copy the function table so this instance keeps its own mapping.
        self.__funcs: dict[str, tuple] = dict(cfg_build.FUNCTIONS)
        parents, params = self.__collect_function_metadata()
        self.__func_parent = parents
        self._func_params = params
        self.__func_scope: dict[int, str] = self.__compute_function_scope()
        self.__extract_uses_and_defs()

    # Traverse the AST and return (func_parent, func_params):
    #   func_parent[f] -> name of the function enclosing f's declaration (or None)
    #   func_params[f] -> tuple of f's formal parameter names
    # A later declaration with the same name overwrites an earlier entry.
    def __collect_function_metadata(self) -> tuple[dict[str, str | None], dict[str, tuple[str, ...]]]:
        parent_of: dict[str, str | None] = {}
        params_of: dict[str, tuple[str, ...]] = {}

        def walk(node: syntax.EXPRESSION | None, enclosing: str | None) -> None:
            if node is None:
                return

            if not isinstance(node, syntax.LET):
                # Ordinary expression: descend without changing the enclosing function.
                for _, sub in node.children():
                    walk(sub, enclosing)
                return

            declarations = node.decl if isinstance(node.decl, list) else [node.decl]

            # First pass: register every function declared by this LET, so all
            # siblings are known before any body is visited.
            for decl in declarations:
                if isinstance(decl, syntax.DECL):
                    parent_of[decl.f_name] = enclosing
                    params_of[decl.f_name] = tuple(decl.params)

            # Second pass: descend into function bodies (now enclosed by that
            # function) and into any non-DECL entries (same enclosing function).
            for decl in declarations:
                if isinstance(decl, syntax.DECL):
                    walk(decl.body, decl.f_name)
                else:
                    walk(decl, enclosing)

            # Finally the in-expression of the LET.
            walk(node.body, enclosing)

        walk(self.cfg.ast, None)
        return parent_of, params_of

    # Map each CFG node id to the name of the function it belongs to, using one
    # breadth-first walk per function from its START node.
    def __compute_function_scope(self) -> dict[int, str]:
        table = self.__funcs
        owner: dict[int, str] = {}
        entry_ids: set[int] = {start.id for start, _ in table.values()}

        for fname, (entry, exit_node) in table.items():
            pending: deque = deque([entry])
            while pending:
                current = pending.popleft()
                if current.id in owner:
                    # The first function whose walk claims a node keeps it.
                    continue
                owner[current.id] = fname

                # The function's own end node is claimed but not expanded, so the
                # walk does not continue into the caller context.
                if current.id == exit_node.id:
                    continue

                for succ in current.children:
                    enters_other_function = (
                        isinstance(succ, CFG_START)
                        and succ.id in entry_ids
                        and succ.id != entry.id
                    )
                    if not enters_other_function:
                        pending.append(succ)

        return owner

    # Fill self.uses and self.defs for every node returned by cfg.nodes():
    #   START node wrapping a DECL -> defines each formal parameter
    #   node wrapping an ID        -> uses the resolved variable
    #   node wrapping an ASSIGN    -> defines the resolved variable
    #   anything else              -> both sets stay empty
    def __extract_uses_and_defs(self) -> None:
        for node in self.cfg.nodes():
            key = node.id
            owner = self.__func_scope.get(key)  # None when no function claimed the node
            tree = node.ast_node

            used: set[Var] = set()
            defined: set[Var] = set()

            if isinstance(node, CFG_START) and isinstance(tree, syntax.DECL):
                defined = {(tree.f_name, param) for param in tree.params}
            elif isinstance(tree, syntax.ID):
                used.add(self.__resolve_var(owner, tree.name))
            elif isinstance(tree, syntax.ASSIGN):
                defined.add(self.__resolve_var(owner, tree.var.name))

            self.uses[key] = used
            self.defs[key] = defined

    # Resolve a variable's name to a scoped Var by walking up the function
    # hierarchy: the innermost function that lists the name as a parameter owns
    # it. Without a match the variable is attributed to the starting function.
    def __resolve_var(self, func: str | None, name: str) -> Var:
        if func is None:
            return (GLOBAL_SCOPE, name)

        visited: set[str] = set()
        scope: str | None = func
        # `visited` guards against a cyclic parent chain.
        while not (scope is None or scope in visited):
            if name in self._func_params.get(scope, ()):
                return (scope, name)
            visited.add(scope)
            scope = self.__func_parent.get(scope)

        # Fallback: local variable in the starting function's scope.
        return (func, name)
|
||||
64
Project-02-03-04-05/cfa/LiveVariables.py
Normal file
64
Project-02-03-04-05/cfa/LiveVariables.py
Normal file
@@ -0,0 +1,64 @@
|
||||
from __future__ import annotations
|
||||
from typing import TYPE_CHECKING
|
||||
from cfa.BackwardAnalysis import BackwardAnalysis, Var
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from cfg.CFG import CFG
|
||||
|
||||
class LiveVariables(BackwardAnalysis):
    # Live-variables analysis (backward):
    #   OUT(n) = UNION of IN(s) over all successors s
    #   IN(n)  = (OUT(n) MINUS KILL(n)) UNION GEN(n)
    # with GEN(n) = USE(n) and KILL(n) = DEF(n).
    def __init__(self, cfg: "CFG") -> None:
        # The base constructor fills self.uses and self.defs for every node.
        super().__init__(cfg)

        self.gen: dict[int, set[Var]] = {}
        self.kill: dict[int, set[Var]] = {}
        self.incoming: dict[int, set[Var]] = {}
        self.outgoing: dict[int, set[Var]] = {}

        self.__init_sets()
        self.solve()

    # Set up gen, kill, incoming and outgoing for every CFG node.
    def __init_sets(self) -> None:
        for node in self.cfg.nodes():
            key = node.id
            used = set(self.uses[key])

            # GEN(n) = USE(n); KILL(n) = DEF(n)
            self.gen[key] = used
            self.kill[key] = set(self.defs[key])

            # IN(n) starts as an independent copy of GEN(n); OUT(n) starts empty.
            self.incoming[key] = set(used)
            self.outgoing[key] = set()

    # Sweep over all nodes repeatedly until a full pass changes nothing.
    def solve(self) -> None:
        all_nodes = list(self.cfg.nodes())
        valid_ids: set[int] = {n.id for n in all_nodes}

        while True:
            dirty = False

            for current in all_nodes:
                key = current.id

                # OUT(n): merge IN(s) of every successor; successors whose id
                # is not among cfg.nodes() are ignored.
                live_out: set[Var] = set()
                for succ in current.children:
                    if succ.id in valid_ids:
                        live_out.update(self.incoming[succ.id])

                # IN(n) = (OUT(n) MINUS KILL(n)) UNION GEN(n)
                live_in: set[Var] = self.gen[key] | (live_out - self.kill[key])

                if live_out == self.outgoing[key] and live_in == self.incoming[key]:
                    continue

                self.outgoing[key] = live_out
                self.incoming[key] = live_in
                dirty = True  # something moved -> another pass is required

            if not dirty:
                break

    # Return a copy of IN(n) for every node whose IN set is non-empty.
    def live_vars_by_node(self) -> dict[int, set[Var]]:
        snapshot: dict[int, set[Var]] = {}
        for key, live in self.incoming.items():
            if live:
                snapshot[key] = set(live)
        return snapshot
|
||||
97
Project-02-03-04-05/cfa/ReachedUses.py
Normal file
97
Project-02-03-04-05/cfa/ReachedUses.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from __future__ import annotations
|
||||
from typing import TYPE_CHECKING
|
||||
from cfa.BackwardAnalysis import BackwardAnalysis, Var
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from cfg.CFG import CFG
|
||||
|
||||
# A single use-fact: the CFG node at which a variable is used.
|
||||
# e.g. (42, ("f", "x")) -> variable "x" in function "f" is used at node 42
|
||||
UseFact = tuple[int, Var]
|
||||
|
||||
class ReachedUses(BackwardAnalysis):
    # Reached-uses analysis (backward) over use-facts (use_node_id, var):
    #   OUT(n) = UNION of IN(s) over all successors s
    #   IN(n)  = GEN(n) UNION (OUT(n) MINUS KILL(n))
    def __init__(self, cfg: "CFG") -> None:
        # The base constructor fills self.uses and self.defs for every node.
        super().__init__(cfg)

        self.gen: dict[int, set[UseFact]] = {}
        self.kill: dict[int, set[UseFact]] = {}
        self.in_sets: dict[int, set[UseFact]] = {}
        self.out_sets: dict[int, set[UseFact]] = {}
        self.all_uses_by_var: dict[Var, set[UseFact]] = {}

        self.__init_sets()
        self.solve()

    # Set up gen, kill, in_sets and out_sets for every CFG node, and build the
    # all_uses_by_var index that KILL is derived from.
    def __init_sets(self) -> None:
        for node in self.cfg.nodes():
            key = node.id

            # GEN(n) = { (n.id, var) | var IN USE(n) }
            facts_here: set[UseFact] = {(key, var) for var in self.uses[key]}
            self.gen[key] = facts_here

            # IN(n) starts as an independent copy of GEN(n); OUT(n) starts empty.
            self.in_sets[key] = set(facts_here)
            self.out_sets[key] = set()

        # Index every use-fact by its variable, once, up front:
        #   ("f", "x") -> { (42, ("f", "x")), (17, ("f", "x")) }
        for facts in self.gen.values():
            for fact in facts:
                self.all_uses_by_var.setdefault(fact[1], set()).add(fact)

        for node in self.cfg.nodes():
            key = node.id

            # KILL(n) = { (uid, var) | var IN DEF(n), (uid, var) IN all_uses_by_var[var] }
            # A definition of var at n removes every use-fact of var, so facts
            # arriving from n's successors do not propagate past n.
            self.kill[key] = {
                fact
                for var in self.defs[key]
                for fact in self.all_uses_by_var.get(var, ())
            }

    # Sweep over all nodes repeatedly until a full pass changes nothing.
    def solve(self) -> None:
        all_nodes = list(self.cfg.nodes())
        valid_ids: set[int] = {n.id for n in all_nodes}

        while True:
            dirty = False

            for current in all_nodes:
                key = current.id

                # OUT(n): merge IN(s) of every successor; successors whose id
                # is not among cfg.nodes() are ignored.
                facts_out: set[UseFact] = set()
                for succ in current.children:
                    if succ.id in valid_ids:
                        facts_out.update(self.in_sets[succ.id])

                # IN(n) = GEN(n) UNION (OUT(n) MINUS KILL(n))
                facts_in: set[UseFact] = (facts_out - self.kill[key]) | self.gen[key]

                if facts_out == self.out_sets[key] and facts_in == self.in_sets[key]:
                    continue

                self.out_sets[key] = facts_out
                self.in_sets[key] = facts_in
                dirty = True  # something moved -> another pass is required

            if not dirty:
                break

    # For every node that defines at least one variable, return the sorted ids
    # of the use nodes in OUT(n) whose variable is defined at that node.
    def reached_uses_by_node(self) -> dict[int, list[int]]:
        answer: dict[int, list[int]] = {}
        for node in self.cfg.nodes():
            key = node.id
            defined_here = self.defs[key]
            if defined_here:
                answer[key] = sorted(
                    {uid for uid, var in self.out_sets[key] if var in defined_here}
                )
        return answer
|
||||
@@ -1,9 +1,11 @@
|
||||
from .live_variables import LiveVariablesAnalysis, Var
|
||||
from .reached_uses import ReachedUsesAnalysis, UseFact
|
||||
from .BackwardAnalysis import BackwardAnalysis, Var
|
||||
from .LiveVariables import LiveVariables
|
||||
from .ReachedUses import ReachedUses, UseFact
|
||||
|
||||
__all__ = [
|
||||
"Var",
|
||||
"UseFact",
|
||||
"LiveVariablesAnalysis",
|
||||
"ReachedUsesAnalysis",
|
||||
"BackwardAnalysis",
|
||||
"LiveVariables",
|
||||
"ReachedUses",
|
||||
]
|
||||
|
||||
@@ -1,351 +0,0 @@
|
||||
"""
|
||||
live_variables.py — Live Variables backward dataflow analysis for TRIPLA CFGs.
|
||||
|
||||
A variable v is *live* at the entry of node n if there exists a path
|
||||
n → … → use(v) where v is not redefined along the way.
|
||||
|
||||
Data structures
|
||||
---------------
|
||||
gen dict[int, set[Var]] — GEN(n) = variables *used* at n
|
||||
kill dict[int, set[Var]] — KILL(n) = variables *defined* at n
|
||||
in_sets dict[int, set[Var]] — live variables at node *entry*
|
||||
out_sets dict[int, set[Var]] — live variables at node *exit*
|
||||
|
||||
Transfer equations (backward):
|
||||
OUT(n) = ∪ IN(s) for all successors s
|
||||
IN(n) = (OUT(n) − KILL(n)) ∪ GEN(n)
|
||||
|
||||
Variables are represented in scoped form ``(scope, name)``, e.g. ``("f","x")``.
|
||||
This avoids collisions between equal variable names in different functions.
|
||||
|
||||
This module also exports ``_BackwardAnalysisBase``, the shared base class
|
||||
that ``ReachedUsesAnalysis`` in reached_uses.py inherits from. The base
|
||||
provides:
|
||||
• AST traversal to collect function-nesting and parameter metadata
|
||||
• Lexical variable resolution (parameter shadowing handled correctly)
|
||||
• BFS-based CFG-node → owning-function assignment
|
||||
• Unified uses / defs extraction for all node types
|
||||
|
||||
Var = tuple[str, str]
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import deque
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import cfg_build
|
||||
import syntax
|
||||
from cfg.CFG_Node import CFG_START
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from cfg.CFG import CFG
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public type alias (imported by reached_uses.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
GLOBAL_SCOPE = ""
|
||||
Var = tuple[str, str] # (function_name|GLOBAL_SCOPE, variable_name)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Shared base: function metadata, scope assignment, uses/defs extraction
|
||||
# ============================================================================
|
||||
|
||||
class _BackwardAnalysisBase:
|
||||
"""Infrastructure shared by LiveVariablesAnalysis and ReachedUsesAnalysis.
|
||||
|
||||
Calling ``super().__init__(cfg)`` from a subclass:
|
||||
1. Snapshots cfg_build.FUNCTIONS.
|
||||
2. Collects AST-level function-nesting and parameter metadata.
|
||||
3. BFS-assigns every CFG node to its owning function.
|
||||
4. Extracts uses and defs for every CFG node.
|
||||
|
||||
After __init__ the following attributes are available to subclasses:
|
||||
|
||||
self.cfg — the CFG object
|
||||
self._functions — dict[str, tuple]: snapshot of cfg_build.FUNCTIONS
|
||||
self._func_parent — dict[str, str|None]: lexical parent per function
|
||||
self._func_params — dict[str, tuple[str,...]]: params per function
|
||||
self._func_scope — dict[int, str]: node-id → owning function name
|
||||
self.uses — dict[int, set[Var]]: variables used at each node
|
||||
self.defs — dict[int, set[Var]]: variables defined at each node
|
||||
"""
|
||||
|
||||
def __init__(self, cfg: "CFG") -> None:
|
||||
self.cfg = cfg
|
||||
# Snapshot FUNCTIONS so later global-state resets do not affect us.
|
||||
self._functions: dict[str, tuple] = dict(cfg_build.FUNCTIONS)
|
||||
|
||||
self.uses: dict[int, set[Var]] = {}
|
||||
self.defs: dict[int, set[Var]] = {}
|
||||
|
||||
self._func_parent, self._func_params = self._collect_function_metadata()
|
||||
self._func_scope: dict[int, str] = self._compute_function_scope()
|
||||
self._extract_uses_defs()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Step 1a — Walk AST to collect lexical nesting + parameter lists
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _collect_function_metadata(
|
||||
self,
|
||||
) -> tuple[dict[str, str | None], dict[str, tuple[str, ...]]]:
|
||||
"""Walk the AST and collect function-parent and parameter information.
|
||||
|
||||
Returns
|
||||
-------
|
||||
func_parent : dict[str, str | None]
|
||||
func_parent[f] is the name of the immediately enclosing function
|
||||
(or None for top-level functions).
|
||||
func_params : dict[str, tuple[str, ...]]
|
||||
func_params[f] is the ordered tuple of formal parameter names of f.
|
||||
"""
|
||||
func_parent: dict[str, str | None] = {}
|
||||
func_params: dict[str, tuple[str, ...]] = {}
|
||||
|
||||
def visit(expr: syntax.EXPRESSION | None, current_func: str | None) -> None:
|
||||
if expr is None:
|
||||
return
|
||||
if isinstance(expr, syntax.LET):
|
||||
decls = expr.decl if isinstance(expr.decl, list) else [expr.decl]
|
||||
# Register metadata for each declared function.
|
||||
for d in decls:
|
||||
if isinstance(d, syntax.DECL):
|
||||
# Use assignment (last-seen wins) to stay consistent
|
||||
# with cfg_build.FUNCTIONS, which also overwrites on
|
||||
# duplicate names. setdefault (first-seen wins) would
|
||||
# disagree when a nested function shadows a top-level
|
||||
# one with the same name, causing wrong scope resolution.
|
||||
func_parent[d.f_name] = current_func
|
||||
func_params[d.f_name] = tuple(d.params)
|
||||
# Recurse into function bodies and the in-expression.
|
||||
for d in decls:
|
||||
if isinstance(d, syntax.DECL):
|
||||
visit(d.body, d.f_name)
|
||||
else:
|
||||
visit(d, current_func)
|
||||
visit(expr.body, current_func)
|
||||
return
|
||||
for _, child in expr.children():
|
||||
visit(child, current_func)
|
||||
|
||||
visit(self.cfg.ast, None)
|
||||
return func_parent, func_params
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Step 1b — Resolve a variable name through the lexical scope chain
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _resolve_var(self, func: str | None, name: str) -> Var:
|
||||
"""Resolve a variable name via lexical scope chain."""
|
||||
if func is None:
|
||||
return (GLOBAL_SCOPE, name)
|
||||
|
||||
cur: str | None = func
|
||||
seen: set[str] = set()
|
||||
while cur is not None and cur not in seen:
|
||||
seen.add(cur)
|
||||
if name in self._func_params.get(cur, ()):
|
||||
return (cur, name)
|
||||
cur = self._func_parent.get(cur)
|
||||
|
||||
# Fallback: local variable in current function scope.
|
||||
return (func, name)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Step 2 — BFS-assign every CFG node to its owning function
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _compute_function_scope(self) -> dict[int, str]:
|
||||
"""BFS from each function's START node; return node-id → function-name.
|
||||
|
||||
Two stopping conditions keep attribution strictly inside each function:
|
||||
|
||||
1. Do not follow into a *different* function's CFG_START (prevents
|
||||
attributing callee body nodes to the caller, and vice-versa).
|
||||
2. Do not follow *past* the function's own CFG_END (prevents
|
||||
following CFG_END → CFG_RETURN → continuation nodes that belong
|
||||
to the *caller* context, which caused variables used there to be
|
||||
resolved in the wrong scope).
|
||||
|
||||
The first function whose BFS claims a node wins.
|
||||
"""
|
||||
functions = self._functions
|
||||
func_scope: dict[int, str] = {}
|
||||
all_f_start_ids: set[int] = {fs.id for _, (fs, _) in functions.items()}
|
||||
|
||||
for f_name, (f_start, f_end) in functions.items():
|
||||
queue: deque = deque([f_start])
|
||||
while queue:
|
||||
node = queue.popleft()
|
||||
if node.id in func_scope:
|
||||
continue # already claimed by an earlier function
|
||||
func_scope[node.id] = f_name
|
||||
# Stop here — do not follow CFG_END into caller context.
|
||||
if node.id == f_end.id:
|
||||
continue
|
||||
for child in node.children:
|
||||
# Do not follow into a different function's START.
|
||||
if (
|
||||
isinstance(child, CFG_START)
|
||||
and child.id in all_f_start_ids
|
||||
and child.id != f_start.id
|
||||
):
|
||||
continue
|
||||
queue.append(child)
|
||||
|
||||
return func_scope
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Step 3 — Extract uses / defs for every CFG node
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _extract_uses_defs(self) -> None:
|
||||
"""Populate ``self.uses`` and ``self.defs`` for every node in the CFG.
|
||||
|
||||
Extraction rules:
|
||||
• CFG_START(DECL f(p1,…,pk)) → defs = {(f,p1), …, (f,pk)}
|
||||
• Node wrapping ID(x) → uses = {lexical_resolve(func, x)}
|
||||
• Node wrapping ASSIGN(x = e) → defs = {lexical_resolve(func, x)}
|
||||
• Everything else → uses = {}, defs = {}
|
||||
|
||||
Sub-expressions already have their own CFG nodes and are not
|
||||
re-inspected here; each node is responsible only for its own ast_node.
|
||||
"""
|
||||
for node in self.cfg.nodes():
|
||||
nid = node.id
|
||||
func = self._func_scope.get(nid) # None → outer / global scope
|
||||
ast = node.ast_node
|
||||
|
||||
uses: set[Var] = set()
|
||||
defs: set[Var] = set()
|
||||
|
||||
if isinstance(node, CFG_START) and isinstance(ast, syntax.DECL):
|
||||
# Function entry defines each formal parameter.
|
||||
for param in ast.params:
|
||||
defs.add((ast.f_name, param))
|
||||
elif ast is not None:
|
||||
if isinstance(ast, syntax.ID):
|
||||
resolved = self._resolve_var(func, ast.name)
|
||||
uses.add(resolved)
|
||||
elif isinstance(ast, syntax.ASSIGN):
|
||||
resolved = self._resolve_var(func, ast.var.name)
|
||||
defs.add(resolved)
|
||||
|
||||
self.uses[nid] = uses
|
||||
self.defs[nid] = defs
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Live Variables Analysis
|
||||
# ============================================================================
|
||||
|
||||
class LiveVariablesAnalysis(_BackwardAnalysisBase):
    """Live Variables as a backward dataflow analysis.

    A variable (f, x) is live at the entry of node n when some path from n
    reaches a use of (f, x) without passing a redefinition of it.

    Attributes:
      gen       dict[int, set[Var]]   GEN(n)  = uses(n)
      kill      dict[int, set[Var]]   KILL(n) = defs(n)
      in_sets   dict[int, set[Var]]   live variables at n's entry
      out_sets  dict[int, set[Var]]   live variables at n's exit

    Transfer equations:
      OUT(n) = union of IN(s) over all successors s
      IN(n)  = (OUT(n) - KILL(n)) | GEN(n)
    """

    def __init__(self, cfg: "CFG") -> None:
        # The base constructor fills self.uses and self.defs for every node.
        super().__init__(cfg)

        self.gen: dict[int, set[Var]] = {}
        self.kill: dict[int, set[Var]] = {}
        self.in_sets: dict[int, set[Var]] = {}
        self.out_sets: dict[int, set[Var]] = {}

        self._build_gen_kill()
        self.solve()

    def _build_gen_kill(self) -> None:
        """Copy uses/defs into gen/kill and start in/out sets empty."""
        for node in self.cfg.nodes():
            key = node.id
            self.gen[key] = set(self.uses[key])
            self.kill[key] = set(self.defs[key])
            self.in_sets[key] = set()
            self.out_sets[key] = set()

    def solve(self) -> None:
        """Iterate a worklist until the in/out sets stop changing.

        Successors whose id has no gen entry are ignored. Predecessors are
        derived from the ``children`` edges instead of ``node.parents``, which
        may be stale after CFG rewiring.
        """
        all_nodes = list(self.cfg.nodes())
        valid_ids: set[int] = set(self.gen)
        node_by_id = {n.id: n for n in all_nodes}

        # Predecessor relation rebuilt from the forward edges.
        preds: dict[int, set[int]] = {key: set() for key in valid_ids}
        for source in all_nodes:
            for target in source.children:
                if target.id in valid_ids:
                    preds[target.id].add(source.id)

        pending: deque = deque(all_nodes)
        while pending:
            current = pending.popleft()
            key = current.id

            live_out: set[Var] = set()
            for succ in current.children:
                if succ.id in valid_ids:
                    live_out.update(self.in_sets[succ.id])

            live_in: set[Var] = self.gen[key] | (live_out - self.kill[key])

            if live_out == self.out_sets[key] and live_in == self.in_sets[key]:
                continue

            self.out_sets[key] = live_out
            self.in_sets[key] = live_in
            # A change here can affect every predecessor, so revisit them.
            pending.extend(node_by_id[pred] for pred in preds[key])

    def live_vars_by_node(self) -> dict[int, set[Var]]:
        """Return a copy of the entry live set of every node where it is non-empty."""
        snapshot: dict[int, set[Var]] = {}
        for key, live in self.in_sets.items():
            if live:
                snapshot[key] = set(live)
        return snapshot
|
||||
@@ -1,203 +0,0 @@
|
||||
"""
|
||||
reached_uses.py — Reached-Uses backward dataflow analysis for TRIPLA CFGs.
|
||||
|
||||
Extends ``_BackwardAnalysisBase`` from live_variables.py, which provides the
|
||||
shared function-scope resolution and uses/defs extraction machinery. The Live
|
||||
Variables analysis (LiveVariablesAnalysis) in that module is the simpler
|
||||
predecessor of this analysis (tip from the course notes: implement LV first,
|
||||
then extend to RU).
|
||||
|
||||
How ReachedUsesAnalysis extends LiveVariablesAnalysis
|
||||
------------------------------------------------------
|
||||
Live Variables tracks *which* variables are live at each node (set[Var]).
|
||||
Reached Uses additionally tracks *where* each variable is used by attaching
|
||||
the use-node id to every fact, giving set[UseFact] = set[tuple[int, Var]].
|
||||
|
||||
The transfer function changes accordingly:
|
||||
LV: IN(n) = (OUT(n) − KILL_LV(n)) ∪ GEN_LV(n) [sets of Var]
|
||||
RU: IN(n) = (OUT(n) − KILL_RU(n)) ∪ GEN_RU(n) [sets of UseFact]
|
||||
|
||||
GEN_LV(n) = uses(n) — set[Var]
|
||||
GEN_RU(n) = { (n.id, var) | var ∈ uses(n) } — set[UseFact]
|
||||
|
||||
KILL_LV(n) = defs(n) — set[Var]
|
||||
KILL_RU(n) = { (uid, var) | var ∈ defs(n), — set[UseFact]
|
||||
(uid, var) ∈ all_uses_by_var[var] }
|
||||
|
||||
The set-difference in both cases removes exactly the facts for variables
|
||||
that are defined at n — equivalent to the ⊖ operator from the lecture
|
||||
slides (M ⊖ K = {(p,id) ∈ M | id ∉ K}).
|
||||
|
||||
Type aliases
|
||||
------------
|
||||
Var = tuple[str, str] # (scope, variable_name)
|
||||
UseFact = tuple[int, Var] # (use_node_id, scoped_var)
|
||||
|
||||
Analysis attributes (all populated after construction)
|
||||
------------------------------------------------------
|
||||
uses dict[int, set[Var]]
|
||||
defs dict[int, set[Var]]
|
||||
gen dict[int, set[UseFact]]
|
||||
kill dict[int, set[UseFact]]
|
||||
in_sets dict[int, set[UseFact]]
|
||||
out_sets dict[int, set[UseFact]]
|
||||
all_uses_by_var dict[Var, set[UseFact]]
|
||||
|
||||
Final result
|
||||
------------
|
||||
reached_uses_by_node() → dict[int, list[int]]
|
||||
Keys: defining-node ids
|
||||
Values: sorted, deduplicated list of use-node ids reached by the def
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import deque
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
# Import the shared base class (and Var) from the Live Variables module.
|
||||
from cfa.live_variables import _BackwardAnalysisBase, Var
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from cfg.CFG import CFG
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public type aliases (re-exported so tests/reached_uses_stub.py can pick up
|
||||
# ReachedUsesAnalysis without needing to know about live_variables.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
UseFact = tuple[int, Var] # (use_node_id, scoped_var)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Reached-Uses Analysis
|
||||
# ============================================================================
|
||||
|
||||
class ReachedUsesAnalysis(_BackwardAnalysisBase):
|
||||
"""Backward dataflow analysis: Reached Uses.
|
||||
|
||||
Inherits uses/defs extraction and function-scope resolution from
|
||||
_BackwardAnalysisBase (live_variables.py). Extends it with use-fact
|
||||
tracking: each fact carries the id of the node where the variable is used,
|
||||
enabling def-use pairs to be recovered from the fixpoint solution.
|
||||
|
||||
Transfer equations (backward):
|
||||
OUT(n) = ∪ IN(s) for all successors s
|
||||
IN(n) = GEN(n) ∪ (OUT(n) − KILL(n))
|
||||
|
||||
GEN(n) = { (n.id, var) | var ∈ uses(n) }
|
||||
KILL(n) = { (uid, var) | var ∈ defs(n),
|
||||
(uid, var) ∈ all_uses_by_var[var] }
|
||||
"""
|
||||
|
||||
def __init__(self, cfg: "CFG") -> None:
|
||||
# Base populates: uses, defs, _func_scope, _func_parent, _func_params.
|
||||
super().__init__(cfg)
|
||||
|
||||
self.gen: dict[int, set[UseFact]] = {}
|
||||
self.kill: dict[int, set[UseFact]] = {}
|
||||
self.in_sets: dict[int, set[UseFact]] = {}
|
||||
self.out_sets: dict[int, set[UseFact]] = {}
|
||||
self.all_uses_by_var: dict[Var, set[UseFact]] = {}
|
||||
|
||||
self._build_gen_kill()
|
||||
self.solve()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Step 1 — Build gen, kill, all_uses_by_var; initialise in/out
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _build_gen_kill(self) -> None:
|
||||
"""Compute gen and kill sets; populate all_uses_by_var."""
|
||||
# GEN[n] = { (n.id, var) | var ∈ uses[n] }
|
||||
for node in self.cfg.nodes():
|
||||
nid = node.id
|
||||
self.gen[nid] = {(nid, var) for var in self.uses[nid]}
|
||||
self.in_sets[nid] = set()
|
||||
self.out_sets[nid] = set()
|
||||
|
||||
# all_uses_by_var: index all use-facts by their variable.
|
||||
for nid, facts in self.gen.items():
|
||||
for (uid, var) in facts:
|
||||
self.all_uses_by_var.setdefault(var, set()).add((uid, var))
|
||||
|
||||
# KILL[n] = all use-facts for variables defined at n.
|
||||
for node in self.cfg.nodes():
|
||||
nid = node.id
|
||||
kill_n: set[UseFact] = set()
|
||||
for var in self.defs[nid]:
|
||||
if var in self.all_uses_by_var:
|
||||
kill_n |= self.all_uses_by_var[var]
|
||||
self.kill[nid] = kill_n
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Step 2 — Backward worklist fixpoint
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def solve(self) -> None:
    """Run the backward worklist iteration until a fixpoint is reached.

    Per node ``n``::

        OUT(n) = union of IN(s) over the children s of n
        IN(n)  = GEN(n) | (OUT(n) - KILL(n))

    Children whose id has no entry in ``self.gen`` are ignored, both when
    deriving predecessors and when computing OUT. Results are written to
    ``self.in_sets`` and ``self.out_sets``.
    """
    cfg_nodes = list(self.cfg.nodes())
    tracked_ids = set(self.gen.keys())
    node_for_id = {cfg_node.id: cfg_node for cfg_node in cfg_nodes}

    # Predecessors are derived from ``children`` instead of being read
    # from ``parents``: the original author notes that CFG rewiring may
    # create edges with propagate=False, leaving ``parents`` stale.
    predecessors = {node_id: set() for node_id in tracked_ids}
    for cfg_node in cfg_nodes:
        for succ in cfg_node.children:
            if succ.id in tracked_ids:
                predecessors[succ.id].add(cfg_node.id)

    pending = deque(cfg_nodes)
    while pending:
        current = pending.popleft()
        cur_id = current.id

        # OUT: merge the IN sets of all tracked successors.
        out_now = set()
        for succ in current.children:
            if succ.id in tracked_ids:
                out_now.update(self.in_sets[succ.id])

        in_now = self.gen[cur_id] | (out_now - self.kill[cur_id])

        if out_now == self.out_sets[cur_id] and in_now == self.in_sets[cur_id]:
            continue  # Nothing changed here, so predecessors stay valid.

        self.out_sets[cur_id] = out_now
        self.in_sets[cur_id] = in_now
        # A change at this node can affect OUT of every predecessor.
        pending.extend(node_for_id[pred_id] for pred_id in predecessors[cur_id])
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public result
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def reached_uses_by_node(self) -> dict[int, list[int]]:
    """Map each defining node to the uses its definitions reach.

    For a node ``d`` with a non-empty ``self.defs[d]``, the value is the
    sorted list of use-node ids ``u`` for which some ``(u, var)`` is in
    ``self.out_sets[d]`` with ``var`` in ``self.defs[d]``.

    Intended meaning: the definition of ``var`` at ``d`` reaches the use
    at ``u`` when a CFG path d -> ... -> u exists on which ``var`` is not
    redefined.

    Nodes without any definition are omitted; a defining node that
    reaches no use maps to an empty list.
    """
    reached_by_def: dict[int, list[int]] = {}
    for cfg_node in self.cfg.nodes():
        node_id = cfg_node.id
        defined_here = self.defs[node_id]
        if defined_here:
            use_ids = {
                use_id
                for use_id, name in self.out_sets[node_id]
                if name in defined_here
            }
            reached_by_def[node_id] = sorted(use_ids)
    return reached_by_def
|
||||
@@ -2,8 +2,7 @@ import syntax
|
||||
import colorsys
|
||||
from cfg.CFG_Node import CFG_DIAMOND
|
||||
|
||||
|
||||
def _expr_used_names(expr) -> set[str]:
|
||||
def __expr_used_names(expr) -> set[str]:
|
||||
"""Collect variable names (syntax.ID) used inside an expression subtree."""
|
||||
used: set[str] = set()
|
||||
|
||||
@@ -20,9 +19,8 @@ def _expr_used_names(expr) -> set[str]:
|
||||
visit(expr)
|
||||
return used
|
||||
|
||||
|
||||
def _show_analysis_on_node(node) -> bool:
|
||||
"""Return True if analysis annotations should be displayed for this node."""
|
||||
# Whether a node should display analysis annotations
|
||||
def __should_display_analysis(node) -> bool:
|
||||
ast = node.ast_node
|
||||
if isinstance(node, CFG_DIAMOND):
|
||||
return False
|
||||
@@ -47,13 +45,13 @@ def _show_analysis_on_node(node) -> bool:
|
||||
|
||||
def _lv_in_for_display(node, analysis):
|
||||
"""Display-level IN set for LV."""
|
||||
in_set = set(analysis.in_sets.get(node.id, set()))
|
||||
in_set = set(analysis.incoming.get(node.id, set()))
|
||||
ast_node = node.ast_node
|
||||
if isinstance(ast_node, syntax.ASSIGN):
|
||||
func = analysis._func_scope.get(node.id)
|
||||
func = analysis.__func_scope.get(node.id)
|
||||
rhs_vars = {
|
||||
analysis._resolve_var(func, name)
|
||||
for name in _expr_used_names(ast_node.expr)
|
||||
analysis.__resolve_var(func, name)
|
||||
for name in __expr_used_names(ast_node.expr)
|
||||
}
|
||||
in_set |= rhs_vars
|
||||
return in_set
|
||||
@@ -83,22 +81,22 @@ def run_all_analyses(cfg):
|
||||
"""
|
||||
node_by_id = {n.id: n for n in cfg.nodes()}
|
||||
|
||||
from cfa.live_variables import LiveVariablesAnalysis
|
||||
from cfa.reached_uses import ReachedUsesAnalysis
|
||||
from cfa.LiveVariables import LiveVariablesAnalysis
|
||||
from cfa.ReachedUses import ReachedUsesAnalysis
|
||||
|
||||
lv = LiveVariablesAnalysis(cfg)
|
||||
ru = ReachedUsesAnalysis(cfg)
|
||||
|
||||
all_ids = set(lv.in_sets.keys()) | set(lv.out_sets.keys())
|
||||
all_ids = set(lv.incoming.keys()) | set(lv.outgoing.keys())
|
||||
annotations = {
|
||||
nid: (
|
||||
"LivingVariables\\n"
|
||||
f"In := {sorted(_lv_in_for_display(node_by_id[nid], lv))}\\n"
|
||||
f"Out := {sorted(lv.out_sets.get(nid, set()))}"
|
||||
f"Out := {sorted(lv.outgoing.get(nid, set()))}"
|
||||
)
|
||||
for nid in all_ids
|
||||
if lv.in_sets.get(nid, set()) or lv.out_sets.get(nid, set())
|
||||
if nid in node_by_id and _show_analysis_on_node(node_by_id[nid])
|
||||
if lv.incoming.get(nid, set()) or lv.outgoing.get(nid, set())
|
||||
if nid in node_by_id and __should_display_analysis(node_by_id[nid])
|
||||
}
|
||||
|
||||
return {"lv": lv, "ru": ru}, annotations, ru.reached_uses_by_node()
|
||||
@@ -11,7 +11,7 @@ import cfg_build
|
||||
import lib.console as cnsl
|
||||
import syntax
|
||||
import triplayacc as yacc
|
||||
from cfa.analysis_dot import analysis_to_dot, run_all_analyses
|
||||
from cfa.to_dot import analysis_to_dot, run_all_analyses
|
||||
from cfg.CFG import CFG
|
||||
from vistram.tram import *
|
||||
from vistram.vistram import MachineUI
|
||||
@@ -81,10 +81,10 @@ def print_analysis_reports(cfg, analyses: dict, ru_edges: dict[int, list[int]]):
|
||||
|
||||
print("\nLive Variables Report")
|
||||
print("---------------------")
|
||||
node_ids = sorted(set(lv.in_sets.keys()) | set(lv.out_sets.keys()))
|
||||
node_ids = sorted(set(lv.incoming.keys()) | set(lv.outgoing.keys()))
|
||||
for nid in node_ids:
|
||||
in_set = sorted(lv.in_sets.get(nid, set()))
|
||||
out_set = sorted(lv.out_sets.get(nid, set()))
|
||||
in_set = sorted(lv.incoming.get(nid, set()))
|
||||
out_set = sorted(lv.outgoing.get(nid, set()))
|
||||
if not in_set and not out_set:
|
||||
continue
|
||||
print(f"n{nid} [{node_text(nid)}]: In={in_set} Out={out_set}")
|
||||
|
||||
Reference in New Issue
Block a user