"""
reached_uses.py — Reached-Uses backward dataflow analysis for TRIPLA CFGs.

Extends ``_BackwardAnalysisBase`` from live_variables.py, which provides the
shared function-scope resolution and uses/defs extraction machinery.  The Live
Variables analysis (LiveVariablesAnalysis) in that module is the simpler
predecessor of this analysis (tip from the course notes: implement LV first,
then extend to RU).

How ReachedUsesAnalysis extends LiveVariablesAnalysis
------------------------------------------------------
Live Variables tracks *which* variables are live at each node (set[Var]).
Reached Uses additionally tracks *where* each variable is used by attaching
the use-node id to every fact, giving set[UseFact] = set[tuple[int, Var]].

The transfer function changes accordingly:

    LV:  IN(n) = (OUT(n) − KILL_LV(n)) ∪ GEN_LV(n)      [sets of Var]
    RU:  IN(n) = (OUT(n) − KILL_RU(n)) ∪ GEN_RU(n)      [sets of UseFact]

    GEN_LV(n)  = uses(n)                                 — set[Var]
    GEN_RU(n)  = { (n.id, var) | var ∈ uses(n) }         — set[UseFact]

    KILL_LV(n) = defs(n)                                 — set[Var]
    KILL_RU(n) = { (uid, var) | var ∈ defs(n),           — set[UseFact]
                   (uid, var) ∈ all_uses_by_var[var] }

The set-difference in both cases removes exactly the facts for variables
that are defined at n — equivalent to the ⊖ operator from the lecture
slides (M ⊖ K = {(p,id) ∈ M | id ∉ K}).

Type aliases
------------
Var     = tuple[str, str]   # (scope, variable_name)
UseFact = tuple[int, Var]   # (use_node_id, scoped_var)

Analysis attributes (all populated after construction)
------------------------------------------------------
uses             dict[int, set[Var]]
defs             dict[int, set[Var]]
gen              dict[int, set[UseFact]]
kill             dict[int, set[UseFact]]
in_sets          dict[int, set[UseFact]]
out_sets         dict[int, set[UseFact]]
all_uses_by_var  dict[Var, set[UseFact]]

Final result
------------
reached_uses_by_node() → dict[int, list[int]]
    Keys:   defining-node ids
    Values: sorted, deduplicated list of use-node ids reached by the def
"""

from __future__ import annotations
|
||
|
||
from collections import deque
|
||
from typing import TYPE_CHECKING
|
||
|
||
# Import the shared base class (and Var) from the Live Variables module.
|
||
from cfa.live_variables import _BackwardAnalysisBase, Var
|
||
|
||
if TYPE_CHECKING:
|
||
from cfg.CFG import CFG
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Public type aliases (re-exported so tests/reached_uses_stub.py can pick up
|
||
# ReachedUsesAnalysis without needing to know about live_variables.py)
|
||
# ---------------------------------------------------------------------------
|
||
|
||
UseFact = tuple[int, Var] # (use_node_id, scoped_var)
|
||
|
||
|
||
# ============================================================================
|
||
# Reached-Uses Analysis
|
||
# ============================================================================
|
||
|
||
class ReachedUsesAnalysis(_BackwardAnalysisBase):
    """Backward dataflow analysis: Reached Uses.

    Inherits uses/defs extraction and function-scope resolution from
    _BackwardAnalysisBase (live_variables.py).  Extends it with use-fact
    tracking: each fact carries the id of the node where the variable is used,
    enabling def-use pairs to be recovered from the fixpoint solution.

    Transfer equations (backward):

        OUT(n) = ∪ IN(s)   for all successors s
        IN(n)  = GEN(n) ∪ (OUT(n) − KILL(n))

        GEN(n)  = { (n.id, var) | var ∈ uses(n) }
        KILL(n) = { (uid, var) | var ∈ defs(n),
                    (uid, var) ∈ all_uses_by_var[var] }

    Construction runs the whole analysis: gen/kill sets are built and the
    fixpoint is solved before ``__init__`` returns.
    """

    def __init__(self, cfg: "CFG") -> None:
        """Build gen/kill sets for *cfg* and solve the dataflow equations."""
        # Base populates: uses, defs, _func_scope, _func_parent, _func_params.
        super().__init__(cfg)

        # Per-node fact sets, keyed by node id.
        self.gen: dict[int, set[UseFact]] = {}
        self.kill: dict[int, set[UseFact]] = {}
        self.in_sets: dict[int, set[UseFact]] = {}
        self.out_sets: dict[int, set[UseFact]] = {}
        # Every use-fact in the CFG, indexed by the variable it mentions.
        self.all_uses_by_var: dict[Var, set[UseFact]] = {}

        self._build_gen_kill()
        self.solve()

    # ------------------------------------------------------------------
    # Step 1 — Build gen, kill, all_uses_by_var; initialise in/out
    # ------------------------------------------------------------------

    def _build_gen_kill(self) -> None:
        """Compute gen and kill sets; populate all_uses_by_var.

        Also resets ``in_sets`` and ``out_sets`` to the empty set for every
        node returned by ``cfg.nodes()``.
        """
        nodes = list(self.cfg.nodes())

        # GEN[n] = { (n.id, var) | var ∈ uses[n] }
        for node in nodes:
            nid = node.id
            self.gen[nid] = {(nid, var) for var in self.uses[nid]}
            self.in_sets[nid] = set()
            self.out_sets[nid] = set()

        # all_uses_by_var: index all use-facts by their variable.
        for facts in self.gen.values():
            for fact in facts:
                self.all_uses_by_var.setdefault(fact[1], set()).add(fact)

        # KILL[n] = all use-facts for variables defined at n.  A variable
        # that is defined but never used contributes nothing.
        for node in nodes:
            nid = node.id
            killed: set[UseFact] = set()
            for var in self.defs[nid]:
                killed |= self.all_uses_by_var.get(var, set())
            self.kill[nid] = killed

    # ------------------------------------------------------------------
    # Step 2 — Backward worklist fixpoint
    # ------------------------------------------------------------------

    def solve(self) -> None:
        """Run the backward worklist algorithm until a fixpoint is reached.

        Transfer:
            OUT(n) = ∪ IN(s)   for all successors s
            IN(n)  = GEN(n) ∪ (OUT(n) − KILL(n))

        Only nodes returned by ``cfg.nodes()`` take part; a child whose id is
        not among them is ignored, both as a successor and when deriving
        predecessors.

        Each node id is held in the worklist at most once per pending visit,
        and predecessors are re-queued only when IN(n) changes, because
        IN(n) is the only value of n that a predecessor reads.
        """
        nodes = list(self.cfg.nodes())
        known: set[int] = set(self.gen)  # ids of cfg.nodes()
        id_to_node = {n.id: n for n in nodes}

        # Build predecessor relation from children edges.  CFG rewiring may
        # create edges with propagate=False, so node.parents can be stale.
        preds: dict[int, set[int]] = {nid: set() for nid in known}
        for node in nodes:
            for child in node.children:
                if child.id in known:
                    preds[child.id].add(node.id)

        worklist: deque = deque(nodes)
        # Ids currently waiting in the worklist; prevents duplicate entries.
        queued: set[int] = {n.id for n in nodes}

        while worklist:
            node = worklist.popleft()
            nid = node.id
            queued.discard(nid)

            new_out: set[UseFact] = set()
            for child in node.children:
                if child.id in known:
                    new_out |= self.in_sets[child.id]

            new_in: set[UseFact] = self.gen[nid] | (new_out - self.kill[nid])

            self.out_sets[nid] = new_out
            if new_in == self.in_sets[nid]:
                # IN(n) is stable, so no predecessor can be affected.
                continue

            self.in_sets[nid] = new_in
            for pred_id in preds[nid]:
                if pred_id not in queued:
                    queued.add(pred_id)
                    worklist.append(id_to_node[pred_id])

    # ------------------------------------------------------------------
    # Public result
    # ------------------------------------------------------------------

    def reached_uses_by_node(self) -> dict[int, list[int]]:
        """Return the final reached-uses result.

        For each defining node d:
            result[d.id] = sorted list of use-node ids u such that
                           (u, var) ∈ OUT[d] for some var ∈ defs[d].

        Semantics: the definition at d of variable var reaches the use at u
        if there is a CFG path d → … → u along which var is not redefined.

        Only nodes with at least one definition appear as keys; such a node
        maps to an empty list when none of its definitions reaches a use.
        """
        result: dict[int, list[int]] = {}
        for node in self.cfg.nodes():
            nid = node.id
            defs_n = self.defs[nid]
            if not defs_n:
                continue
            result[nid] = sorted(
                {uid for uid, var in self.out_sets[nid] if var in defs_n}
            )
        return result