""" reached_uses.py — Reached-Uses backward dataflow analysis for TRIPLA CFGs. Extends ``_BackwardAnalysisBase`` from live_variables.py, which provides the shared function-scope resolution and uses/defs extraction machinery. The Live Variables analysis (LiveVariablesAnalysis) in that module is the simpler predecessor of this analysis (tip from the course notes: implement LV first, then extend to RU). How ReachedUsesAnalysis extends LiveVariablesAnalysis ------------------------------------------------------ Live Variables tracks *which* variables are live at each node (set[Var]). Reached Uses additionally tracks *where* each variable is used by attaching the use-node id to every fact, giving set[UseFact] = set[tuple[int, Var]]. The transfer function changes accordingly: LV: IN(n) = (OUT(n) − KILL_LV(n)) ∪ GEN_LV(n) [sets of Var] RU: IN(n) = (OUT(n) − KILL_RU(n)) ∪ GEN_RU(n) [sets of UseFact] GEN_LV(n) = uses(n) — set[Var] GEN_RU(n) = { (n.id, var) | var ∈ uses(n) } — set[UseFact] KILL_LV(n) = defs(n) — set[Var] KILL_RU(n) = { (uid, var) | var ∈ defs(n), — set[UseFact] (uid, var) ∈ all_uses_by_var[var] } The set-difference in both cases removes exactly the facts for variables that are defined at n — equivalent to the ⊖ operator from the lecture slides (M ⊖ K = {(p,id) ∈ M | id ∉ K}). Type aliases ------------ Var = tuple[str, str] # (scope, variable_name) UseFact = tuple[int, Var] # (use_node_id, scoped_var) Analysis attributes (all populated after construction) ------------------------------------------------------ uses dict[int, set[Var]] defs dict[int, set[Var]] gen dict[int, set[UseFact]] kill dict[int, set[UseFact]] in_sets dict[int, set[UseFact]] out_sets dict[int, set[UseFact]] all_uses_by_var dict[Var, set[UseFact]] Final result ------------ reached_uses_by_node() → dict[int, list[int]] Keys: defining-node ids Values: sorted, deduplicated list of use-node ids reached by the def """ from __future__ import annotations from collections import deque from typing import TYPE_CHECKING # Import the shared base class (and Var) from the Live Variables module. from cfa.live_variables import _BackwardAnalysisBase, Var if TYPE_CHECKING: from cfg.CFG import CFG # --------------------------------------------------------------------------- # Public type aliases (re-exported so tests/reached_uses_stub.py can pick up # ReachedUsesAnalysis without needing to know about live_variables.py) # --------------------------------------------------------------------------- UseFact = tuple[int, Var] # (use_node_id, scoped_var) # ============================================================================ # Reached-Uses Analysis # ============================================================================ class ReachedUsesAnalysis(_BackwardAnalysisBase): """Backward dataflow analysis: Reached Uses. Inherits uses/defs extraction and function-scope resolution from _BackwardAnalysisBase (live_variables.py). Extends it with use-fact tracking: each fact carries the id of the node where the variable is used, enabling def-use pairs to be recovered from the fixpoint solution. Transfer equations (backward): OUT(n) = ∪ IN(s) for all successors s IN(n) = GEN(n) ∪ (OUT(n) − KILL(n)) GEN(n) = { (n.id, var) | var ∈ uses(n) } KILL(n) = { (uid, var) | var ∈ defs(n), (uid, var) ∈ all_uses_by_var[var] } """ def __init__(self, cfg: "CFG") -> None: # Base populates: uses, defs, _func_scope, _func_parent, _func_params. super().__init__(cfg) self.gen: dict[int, set[UseFact]] = {} self.kill: dict[int, set[UseFact]] = {} self.in_sets: dict[int, set[UseFact]] = {} self.out_sets: dict[int, set[UseFact]] = {} self.all_uses_by_var: dict[Var, set[UseFact]] = {} self._build_gen_kill() self.solve() # ------------------------------------------------------------------ # Step 1 — Build gen, kill, all_uses_by_var; initialise in/out # ------------------------------------------------------------------ def _build_gen_kill(self) -> None: """Compute gen and kill sets; populate all_uses_by_var.""" # GEN[n] = { (n.id, var) | var ∈ uses[n] } for node in self.cfg.nodes(): nid = node.id self.gen[nid] = {(nid, var) for var in self.uses[nid]} self.in_sets[nid] = set() self.out_sets[nid] = set() # all_uses_by_var: index all use-facts by their variable. for nid, facts in self.gen.items(): for (uid, var) in facts: self.all_uses_by_var.setdefault(var, set()).add((uid, var)) # KILL[n] = all use-facts for variables defined at n. for node in self.cfg.nodes(): nid = node.id kill_n: set[UseFact] = set() for var in self.defs[nid]: if var in self.all_uses_by_var: kill_n |= self.all_uses_by_var[var] self.kill[nid] = kill_n # ------------------------------------------------------------------ # Step 2 — Backward worklist fixpoint # ------------------------------------------------------------------ def solve(self) -> None: """Backward worklist until fixpoint. Transfer: OUT(n) = ∪ IN(s) for all successors s IN(n) = GEN(n) ∪ (OUT(n) − KILL(n)) Only nodes reachable from cfg.START are processed (guard against propagate=False parent references from CFG.__remove_and_rewire). """ nodes = list(self.cfg.nodes()) known: set[int] = set(self.gen.keys()) # ids of cfg.nodes() id_to_node = {n.id: n for n in nodes} worklist: deque = deque(nodes) # Build predecessor relation from children edges. CFG rewiring may # create edges with propagate=False, so node.parents can be stale. preds: dict[int, set[int]] = {nid: set() for nid in known} for node in nodes: for child in node.children: if child.id in known: preds[child.id].add(node.id) while worklist: node = worklist.popleft() nid = node.id new_out: set[UseFact] = set() for child in node.children: if child.id in known: new_out |= self.in_sets[child.id] new_in: set[UseFact] = self.gen[nid] | (new_out - self.kill[nid]) if new_out != self.out_sets[nid] or new_in != self.in_sets[nid]: self.out_sets[nid] = new_out self.in_sets[nid] = new_in for pred_id in preds[nid]: worklist.append(id_to_node[pred_id]) # ------------------------------------------------------------------ # Public result # ------------------------------------------------------------------ def reached_uses_by_node(self) -> dict[int, list[int]]: """Return the final reached-uses result. For each defining node d: result[d.id] = sorted list of use-node ids u such that (u, var) ∈ OUT[d] for some var ∈ defs[d]. Semantics: the definition at d of variable var reaches the use at u if there is a CFG path d → … → u along which var is not redefined. Only nodes with at least one definition appear as keys. """ result: dict[int, list[int]] = {} for node in self.cfg.nodes(): nid = node.id defs_n = self.defs[nid] if not defs_n: continue reached: set[int] = set() for (uid, var) in self.out_sets[nid]: if var in defs_n: reached.add(uid) result[nid] = sorted(reached) return result