First working solution of new task

This commit is contained in:
Jan-Niclas Loosen
2026-03-05 18:03:55 +01:00
parent 691d6eba8c
commit de46c67129
11 changed files with 1077 additions and 9 deletions

View File

@@ -0,0 +1,203 @@
"""
reached_uses.py — Reached-Uses backward dataflow analysis for TRIPLA CFGs.
Extends ``_BackwardAnalysisBase`` from live_variables.py, which provides the
shared function-scope resolution and uses/defs extraction machinery. The Live
Variables analysis (LiveVariablesAnalysis) in that module is the simpler
predecessor of this analysis (tip from the course notes: implement LV first,
then extend to RU).
How ReachedUsesAnalysis extends LiveVariablesAnalysis
------------------------------------------------------
Live Variables tracks *which* variables are live at each node (set[Var]).
Reached Uses additionally tracks *where* each variable is used by attaching
the use-node id to every fact, giving set[UseFact] = set[tuple[int, Var]].
The transfer function changes accordingly:
LV: IN(n) = (OUT(n) ∖ KILL_LV(n)) ∪ GEN_LV(n) [sets of Var]
RU: IN(n) = (OUT(n) ∖ KILL_RU(n)) ∪ GEN_RU(n) [sets of UseFact]
GEN_LV(n) = uses(n) — set[Var]
GEN_RU(n) = { (n.id, var) | var ∈ uses(n) } — set[UseFact]
KILL_LV(n) = defs(n) — set[Var]
KILL_RU(n) = { (uid, var) | var ∈ defs(n), — set[UseFact]
(uid, var) ∈ all_uses_by_var[var] }
The set-difference in both cases removes exactly the facts for variables
that are defined at n — equivalent to the ⊖ operator from the lecture
slides (M ⊖ K = {(p,id) ∈ M | id ∉ K}).
Type aliases
------------
Var = tuple[str, str] # (scope, variable_name)
UseFact = tuple[int, Var] # (use_node_id, scoped_var)
Analysis attributes (all populated after construction)
------------------------------------------------------
uses dict[int, set[Var]]
defs dict[int, set[Var]]
gen dict[int, set[UseFact]]
kill dict[int, set[UseFact]]
in_sets dict[int, set[UseFact]]
out_sets dict[int, set[UseFact]]
all_uses_by_var dict[Var, set[UseFact]]
Final result
------------
reached_uses_by_node() → dict[int, list[int]]
Keys: defining-node ids
Values: sorted, deduplicated list of use-node ids reached by the def
"""
from __future__ import annotations
from collections import deque
from typing import TYPE_CHECKING
# Import the shared base class (and Var) from the Live Variables module.
from cfa.live_variables import _BackwardAnalysisBase, Var
if TYPE_CHECKING:
from cfg.CFG import CFG
# ---------------------------------------------------------------------------
# Public type alias (re-exported so tests/reached_uses_stub.py can pick up
# ReachedUsesAnalysis without needing to know about live_variables.py).
#
# A use-fact pairs the id of the CFG node at which a variable is used with
# that variable.  Var itself is imported from cfa.live_variables; the module
# docstring describes it as (scope, variable_name).
# ---------------------------------------------------------------------------
UseFact = tuple[int, Var]  # (use_node_id, scoped_var)
# ============================================================================
# Reached-Uses Analysis
# ============================================================================
class ReachedUsesAnalysis(_BackwardAnalysisBase):
    """Backward dataflow analysis: Reached Uses.

    Relies on ``_BackwardAnalysisBase`` for ``self.cfg`` and for the per-node
    ``self.uses`` / ``self.defs`` tables (both indexed by node id).  On top of
    those it tracks *use-facts*: each fact ``(use_node_id, var)`` records the
    node at which ``var`` is used, so def-use pairs can be read off the
    fixpoint solution.

    Transfer equations (backward)::

        OUT(n)  = ⋃ { IN(s) | s ∈ succ(n) }
        IN(n)   = GEN(n) ∪ (OUT(n) ∖ KILL(n))
        GEN(n)  = { (n.id, var) | var ∈ uses(n) }
        KILL(n) = { (uid, var)  | var ∈ defs(n),
                                  (uid, var) ∈ all_uses_by_var[var] }

    Attributes (all populated by the constructor):
        gen, kill, in_sets, out_sets: ``dict[int, set[UseFact]]`` keyed by
            node id.
        all_uses_by_var: ``dict[Var, set[UseFact]]`` — every use-fact in the
            CFG, grouped by the variable it refers to.
    """

    def __init__(self, cfg: "CFG") -> None:
        """Build gen/kill sets for *cfg* and immediately solve to fixpoint."""
        # The base class is expected to set self.cfg, self.uses and self.defs
        # (plus its own function-scope bookkeeping) before we continue.
        super().__init__(cfg)

        self.gen: dict[int, set[UseFact]] = {}
        self.kill: dict[int, set[UseFact]] = {}
        self.in_sets: dict[int, set[UseFact]] = {}
        self.out_sets: dict[int, set[UseFact]] = {}
        self.all_uses_by_var: dict[Var, set[UseFact]] = {}

        self._build_gen_kill()
        self.solve()

    # ------------------------------------------------------------------
    # Step 1 — Build gen, kill, all_uses_by_var; initialise in/out
    # ------------------------------------------------------------------
    def _build_gen_kill(self) -> None:
        """Compute GEN and KILL for every node and reset IN/OUT to empty.

        Also fills ``all_uses_by_var``, which KILL is derived from: a
        definition of ``var`` kills *every* use-fact of ``var`` in the CFG.
        """
        nodes = list(self.cfg.nodes())

        # Pass 1: GEN[n] = { (n.id, var) | var ∈ uses[n] }, indexed by var
        # at the same time so KILL can be looked up in pass 2.
        for node in nodes:
            nid = node.id
            facts: set[UseFact] = {(nid, var) for var in self.uses[nid]}
            self.gen[nid] = facts
            self.in_sets[nid] = set()
            self.out_sets[nid] = set()
            for fact in facts:
                self.all_uses_by_var.setdefault(fact[1], set()).add(fact)

        # Pass 2: KILL[n] = all use-facts of the variables defined at n.
        # This needs the complete index, hence the separate loop.
        for node in nodes:
            killed: set[UseFact] = set()
            for var in self.defs[node.id]:
                killed.update(self.all_uses_by_var.get(var, ()))
            self.kill[node.id] = killed

    # ------------------------------------------------------------------
    # Step 2 — Backward worklist fixpoint
    # ------------------------------------------------------------------
    def solve(self) -> None:
        """Iterate the transfer equations with a worklist until fixpoint.

        Transfer::

            OUT(n) = ⋃ { IN(s) | s ∈ succ(n) }
            IN(n)  = GEN(n) ∪ (OUT(n) ∖ KILL(n))

        Every node returned by ``cfg.nodes()`` is processed.  Children whose
        id has no gen/kill entry (i.e. that are not among those nodes) are
        ignored.  A node is re-queued only when the IN set of one of its
        successors actually changed, and is never queued twice at once.
        """
        known: set[int] = set(self.gen)  # ids of cfg.nodes()

        # Successor / predecessor tables derived from the children edges.
        # node.parents is deliberately not consulted: according to the
        # original author's note, CFG rewiring can leave it stale
        # (not verifiable from this module).
        succs: dict[int, list[int]] = {}
        preds: dict[int, set[int]] = {nid: set() for nid in known}
        for node in self.cfg.nodes():
            child_ids = [child.id for child in node.children
                         if child.id in known]
            succs[node.id] = child_ids
            for child_id in child_ids:
                preds[child_id].add(node.id)

        worklist: deque[int] = deque(succs)
        queued: set[int] = set(succs)  # ids currently sitting in worklist

        while worklist:
            nid = worklist.popleft()
            queued.discard(nid)

            new_out: set[UseFact] = set()
            for child_id in succs[nid]:
                new_out |= self.in_sets[child_id]
            new_in: set[UseFact] = self.gen[nid] | (new_out - self.kill[nid])

            self.out_sets[nid] = new_out
            if new_in == self.in_sets[nid]:
                # Predecessors only read IN(n); nothing to propagate.
                continue

            self.in_sets[nid] = new_in
            for pred_id in preds[nid]:
                if pred_id not in queued:
                    queued.add(pred_id)
                    worklist.append(pred_id)

    # ------------------------------------------------------------------
    # Public result
    # ------------------------------------------------------------------
    def reached_uses_by_node(self) -> dict[int, list[int]]:
        """Return, per defining node, the use nodes its definitions reach.

        For each node ``d`` with a non-empty ``defs[d.id]``::

            result[d.id] = sorted ids u with (u, var) ∈ OUT[d]
                           for some var ∈ defs[d.id]

        That is, the use at ``u`` is reached by the definition at ``d`` when
        the use-fact survives backward propagation from ``u`` to the exit of
        ``d`` without being killed by another definition of ``var``.

        Returns:
            Mapping from defining-node id to a sorted, duplicate-free list of
            use-node ids.  Nodes without definitions are omitted; defining
            nodes that reach no use map to an empty list.
        """
        result: dict[int, list[int]] = {}
        for node in self.cfg.nodes():
            nid = node.id
            defined = self.defs[nid]
            if not defined:
                continue
            reached = {uid for uid, var in self.out_sets[nid]
                       if var in defined}
            result[nid] = sorted(reached)
        return result