Files
Construction-of-Compilers/Project-02-03-04-05/cfa/reached_uses.py
2026-03-05 18:03:55 +01:00

204 lines
8.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
reached_uses.py — Reached-Uses backward dataflow analysis for TRIPLA CFGs.
Extends ``_BackwardAnalysisBase`` from live_variables.py, which provides the
shared function-scope resolution and uses/defs extraction machinery. The Live
Variables analysis (LiveVariablesAnalysis) in that module is the simpler
predecessor of this analysis (tip from the course notes: implement LV first,
then extend to RU).
How ReachedUsesAnalysis extends LiveVariablesAnalysis
------------------------------------------------------
Live Variables tracks *which* variables are live at each node (set[Var]).
Reached Uses additionally tracks *where* each variable is used by attaching
the use-node id to every fact, giving set[UseFact] = set[tuple[int, Var]].
The transfer function changes accordingly:
LV: IN(n) = (OUT(n) - KILL_LV(n)) ∪ GEN_LV(n) [sets of Var]
RU: IN(n) = (OUT(n) - KILL_RU(n)) ∪ GEN_RU(n) [sets of UseFact]
GEN_LV(n) = uses(n) — set[Var]
GEN_RU(n) = { (n.id, var) | var ∈ uses(n) } — set[UseFact]
KILL_LV(n) = defs(n) — set[Var]
KILL_RU(n) = { (uid, var) | var ∈ defs(n), — set[UseFact]
(uid, var) ∈ all_uses_by_var[var] }
The set-difference in both cases removes exactly the facts for variables
that are defined at n — equivalent to the ⊖ operator from the lecture
slides (M ⊖ K = {(p,id) ∈ M | id ∉ K}).
Type aliases
------------
Var = tuple[str, str] # (scope, variable_name)
UseFact = tuple[int, Var] # (use_node_id, scoped_var)
Analysis attributes (all populated after construction)
------------------------------------------------------
uses dict[int, set[Var]]
defs dict[int, set[Var]]
gen dict[int, set[UseFact]]
kill dict[int, set[UseFact]]
in_sets dict[int, set[UseFact]]
out_sets dict[int, set[UseFact]]
all_uses_by_var dict[Var, set[UseFact]]
Final result
------------
reached_uses_by_node() → dict[int, list[int]]
Keys: defining-node ids
Values: sorted, deduplicated list of use-node ids reached by the def
"""
from __future__ import annotations
from collections import deque
from typing import TYPE_CHECKING
# Import the shared base class (and Var) from the Live Variables module.
from cfa.live_variables import _BackwardAnalysisBase, Var
if TYPE_CHECKING:
from cfg.CFG import CFG
# ---------------------------------------------------------------------------
# Public type aliases (re-exported so tests/reached_uses_stub.py can pick up
# ReachedUsesAnalysis without needing to know about live_variables.py)
# ---------------------------------------------------------------------------
# A use-fact pairs the id of the CFG node where a variable is used with the
# scoped variable itself; the analysis below builds its gen/kill/in/out sets
# out of these pairs.
UseFact = tuple[int, Var]  # (use_node_id, scoped_var)
# ============================================================================
# Reached-Uses Analysis
# ============================================================================
class ReachedUsesAnalysis(_BackwardAnalysisBase):
    """Reached-Uses: a backward dataflow analysis over a CFG.

    The per-node ``uses``/``defs`` sets and the function-scope resolution are
    inherited from ``_BackwardAnalysisBase``.  This class tags every used
    variable with the id of the node that uses it (a ``UseFact``), so that
    def-use pairs can be read off the fixpoint solution.

    Dataflow equations (backward):

        OUT(n)  = union of IN(s) over all successors s of n
        IN(n)   = GEN(n) ∪ (OUT(n) - KILL(n))
        GEN(n)  = { (n.id, var) | var ∈ uses(n) }
        KILL(n) = { (uid, var) | var ∈ defs(n),
                                 (uid, var) ∈ all_uses_by_var[var] }

    The whole analysis runs inside the constructor; afterwards ``gen``,
    ``kill``, ``in_sets``, ``out_sets`` and ``all_uses_by_var`` are filled in
    and ``reached_uses_by_node()`` can be queried.
    """

    def __init__(self, cfg: "CFG") -> None:
        """Set up the fact tables for *cfg* and solve to a fixpoint.

        Args:
            cfg: The control-flow graph to analyse.
        """
        # The base constructor is expected to provide uses, defs and the
        # function-scope tables (_func_scope, _func_parent, _func_params).
        super().__init__(cfg)

        # All tables are keyed by node id, except all_uses_by_var, which is
        # keyed by the scoped variable.
        self.gen: dict[int, set[UseFact]] = {}
        self.kill: dict[int, set[UseFact]] = {}
        self.in_sets: dict[int, set[UseFact]] = {}
        self.out_sets: dict[int, set[UseFact]] = {}
        self.all_uses_by_var: dict[Var, set[UseFact]] = {}

        self._build_gen_kill()
        self.solve()

    # ------------------------------------------------------------------
    # Phase 1: gen / kill construction
    # ------------------------------------------------------------------
    def _build_gen_kill(self) -> None:
        """Fill ``gen``, ``kill`` and ``all_uses_by_var``; reset IN/OUT to empty."""
        # Every variable used at a node yields one fact tagged with that
        # node's id.  IN/OUT start empty for each node.
        for node in self.cfg.nodes():
            node_id = node.id
            self.gen[node_id] = {(node_id, var) for var in self.uses[node_id]}
            self.in_sets[node_id] = set()
            self.out_sets[node_id] = set()

        # Group all generated facts by variable, so the kill set of a
        # definition can be looked up directly.
        for facts in self.gen.values():
            for fact in facts:
                self.all_uses_by_var.setdefault(fact[1], set()).add(fact)

        # A definition of var kills every use-fact of var, wherever it is used.
        # Variables that are defined but never used contribute nothing.
        for node in self.cfg.nodes():
            node_id = node.id
            self.kill[node_id] = {
                fact
                for var in self.defs[node_id]
                for fact in self.all_uses_by_var.get(var, ())
            }

    # ------------------------------------------------------------------
    # Phase 2: backward worklist iteration
    # ------------------------------------------------------------------
    def solve(self) -> None:
        """Iterate the backward transfer equations until nothing changes.

        For each node taken from the worklist:

            OUT(n) = union of IN(s) over all successors s of n
            IN(n)  = GEN(n) ∪ (OUT(n) - KILL(n))

        Successor edges leading to nodes that are not among ``cfg.nodes()``
        are ignored.  Predecessors are derived from the ``children`` edges
        instead of being read from ``node.parents``: according to the original
        author's note, CFG rewiring may create edges with propagate=False,
        which can leave ``parents`` stale.
        """
        all_nodes = list(self.cfg.nodes())
        known_ids: set[int] = set(self.gen)  # ids of the nodes in cfg.nodes()
        node_by_id = {n.id: n for n in all_nodes}

        # Invert the children relation, restricted to known nodes.
        predecessors: dict[int, set[int]] = {
            node_id: set() for node_id in known_ids
        }
        for node in all_nodes:
            for succ in node.children:
                if succ.id in known_ids:
                    predecessors[succ.id].add(node.id)

        pending: deque = deque(all_nodes)
        while pending:
            current = pending.popleft()
            node_id = current.id

            out_facts: set[UseFact] = set().union(
                *(
                    self.in_sets[succ.id]
                    for succ in current.children
                    if succ.id in known_ids
                )
            )
            in_facts: set[UseFact] = self.gen[node_id] | (
                out_facts - self.kill[node_id]
            )

            # Nothing changed for this node: its predecessors need no revisit.
            if (
                out_facts == self.out_sets[node_id]
                and in_facts == self.in_sets[node_id]
            ):
                continue

            self.out_sets[node_id] = out_facts
            self.in_sets[node_id] = in_facts
            pending.extend(
                node_by_id[pred_id] for pred_id in predecessors[node_id]
            )

    # ------------------------------------------------------------------
    # Result extraction
    # ------------------------------------------------------------------
    def reached_uses_by_node(self) -> dict[int, list[int]]:
        """Map each defining node to the use nodes its definitions reach.

        A use at node u is reached by the definition of ``var`` at node d when
        ``(u, var)`` is in OUT[d] and ``var`` is in defs[d], i.e. some CFG path
        from d to u does not redefine ``var``.

        Returns:
            A dict keyed by the id of every node that defines at least one
            variable.  Each value is the sorted list of distinct use-node ids
            reached from that node; it is empty when no use is reached.
        """
        use_ids_by_def: dict[int, list[int]] = {}
        for node in self.cfg.nodes():
            node_id = node.id
            node_defs = self.defs[node_id]
            if node_defs:
                use_ids_by_def[node_id] = sorted(
                    {
                        use_id
                        for use_id, var in self.out_sets[node_id]
                        if var in node_defs
                    }
                )
        return use_ids_by_def