GLOBAL_SCOPE = ""

# A scoped variable: the function it belongs to, and its name.
# The scope is GLOBAL_SCOPE ("") for variables outside any function.
#   e.g. ("f", "x") -> variable "x" defined in function "f"
#        ("", "x")  -> variable "x" at global scope
Var = tuple[str, str]


class BackwardAnalysis:
    """Shared groundwork for backward dataflow analyses over a CFG.

    Construction performs four steps:
      1. snapshot ``cfg_build.FUNCTIONS`` (name -> (start node, end node)),
      2. walk the AST to record each function's lexical parent and parameters,
      3. assign every CFG node reachable from a function start to that function,
      4. compute the variables used and defined at every CFG node.

    Attributes available afterwards:
      cfg           -- the CFG passed to the constructor
      uses          -- node id -> set of scoped variables read at the node
      defs          -- node id -> set of scoped variables written at the node
      _funcs        -- snapshot of ``cfg_build.FUNCTIONS``
      _func_parent  -- function name -> enclosing function name (or None)
      _func_params  -- function name -> tuple of parameter names
      _func_scope   -- node id -> name of the owning function

    Helper state deliberately uses a single leading underscore. Names with two
    leading underscores are mangled per class (``_BackwardAnalysis__x``), which
    would hide them from subclasses and from code that renders the results.
    """

    def __init__(self, cfg: "CFG") -> None:
        self.cfg = cfg
        self.uses: dict[int, set[Var]] = {}
        self.defs: dict[int, set[Var]] = {}

        # Copy the registry so the analysis keeps its own view of the functions.
        self._funcs: dict[str, tuple] = dict(cfg_build.FUNCTIONS)
        self._func_parent, self._func_params = self._collect_function_metadata()
        self._func_scope: dict[int, str] = self._compute_function_scope()
        self._extract_uses_and_defs()

    def _collect_function_metadata(
        self,
    ) -> tuple[dict[str, str | None], dict[str, tuple[str, ...]]]:
        """Walk the AST and collect function-parent and parameter information.

        Returns:
            A pair ``(func_parent, func_params)``: ``func_parent[f]`` is the
            name of the function whose body declares ``f`` (None when ``f`` is
            declared outside any function) and ``func_params[f]`` is the tuple
            of ``f``'s parameter names. A later declaration with the same name
            overwrites an earlier one.
        """
        func_parent: dict[str, str | None] = {}
        func_params: dict[str, tuple[str, ...]] = {}

        def visit(expr: syntax.EXPRESSION | None, current_func: str | None) -> None:
            if expr is None:
                return
            if isinstance(expr, syntax.LET):
                decls = expr.decl if isinstance(expr.decl, list) else [expr.decl]
                # First pass: register every declared function of this LET
                # before descending into any body.
                for d in decls:
                    if isinstance(d, syntax.DECL):
                        func_parent[d.f_name] = current_func
                        func_params[d.f_name] = tuple(d.params)
                # Second pass: a function body is visited with that function
                # as the current scope; anything else keeps the current scope.
                for d in decls:
                    if isinstance(d, syntax.DECL):
                        visit(d.body, d.f_name)
                    else:
                        visit(d, current_func)
                visit(expr.body, current_func)
                return
            for _, child in expr.children():
                visit(child, current_func)

        visit(self.cfg.ast, None)
        return func_parent, func_params

    def _compute_function_scope(self) -> dict[int, str]:
        """Map each CFG node id to the function that owns the node.

        A breadth-first search starts at every function's start node. The
        search does not continue past the function's own end node and does not
        enter the start node of a different function. The first function whose
        search reaches a node keeps it.

        Returns:
            node id -> owning function name. Nodes never reached from any
            function start are absent from the mapping.
        """
        functions = self._funcs
        func_scope: dict[int, str] = {}
        all_f_start_ids: set[int] = {f_start.id for f_start, _ in functions.values()}

        for f_name, (f_start, f_end) in functions.items():
            queue: deque = deque([f_start])
            while queue:
                node = queue.popleft()
                if node.id in func_scope:
                    continue  # already claimed by an earlier function
                func_scope[node.id] = f_name
                # Stop here -- do not follow into the caller context.
                if node.id == f_end.id:
                    continue
                for child in node.children:
                    # Do not follow into a different function's START.
                    if (
                        isinstance(child, CFG_START)
                        and child.id in all_f_start_ids
                        and child.id != f_start.id
                    ):
                        continue
                    queue.append(child)

        return func_scope

    def _extract_uses_and_defs(self) -> None:
        """Populate ``self.uses`` and ``self.defs`` for every node in the CFG.

        Rules, decided by the node and the AST node it wraps:
          * CFG_START wrapping a DECL -> defines each formal parameter
          * node wrapping an ID       -> uses the resolved variable
          * node wrapping an ASSIGN   -> defines the resolved target variable
          * anything else             -> both sets stay empty
        """
        for node in self.cfg.nodes():
            nid = node.id
            func = self._func_scope.get(nid)  # None -> not inside any function
            ast = node.ast_node

            uses: set[Var] = set()
            defs: set[Var] = set()

            if isinstance(node, CFG_START) and isinstance(ast, syntax.DECL):
                # Function entry defines each formal parameter.
                for param in ast.params:
                    defs.add((ast.f_name, param))
            elif isinstance(ast, syntax.ID):
                uses.add(self._resolve_var(func, ast.name))
            elif isinstance(ast, syntax.ASSIGN):
                defs.add(self._resolve_var(func, ast.var.name))

            self.uses[nid] = uses
            self.defs[nid] = defs

    def _resolve_var(self, func: str | None, name: str) -> Var:
        """Resolve a variable's name to its scoped form.

        Starting at ``func``, walk up the chain of enclosing functions and
        return the first one that has ``name`` as a parameter.

        Args:
            func: name of the function the occurrence sits in, or None when it
                is outside every function.
            name: the plain variable name.

        Returns:
            ``(GLOBAL_SCOPE, name)`` when ``func`` is None; otherwise
            ``(owner, name)`` for the nearest enclosing function declaring the
            parameter, falling back to ``(func, name)`` when none does.
        """
        if func is None:
            return GLOBAL_SCOPE, name

        cur: str | None = func
        seen: set[str] = set()  # guards against a cyclic parent chain
        while cur is not None and cur not in seen:
            seen.add(cur)
            if name in self._func_params.get(cur, ()):
                return cur, name
            cur = self._func_parent.get(cur)

        # Fallback: local variable in the current function scope.
        return func, name
class LiveVariables(BackwardAnalysis):
    """Live-variables analysis (backward) on top of BackwardAnalysis.

    After construction the fixpoint is already computed:
      gen       -- node id -> variables used at the node
      kill      -- node id -> variables defined at the node
      incoming  -- node id -> variables live at the node's entry (IN)
      outgoing  -- node id -> variables live at the node's exit (OUT)

    Equations:
      OUT(n) = union of IN(s) over all successors s of n
      IN(n)  = (OUT(n) - KILL(n)) | GEN(n)
    """

    def __init__(self, cfg: "CFG") -> None:
        # The base constructor fills self.uses and self.defs.
        super().__init__(cfg)

        self.gen: dict[int, set[Var]] = {}
        self.kill: dict[int, set[Var]] = {}
        self.incoming: dict[int, set[Var]] = {}
        self.outgoing: dict[int, set[Var]] = {}

        self.__init_sets()
        self.solve()

    def __init_sets(self) -> None:
        """Seed gen, kill, incoming and outgoing for every CFG node."""
        for key in (n.id for n in self.cfg.nodes()):
            used = self.uses[key]
            # GEN(n) = USE(n); KILL(n) = DEF(n)
            self.gen[key] = set(used)
            self.kill[key] = set(self.defs[key])
            # IN(n) starts as a copy of GEN(n); OUT(n) starts empty.
            self.incoming[key] = set(used)
            self.outgoing[key] = set()

    def solve(self) -> None:
        """Re-evaluate the equations over all nodes until nothing changes."""
        ordered = list(self.cfg.nodes())
        valid_ids = {n.id for n in ordered}

        while True:
            dirty = False
            for current in ordered:
                key = current.id

                # OUT(n): merge IN of every successor that is a known node.
                successor_in = [
                    self.incoming[succ.id]
                    for succ in current.children
                    if succ.id in valid_ids
                ]
                live_out: set[Var] = set().union(*successor_in)

                # IN(n) = (OUT(n) - KILL(n)) | GEN(n)
                live_in: set[Var] = (live_out - self.kill[key]) | self.gen[key]

                if live_out == self.outgoing[key] and live_in == self.incoming[key]:
                    continue
                self.outgoing[key] = live_out
                self.incoming[key] = live_in
                dirty = True  # something moved -> another full pass is needed

            if not dirty:
                break

    def live_vars_by_node(self) -> dict[int, set[Var]]:
        """Return a copy of IN(n) for every node whose IN set is non-empty."""
        result: dict[int, set[Var]] = {}
        for key, live in self.incoming.items():
            if live:
                result[key] = set(live)
        return result
class ReachedUses(BackwardAnalysis):
    """Reached-uses analysis (backward) on top of BackwardAnalysis.

    Facts are pairs ``(use_node_id, var)``. After construction the fixpoint is
    already computed:
      gen              -- node id -> facts for the variables used at the node
      kill             -- node id -> all facts of the variables defined there
      in_sets          -- node id -> facts at the node's entry (IN)
      out_sets         -- node id -> facts at the node's exit (OUT)
      all_uses_by_var  -- var -> every fact that mentions this variable

    Equations:
      OUT(n) = union of IN(s) over all successors s of n
      IN(n)  = GEN(n) | (OUT(n) - KILL(n))
    """

    def __init__(self, cfg: "CFG") -> None:
        # The base constructor fills self.uses and self.defs.
        super().__init__(cfg)

        self.gen: dict[int, set[UseFact]] = {}
        self.kill: dict[int, set[UseFact]] = {}
        self.in_sets: dict[int, set[UseFact]] = {}
        self.out_sets: dict[int, set[UseFact]] = {}
        self.all_uses_by_var: dict[Var, set[UseFact]] = {}

        self.__init_sets()
        self.solve()

    def __init_sets(self) -> None:
        """Seed gen, kill, in_sets, out_sets and the per-variable fact index."""
        for key in (n.id for n in self.cfg.nodes()):
            # GEN(n) = { (n.id, var) | var in USE(n) }
            produced = {(key, var) for var in self.uses[key]}
            self.gen[key] = produced
            # IN(n) starts as a copy of GEN(n); OUT(n) starts empty.
            self.in_sets[key] = set(produced)
            self.out_sets[key] = set()

        # Index every fact by its variable, once, so KILL can look up
        # "all uses of variable v anywhere" directly.
        for facts in self.gen.values():
            for fact in facts:
                self.all_uses_by_var.setdefault(fact[1], set()).add(fact)

        # KILL(n) = every fact in all_uses_by_var[var] for each var in DEF(n).
        for key in (n.id for n in self.cfg.nodes()):
            self.kill[key] = set().union(
                *(self.all_uses_by_var.get(var, ()) for var in self.defs[key])
            )

    def solve(self) -> None:
        """Re-evaluate the equations over all nodes until nothing changes."""
        ordered = list(self.cfg.nodes())
        valid_ids = {n.id for n in ordered}

        while True:
            dirty = False
            for current in ordered:
                key = current.id

                # OUT(n): merge IN of every successor that is a known node.
                successor_in = [
                    self.in_sets[succ.id]
                    for succ in current.children
                    if succ.id in valid_ids
                ]
                facts_out: set[UseFact] = set().union(*successor_in)

                # IN(n) = GEN(n) | (OUT(n) - KILL(n))
                facts_in: set[UseFact] = self.gen[key] | (facts_out - self.kill[key])

                if facts_out == self.out_sets[key] and facts_in == self.in_sets[key]:
                    continue
                self.out_sets[key] = facts_out
                self.in_sets[key] = facts_in
                dirty = True  # something moved -> another full pass is needed

            if not dirty:
                break

    def reached_uses_by_node(self) -> dict[int, list[int]]:
        """Return, per defining node, the sorted ids of the uses it reaches.

        Only nodes with at least one definition appear as keys. A use id is
        listed when OUT of the defining node holds a fact for one of the
        variables that node defines.
        """
        result: dict[int, list[int]] = {}
        for node in self.cfg.nodes():
            key = node.id
            defined = self.defs[key]
            if defined:
                result[key] = sorted(
                    {uid for uid, var in self.out_sets[key] if var in defined}
                )
        return result
-live_variables.py — Live Variables backward dataflow analysis for TRIPLA CFGs. - -A variable v is *live* at the entry of node n if there exists a path -n → … → use(v) where v is not redefined along the way. - -Data structures ---------------- -gen dict[int, set[Var]] — GEN(n) = variables *used* at n -kill dict[int, set[Var]] — KILL(n) = variables *defined* at n -in_sets dict[int, set[Var]] — live variables at node *entry* -out_sets dict[int, set[Var]] — live variables at node *exit* - -Transfer equations (backward): - OUT(n) = ∪ IN(s) for all successors s - IN(n) = (OUT(n) − KILL(n)) ∪ GEN(n) - -Variables are represented in scoped form ``(scope, name)``, e.g. ``("f","x")``. -This avoids collisions between equal variable names in different functions. - -This module also exports ``_BackwardAnalysisBase``, the shared base class -that ``ReachedUsesAnalysis`` in reached_uses.py inherits from. The base -provides: - • AST traversal to collect function-nesting and parameter metadata - • Lexical variable resolution (parameter shadowing handled correctly) - • BFS-based CFG-node → owning-function assignment - • Unified uses / defs extraction for all node types - -Var = tuple[str, str] -""" -from __future__ import annotations - -from collections import deque -from typing import TYPE_CHECKING - -import cfg_build -import syntax -from cfg.CFG_Node import CFG_START - -if TYPE_CHECKING: - from cfg.CFG import CFG - -# --------------------------------------------------------------------------- -# Public type alias (imported by reached_uses.py) -# --------------------------------------------------------------------------- - -GLOBAL_SCOPE = "" -Var = tuple[str, str] # (function_name|GLOBAL_SCOPE, variable_name) - - -# ============================================================================ -# Shared base: function metadata, scope assignment, uses/defs extraction -# ============================================================================ - -class _BackwardAnalysisBase: - 
"""Infrastructure shared by LiveVariablesAnalysis and ReachedUsesAnalysis. - - Calling ``super().__init__(cfg)`` from a subclass: - 1. Snapshots cfg_build.FUNCTIONS. - 2. Collects AST-level function-nesting and parameter metadata. - 3. BFS-assigns every CFG node to its owning function. - 4. Extracts uses and defs for every CFG node. - - After __init__ the following attributes are available to subclasses: - - self.cfg — the CFG object - self._functions — dict[str, tuple]: snapshot of cfg_build.FUNCTIONS - self._func_parent — dict[str, str|None]: lexical parent per function - self._func_params — dict[str, tuple[str,...]]: params per function - self._func_scope — dict[int, str]: node-id → owning function name - self.uses — dict[int, set[Var]]: variables used at each node - self.defs — dict[int, set[Var]]: variables defined at each node - """ - - def __init__(self, cfg: "CFG") -> None: - self.cfg = cfg - # Snapshot FUNCTIONS so later global-state resets do not affect us. - self._functions: dict[str, tuple] = dict(cfg_build.FUNCTIONS) - - self.uses: dict[int, set[Var]] = {} - self.defs: dict[int, set[Var]] = {} - - self._func_parent, self._func_params = self._collect_function_metadata() - self._func_scope: dict[int, str] = self._compute_function_scope() - self._extract_uses_defs() - - # ------------------------------------------------------------------ - # Step 1a — Walk AST to collect lexical nesting + parameter lists - # ------------------------------------------------------------------ - - def _collect_function_metadata( - self, - ) -> tuple[dict[str, str | None], dict[str, tuple[str, ...]]]: - """Walk the AST and collect function-parent and parameter information. - - Returns - ------- - func_parent : dict[str, str | None] - func_parent[f] is the name of the immediately enclosing function - (or None for top-level functions). - func_params : dict[str, tuple[str, ...]] - func_params[f] is the ordered tuple of formal parameter names of f. 
- """ - func_parent: dict[str, str | None] = {} - func_params: dict[str, tuple[str, ...]] = {} - - def visit(expr: syntax.EXPRESSION | None, current_func: str | None) -> None: - if expr is None: - return - if isinstance(expr, syntax.LET): - decls = expr.decl if isinstance(expr.decl, list) else [expr.decl] - # Register metadata for each declared function. - for d in decls: - if isinstance(d, syntax.DECL): - # Use assignment (last-seen wins) to stay consistent - # with cfg_build.FUNCTIONS, which also overwrites on - # duplicate names. setdefault (first-seen wins) would - # disagree when a nested function shadows a top-level - # one with the same name, causing wrong scope resolution. - func_parent[d.f_name] = current_func - func_params[d.f_name] = tuple(d.params) - # Recurse into function bodies and the in-expression. - for d in decls: - if isinstance(d, syntax.DECL): - visit(d.body, d.f_name) - else: - visit(d, current_func) - visit(expr.body, current_func) - return - for _, child in expr.children(): - visit(child, current_func) - - visit(self.cfg.ast, None) - return func_parent, func_params - - # ------------------------------------------------------------------ - # Step 1b — Resolve a variable name through the lexical scope chain - # ------------------------------------------------------------------ - - def _resolve_var(self, func: str | None, name: str) -> Var: - """Resolve a variable name via lexical scope chain.""" - if func is None: - return (GLOBAL_SCOPE, name) - - cur: str | None = func - seen: set[str] = set() - while cur is not None and cur not in seen: - seen.add(cur) - if name in self._func_params.get(cur, ()): - return (cur, name) - cur = self._func_parent.get(cur) - - # Fallback: local variable in current function scope. 
- return (func, name) - - # ------------------------------------------------------------------ - # Step 2 — BFS-assign every CFG node to its owning function - # ------------------------------------------------------------------ - - def _compute_function_scope(self) -> dict[int, str]: - """BFS from each function's START node; return node-id → function-name. - - Two stopping conditions keep attribution strictly inside each function: - - 1. Do not follow into a *different* function's CFG_START (prevents - attributing callee body nodes to the caller, and vice-versa). - 2. Do not follow *past* the function's own CFG_END (prevents - following CFG_END → CFG_RETURN → continuation nodes that belong - to the *caller* context, which caused variables used there to be - resolved in the wrong scope). - - The first function whose BFS claims a node wins. - """ - functions = self._functions - func_scope: dict[int, str] = {} - all_f_start_ids: set[int] = {fs.id for _, (fs, _) in functions.items()} - - for f_name, (f_start, f_end) in functions.items(): - queue: deque = deque([f_start]) - while queue: - node = queue.popleft() - if node.id in func_scope: - continue # already claimed by an earlier function - func_scope[node.id] = f_name - # Stop here — do not follow CFG_END into caller context. - if node.id == f_end.id: - continue - for child in node.children: - # Do not follow into a different function's START. - if ( - isinstance(child, CFG_START) - and child.id in all_f_start_ids - and child.id != f_start.id - ): - continue - queue.append(child) - - return func_scope - - # ------------------------------------------------------------------ - # Step 3 — Extract uses / defs for every CFG node - # ------------------------------------------------------------------ - - def _extract_uses_defs(self) -> None: - """Populate ``self.uses`` and ``self.defs`` for every node in the CFG. 
- - Extraction rules: - • CFG_START(DECL f(p1,…,pk)) → defs = {(f,p1), …, (f,pk)} - • Node wrapping ID(x) → uses = {lexical_resolve(func, x)} - • Node wrapping ASSIGN(x = e) → defs = {lexical_resolve(func, x)} - • Everything else → uses = {}, defs = {} - - Sub-expressions already have their own CFG nodes and are not - re-inspected here; each node is responsible only for its own ast_node. - """ - for node in self.cfg.nodes(): - nid = node.id - func = self._func_scope.get(nid) # None → outer / global scope - ast = node.ast_node - - uses: set[Var] = set() - defs: set[Var] = set() - - if isinstance(node, CFG_START) and isinstance(ast, syntax.DECL): - # Function entry defines each formal parameter. - for param in ast.params: - defs.add((ast.f_name, param)) - elif ast is not None: - if isinstance(ast, syntax.ID): - resolved = self._resolve_var(func, ast.name) - uses.add(resolved) - elif isinstance(ast, syntax.ASSIGN): - resolved = self._resolve_var(func, ast.var.name) - defs.add(resolved) - - self.uses[nid] = uses - self.defs[nid] = defs - - -# ============================================================================ -# Live Variables Analysis -# ============================================================================ - -class LiveVariablesAnalysis(_BackwardAnalysisBase): - """Backward dataflow analysis: Live Variables. - - A variable (f, x) is *live* at the entry of node n if there is a path - from n to some use of (f, x) along which (f, x) is not redefined. - - This is the simpler predecessor to ReachedUsesAnalysis (reached_uses.py): - it tracks which variables are live, not *where* they are used. - - Attributes - ---------- - gen dict[int, set[Var]] GEN(n) = uses(n) — vars used at n - kill dict[int, set[Var]] KILL(n) = defs(n) — vars defined at n - in_sets dict[int, set[Var]] live variables at n's *entry* - out_sets dict[int, set[Var]] live variables at n's *exit* - - (uses and defs are identical to gen / kill and are inherited from the - base class.) 
- - Transfer equations (backward): - OUT(n) = ∪ IN(s) for all successors s - IN(n) = (OUT(n) − KILL(n)) ∪ GEN(n) - """ - - def __init__(self, cfg: "CFG") -> None: - # Base populates uses, defs, _func_scope, etc. - super().__init__(cfg) - - self.gen: dict[int, set[Var]] = {} - self.kill: dict[int, set[Var]] = {} - self.in_sets: dict[int, set[Var]] = {} - self.out_sets: dict[int, set[Var]] = {} - - self._build_gen_kill() - self.solve() - - # ------------------------------------------------------------------ - # Build gen / kill; initialise in / out to ∅ - # ------------------------------------------------------------------ - - def _build_gen_kill(self) -> None: - """GEN(n) = uses(n), KILL(n) = defs(n); initialise in/out sets.""" - for node in self.cfg.nodes(): - nid = node.id - self.gen[nid] = set(self.uses[nid]) - self.kill[nid] = set(self.defs[nid]) - self.in_sets[nid] = set() - self.out_sets[nid] = set() - - # ------------------------------------------------------------------ - # Backward worklist fixpoint - # ------------------------------------------------------------------ - - def solve(self) -> None: - """Backward worklist until fixpoint. - - Transfer: - OUT(n) = ∪ IN(s) for all successors s - IN(n) = (OUT(n) − KILL(n)) ∪ GEN(n) - - Only nodes reachable from cfg.START are processed (guard against - propagate=False parent references from CFG.__remove_and_rewire). - """ - nodes = list(self.cfg.nodes()) - known: set[int] = set(self.gen.keys()) - id_to_node = {n.id: n for n in nodes} - worklist: deque = deque(nodes) - - # Build predecessor relation from children edges. This is more reliable - # than node.parents because CFG rewiring may add edges with - # propagate=False, leaving parent links stale. 
- preds: dict[int, set[int]] = {nid: set() for nid in known} - for node in nodes: - for child in node.children: - if child.id in known: - preds[child.id].add(node.id) - - while worklist: - node = worklist.popleft() - nid = node.id - - new_out: set[Var] = set() - for child in node.children: - if child.id in known: - new_out |= self.in_sets[child.id] - - new_in: set[Var] = (new_out - self.kill[nid]) | self.gen[nid] - - if new_out != self.out_sets[nid] or new_in != self.in_sets[nid]: - self.out_sets[nid] = new_out - self.in_sets[nid] = new_in - for pred_id in preds[nid]: - worklist.append(id_to_node[pred_id]) - - # ------------------------------------------------------------------ - # Result - # ------------------------------------------------------------------ - - def live_vars_by_node(self) -> dict[int, set[Var]]: - """Return the live-variable set at the *entry* of each node. - - Returns - ------- - dict[int, set[Var]] - Keys: CFG node ids whose in_set is non-empty. - Values: copy of the live-variable set at that node's entry. - """ - return {nid: set(vs) for nid, vs in self.in_sets.items() if vs} diff --git a/Project-02-03-04-05/cfa/reached_uses.py b/Project-02-03-04-05/cfa/reached_uses.py deleted file mode 100644 index d4a3a58..0000000 --- a/Project-02-03-04-05/cfa/reached_uses.py +++ /dev/null @@ -1,203 +0,0 @@ -""" -reached_uses.py — Reached-Uses backward dataflow analysis for TRIPLA CFGs. - -Extends ``_BackwardAnalysisBase`` from live_variables.py, which provides the -shared function-scope resolution and uses/defs extraction machinery. The Live -Variables analysis (LiveVariablesAnalysis) in that module is the simpler -predecessor of this analysis (tip from the course notes: implement LV first, -then extend to RU). - -How ReachedUsesAnalysis extends LiveVariablesAnalysis ------------------------------------------------------- -Live Variables tracks *which* variables are live at each node (set[Var]). 
-Reached Uses additionally tracks *where* each variable is used by attaching -the use-node id to every fact, giving set[UseFact] = set[tuple[int, Var]]. - -The transfer function changes accordingly: - LV: IN(n) = (OUT(n) − KILL_LV(n)) ∪ GEN_LV(n) [sets of Var] - RU: IN(n) = (OUT(n) − KILL_RU(n)) ∪ GEN_RU(n) [sets of UseFact] - - GEN_LV(n) = uses(n) — set[Var] - GEN_RU(n) = { (n.id, var) | var ∈ uses(n) } — set[UseFact] - - KILL_LV(n) = defs(n) — set[Var] - KILL_RU(n) = { (uid, var) | var ∈ defs(n), — set[UseFact] - (uid, var) ∈ all_uses_by_var[var] } - - The set-difference in both cases removes exactly the facts for variables - that are defined at n — equivalent to the ⊖ operator from the lecture - slides (M ⊖ K = {(p,id) ∈ M | id ∉ K}). - -Type aliases ------------- - Var = tuple[str, str] # (scope, variable_name) - UseFact = tuple[int, Var] # (use_node_id, scoped_var) - -Analysis attributes (all populated after construction) ------------------------------------------------------- - uses dict[int, set[Var]] - defs dict[int, set[Var]] - gen dict[int, set[UseFact]] - kill dict[int, set[UseFact]] - in_sets dict[int, set[UseFact]] - out_sets dict[int, set[UseFact]] - all_uses_by_var dict[Var, set[UseFact]] - -Final result ------------- - reached_uses_by_node() → dict[int, list[int]] - Keys: defining-node ids - Values: sorted, deduplicated list of use-node ids reached by the def -""" -from __future__ import annotations - -from collections import deque -from typing import TYPE_CHECKING - -# Import the shared base class (and Var) from the Live Variables module. 
-from cfa.live_variables import _BackwardAnalysisBase, Var - -if TYPE_CHECKING: - from cfg.CFG import CFG - -# --------------------------------------------------------------------------- -# Public type aliases (re-exported so tests/reached_uses_stub.py can pick up -# ReachedUsesAnalysis without needing to know about live_variables.py) -# --------------------------------------------------------------------------- - -UseFact = tuple[int, Var] # (use_node_id, scoped_var) - - -# ============================================================================ -# Reached-Uses Analysis -# ============================================================================ - -class ReachedUsesAnalysis(_BackwardAnalysisBase): - """Backward dataflow analysis: Reached Uses. - - Inherits uses/defs extraction and function-scope resolution from - _BackwardAnalysisBase (live_variables.py). Extends it with use-fact - tracking: each fact carries the id of the node where the variable is used, - enabling def-use pairs to be recovered from the fixpoint solution. - - Transfer equations (backward): - OUT(n) = ∪ IN(s) for all successors s - IN(n) = GEN(n) ∪ (OUT(n) − KILL(n)) - - GEN(n) = { (n.id, var) | var ∈ uses(n) } - KILL(n) = { (uid, var) | var ∈ defs(n), - (uid, var) ∈ all_uses_by_var[var] } - """ - - def __init__(self, cfg: "CFG") -> None: - # Base populates: uses, defs, _func_scope, _func_parent, _func_params. 
- super().__init__(cfg) - - self.gen: dict[int, set[UseFact]] = {} - self.kill: dict[int, set[UseFact]] = {} - self.in_sets: dict[int, set[UseFact]] = {} - self.out_sets: dict[int, set[UseFact]] = {} - self.all_uses_by_var: dict[Var, set[UseFact]] = {} - - self._build_gen_kill() - self.solve() - - # ------------------------------------------------------------------ - # Step 1 — Build gen, kill, all_uses_by_var; initialise in/out - # ------------------------------------------------------------------ - - def _build_gen_kill(self) -> None: - """Compute gen and kill sets; populate all_uses_by_var.""" - # GEN[n] = { (n.id, var) | var ∈ uses[n] } - for node in self.cfg.nodes(): - nid = node.id - self.gen[nid] = {(nid, var) for var in self.uses[nid]} - self.in_sets[nid] = set() - self.out_sets[nid] = set() - - # all_uses_by_var: index all use-facts by their variable. - for nid, facts in self.gen.items(): - for (uid, var) in facts: - self.all_uses_by_var.setdefault(var, set()).add((uid, var)) - - # KILL[n] = all use-facts for variables defined at n. - for node in self.cfg.nodes(): - nid = node.id - kill_n: set[UseFact] = set() - for var in self.defs[nid]: - if var in self.all_uses_by_var: - kill_n |= self.all_uses_by_var[var] - self.kill[nid] = kill_n - - # ------------------------------------------------------------------ - # Step 2 — Backward worklist fixpoint - # ------------------------------------------------------------------ - - def solve(self) -> None: - """Backward worklist until fixpoint. - - Transfer: - OUT(n) = ∪ IN(s) for all successors s - IN(n) = GEN(n) ∪ (OUT(n) − KILL(n)) - - Only nodes reachable from cfg.START are processed (guard against - propagate=False parent references from CFG.__remove_and_rewire). - """ - nodes = list(self.cfg.nodes()) - known: set[int] = set(self.gen.keys()) # ids of cfg.nodes() - id_to_node = {n.id: n for n in nodes} - worklist: deque = deque(nodes) - - # Build predecessor relation from children edges. 
CFG rewiring may - # create edges with propagate=False, so node.parents can be stale. - preds: dict[int, set[int]] = {nid: set() for nid in known} - for node in nodes: - for child in node.children: - if child.id in known: - preds[child.id].add(node.id) - - while worklist: - node = worklist.popleft() - nid = node.id - - new_out: set[UseFact] = set() - for child in node.children: - if child.id in known: - new_out |= self.in_sets[child.id] - - new_in: set[UseFact] = self.gen[nid] | (new_out - self.kill[nid]) - - if new_out != self.out_sets[nid] or new_in != self.in_sets[nid]: - self.out_sets[nid] = new_out - self.in_sets[nid] = new_in - for pred_id in preds[nid]: - worklist.append(id_to_node[pred_id]) - - # ------------------------------------------------------------------ - # Public result - # ------------------------------------------------------------------ - - def reached_uses_by_node(self) -> dict[int, list[int]]: - """Return the final reached-uses result. - - For each defining node d: - result[d.id] = sorted list of use-node ids u such that - (u, var) ∈ OUT[d] for some var ∈ defs[d]. - - Semantics: the definition at d of variable var reaches the use at u - if there is a CFG path d → … → u along which var is not redefined. - - Only nodes with at least one definition appear as keys. 
def _analysis_member(analysis, name):
    """Fetch a scope helper (``func_scope`` / ``resolve_var``) from an analysis.

    Inside a module-level function ``analysis.__x`` is NOT name-mangled, so it
    looks up a literal ``__x`` attribute and raises AttributeError when the
    class stored the member under a class-private (mangled) name. The member is
    therefore looked up as ``_<name>`` first and, failing that, under the
    mangled form ``_BackwardAnalysis__<name>``.
    """
    try:
        return getattr(analysis, f"_{name}")
    except AttributeError:
        return getattr(analysis, f"_BackwardAnalysis__{name}")


def _lv_in_for_display(node, analysis):
    """Display-level IN set for LV.

    Starts from a copy of ``analysis.incoming`` for the node. For a node that
    wraps an assignment, the variables read inside the assigned expression are
    added as well, each resolved to its scoped form.
    """
    in_set = set(analysis.incoming.get(node.id, set()))
    ast_node = node.ast_node
    if not isinstance(ast_node, syntax.ASSIGN):
        return in_set

    func_scope = _analysis_member(analysis, "func_scope")
    resolve_var = _analysis_member(analysis, "resolve_var")
    func = func_scope.get(node.id)
    in_set |= {
        resolve_var(func, name)
        for name in __expr_used_names(ast_node.expr)
    }
    return in_set
LiveVariablesAnalysis - from cfa.reached_uses import ReachedUsesAnalysis + from cfa.LiveVariables import LiveVariablesAnalysis + from cfa.ReachedUses import ReachedUsesAnalysis lv = LiveVariablesAnalysis(cfg) ru = ReachedUsesAnalysis(cfg) - all_ids = set(lv.in_sets.keys()) | set(lv.out_sets.keys()) + all_ids = set(lv.incoming.keys()) | set(lv.outgoing.keys()) annotations = { nid: ( "LivingVariables\\n" f"In := {sorted(_lv_in_for_display(node_by_id[nid], lv))}\\n" - f"Out := {sorted(lv.out_sets.get(nid, set()))}" + f"Out := {sorted(lv.outgoing.get(nid, set()))}" ) for nid in all_ids - if lv.in_sets.get(nid, set()) or lv.out_sets.get(nid, set()) - if nid in node_by_id and _show_analysis_on_node(node_by_id[nid]) + if lv.incoming.get(nid, set()) or lv.outgoing.get(nid, set()) + if nid in node_by_id and __should_display_analysis(node_by_id[nid]) } return {"lv": lv, "ru": ru}, annotations, ru.reached_uses_by_node() diff --git a/Project-02-03-04-05/main.py b/Project-02-03-04-05/main.py index 3655dbc..60cf63b 100644 --- a/Project-02-03-04-05/main.py +++ b/Project-02-03-04-05/main.py @@ -11,7 +11,7 @@ import cfg_build import lib.console as cnsl import syntax import triplayacc as yacc -from cfa.analysis_dot import analysis_to_dot, run_all_analyses +from cfa.to_dot import analysis_to_dot, run_all_analyses from cfg.CFG import CFG from vistram.tram import * from vistram.vistram import MachineUI @@ -81,10 +81,10 @@ def print_analysis_reports(cfg, analyses: dict, ru_edges: dict[int, list[int]]): print("\nLive Variables Report") print("---------------------") - node_ids = sorted(set(lv.in_sets.keys()) | set(lv.out_sets.keys())) + node_ids = sorted(set(lv.incoming.keys()) | set(lv.outgoing.keys())) for nid in node_ids: - in_set = sorted(lv.in_sets.get(nid, set())) - out_set = sorted(lv.out_sets.get(nid, set())) + in_set = sorted(lv.incoming.get(nid, set())) + out_set = sorted(lv.outgoing.get(nid, set())) if not in_set and not out_set: continue print(f"n{nid} [{node_text(nid)}]: 
In={in_set} Out={out_set}")