from lang import * from abc import ABC, abstractmethod class DataFlowEq(ABC): """ A class that implements a data-flow equation. The key trait of a data-flow equation is an `eval` method, which evaluates that equation. The evaluation of an equation might change the environment that associates data-flow facts with identifiers. Attributes: num_evals the number of times that constraints have been evaluated. Remember to zero this attribute once you start a new static analysis, so that you can correctly count how many times each equation had to be evaluated to solve the analysis. """ num_evals = 0 def __init__(self, instruction): """ Every data-flow equation is produced out of a program instruction. The initialization of the data-flow equation verifies if, indeed, the input object is an instruction. """ assert isinstance(instruction, Inst) self.inst = instruction @classmethod @abstractmethod def name(self) -> str: """ The name of a data-flow equation is used to retrieve the data-flow facts associated with that equation in the environment. For instance, imagine that we have an equation like this one below: "OUT[p] = (v, p) + (IN[p] - (v, _))" This equation affects OUT[p]. We store OUT[p] in a dictionary. The name of the equation is used as the key in this dictionary. For instance, the name of the equation could be 'OUT_p'. """ raise NotImplementedError @classmethod @abstractmethod def deps(self) -> list: """ A list with the name of all the constraints that this equation depends upon. For instance, if the equation is like: "OUT[p] = (v, p) + (IN[p] - (v, _))" Then, self.deps() == ['IN_p'] """ raise NotImplementedError @classmethod @abstractmethod def eval_aux(self, data_flow_env) -> set: """ This method determines how each concrete equation evaluates itself. In a way, this design implements the 'template method' pattern. In other words, the DataFlowEq class implements a concrete method eval, which calls the abstract method eval_aux. It is the concrete implementation of eval_aux that determines how the environment is affected by the evaluation of a given equation. """ raise NotImplementedError def eval(self, data_flow_env) -> bool: """ This method implements the abstract evaluation of a data-flow equation. Notice that the actual semantics of this evaluation will be implemented by the `èval_aux` method, which is abstract. """ DataFlowEq.num_evals += 1 old_env = data_flow_env[self.name()] data_flow_env[self.name()] = self.eval_aux(data_flow_env) return True if data_flow_env[self.name()] != old_env else False def name_in(ID): """ The name of an IN set is always ID + _IN. Eg.: >>> Inst.next_index = 0 >>> add = Add('x', 'a', 'b') >>> name_in(add.ID) 'IN_0' """ return f"IN_{ID}" class IN_Eq(DataFlowEq): """ This abstract class represents all the equations that affect the IN set related to some program point. """ def name(self): return name_in(self.inst.ID) def name_out(ID): """ The name of an OUT set is always ID + _OUT. Eg.: >>> Inst.next_index = 0 >>> add = Add('x', 'a', 'b') >>> name_out(add.ID) 'OUT_0' """ return f"OUT_{ID}" class OUT_Eq(DataFlowEq): """ This abstract class represents all the equations that affect the OUT set related to some program point. """ def name(self): return name_out(self.inst.ID) class ReachingDefs_Bin_OUT_Eq(OUT_Eq): """ This concrete class implements the equations that affect OUT facts of the reaching-definitions analysis for binary instructions. These instructions have three fields: dst, src0 and src1; however, only the former is of interest for these equations. """ def eval_aux(self, data_flow_env): """ Evaluates this equation, where: OUT[p] = (v, p) + (IN[p] - (v, _)) Example: >>> Inst.next_index = 0 >>> i0 = Add('x', 'a', 'b') >>> df = ReachingDefs_Bin_OUT_Eq(i0) >>> sorted(df.eval_aux({'IN_0': {('x', 1), ('y', 2)}})) [('x', 0), ('y', 2)] """ in_set = data_flow_env[name_in(self.inst.ID)] new_set = {(v, p) for (v, p) in in_set if v != self.inst.dst} return new_set.union([(self.inst.dst, self.inst.ID)]) def deps(self): """ The list of dependencies of this equation. Ex.: >>> Inst.next_index = 0 >>> add = Add('x', 'a', 'b') >>> df = ReachingDefs_Bin_OUT_Eq(add) >>> df.deps() ['IN_0'] """ return [name_in(self.inst.ID)] def __str__(self): """ A string representation of a reaching-defs equation representing a binary instruction. Eg.: >>> Inst.next_index = 0 >>> add = Add('x', 'a', 'b') >>> df = ReachingDefs_Bin_OUT_Eq(add) >>> str(df) 'OUT_0: (x, 0) + (IN_0 - (x, _))' """ kill_set = f" + ({name_in(self.inst.ID)} - ({self.inst.dst}, _))" gen_set = f"({self.inst.dst}, {self.inst.ID})" return f"{self.name()}: {gen_set}{kill_set}" class ReachingDefs_Bt_OUT_Eq(OUT_Eq): """ This concrete class implements the equations that affect OUT facts of the reaching-definitions analysis for branch instructions. These instructions do not affect reaching definitions at all. Therefore, their equations are mostly treated as identity functions. """ def eval_aux(self, data_flow_env): """ Evaluates this equation. Notice that the reaching definition equation for a branch instruction is simply the identity function. OUT[p] = IN[p] Example: >>> Inst.next_index = 0 >>> i0 = Bt('x') >>> df = ReachingDefs_Bt_OUT_Eq(i0) >>> sorted(df.eval_aux({'IN_0': {('x', 1), ('y', 2)}})) [('x', 1), ('y', 2)] """ return data_flow_env[name_in(self.inst.ID)] def deps(self): """ The list of dependencies of this equation. Ex.: >>> Inst.next_index = 0 >>> i = Bt('x') >>> df = ReachingDefs_Bt_OUT_Eq(i) >>> df.deps() ['IN_0'] """ return [name_in(self.inst.ID)] def __str__(self): """ A string representation of a reaching-defs equation representing a branch. Eg.: >>> Inst.next_index = 0 >>> i = Bt('x') >>> df = ReachingDefs_Bt_OUT_Eq(i) >>> str(df) 'OUT_0: IN_0' """ kill_set = f"{name_in(self.inst.ID)}" gen_set = f"" return f"{self.name()}: {gen_set}{kill_set}" class ReachingDefs_IN_Eq(IN_Eq): """ This concrete class implements the meet operation for reaching-definition analysis. The meet operation produces the IN set of a program point. This IN set is the union of the OUT set of the predecessors of this point. """ def eval_aux(self, data_flow_env): """ The evaluation of the meet operation over reaching definitions is the union of the OUT sets of the predecessors of the instruction. Example: >>> Inst.next_index = 0 >>> i0 = Add('x', 'a', 'b') >>> i1 = Add('x', 'c', 'd') >>> i2 = Add('y', 'x', 'x') >>> i0.add_next(i2) >>> i1.add_next(i2) >>> df = ReachingDefs_IN_Eq(i2) >>> sorted(df.eval_aux({'OUT_0': {('x', 0)}, 'OUT_1': {('x', 1)}})) [('x', 0), ('x', 1)] """ solution = set() for inst in self.inst.preds: solution = solution.union(data_flow_env[name_out(inst.ID)]) return solution def deps(self): """ The list of dependencies of this equation. Ex.: >>> Inst.next_index = 0 >>> i0 = Add('x', 'a', 'b') >>> i1 = Add('x', 'c', 'd') >>> i2 = Add('y', 'x', 'x') >>> i0.add_next(i2) >>> i1.add_next(i2) >>> df = ReachingDefs_IN_Eq(i2) >>> sorted(df.deps()) ['OUT_0', 'OUT_1'] """ # TODO: Implement this method return [] def __str__(self): """ The name of an IN set is always ID + _IN. Example: >>> Inst.next_index = 0 >>> i0 = Add('x', 'a', 'b') >>> i1 = Add('x', 'c', 'd') >>> i2 = Add('y', 'x', 'x') >>> i0.add_next(i2) >>> i1.add_next(i2) >>> df = ReachingDefs_IN_Eq(i2) >>> str(df) 'IN_2: Union( OUT_0, OUT_1 )' """ succs = ", ".join([name_out(pred.ID) for pred in self.inst.preds]) return f"{self.name()}: Union( {succs} )" def reaching_defs_constraint_gen(insts): """ Builds a list of equations to solve Reaching-Definition Analysis for the given set of instructions. Example: >>> Inst.next_index = 0 >>> i0 = Add('c', 'a', 'b') >>> i1 = Mul('d', 'c', 'a') >>> i2 = Lth('e', 'c', 'd') >>> i0.add_next(i2) >>> i1.add_next(i2) >>> insts = [i0, i1, i2] >>> sol = [str(eq) for eq in reaching_defs_constraint_gen(insts)] >>> sol[0] + " " + sol[-1] 'OUT_0: (c, 0) + (IN_0 - (c, _)) IN_2: Union( OUT_0, OUT_1 )' """ in0 = [ReachingDefs_Bin_OUT_Eq(i) for i in insts if isinstance(i, BinOp)] in1 = [ReachingDefs_Bt_OUT_Eq(i) for i in insts if isinstance(i, Bt)] out = [ReachingDefs_IN_Eq(i) for i in insts] return in0 + in1 + out def abstract_interp(equations): """ This function iterates on the equations, solving them in the order in which they appear. It returns an environment with the solution to the data-flow analysis. Example for reaching-definition analysis: >>> Inst.next_index = 0 >>> i0 = Add('c', 'a', 'b') >>> i1 = Mul('d', 'c', 'a') >>> i0.add_next(i1) >>> eqs = reaching_defs_constraint_gen([i0, i1]) >>> (sol, num_evals) = abstract_interp(eqs) >>> f"OUT_0: {sorted(sol['OUT_0'])}, Num Evals: {num_evals}" "OUT_0: [('c', 0)], Num Evals: 12" """ from functools import reduce DataFlowEq.num_evals = 0 env = {eq.name(): set() for eq in equations} changed = True while changed: changed = reduce(lambda acc, eq: eq.eval(env) or acc, equations, False) return (env, DataFlowEq.num_evals) def build_dependence_graph(equations): """ This function builds the dependence graph of equations. Example: >>> Inst.next_index = 0 >>> i0 = Add('c', 'a', 'b') >>> i1 = Mul('d', 'c', 'a') >>> i0.add_next(i1) >>> eqs = reaching_defs_constraint_gen([i0, i1]) >>> deps = build_dependence_graph(eqs) >>> [eq.name() for eq in deps['IN_0']] ['OUT_0'] """ # TODO: implement this method dep_graph = {eq.name(): [] for eq in equations} return dep_graph def abstract_interp_worklist(equations): """ This function solves the system of equations using a worklist. Once an equation E is evaluated, and the evaluation changes the environment, only the dependencies of E are pushed onto the worklist. Example for reaching-definition analysis: >>> Inst.next_index = 0 >>> i0 = Add('c', 'a', 'b') >>> i1 = Mul('d', 'c', 'a') >>> i0.add_next(i1) >>> eqs = reaching_defs_constraint_gen([i0, i1]) >>> (sol, num_evals) = abstract_interp_worklist(eqs) >>> f"OUT_0: {sorted(sol['OUT_0'])}, Num Evals: {num_evals}" "OUT_0: [('c', 0)], Num Evals: 6" """ # TODO: implement this method from collections import defaultdict DataFlowEq.num_evals = 0 env = defaultdict(list) return (env, DataFlowEq.num_evals)