commit ee6afb763b151f0feb62d7b9f851bf81840c0cda Author: Vinicius Silva Date: Sat Apr 20 14:06:48 2024 -0300 Initial commit diff --git a/Worklist Algorithms.zip b/Worklist Algorithms.zip new file mode 100644 index 0000000..3e8739a Binary files /dev/null and b/Worklist Algorithms.zip differ diff --git a/dataflow.py b/dataflow.py new file mode 100644 index 0000000..ffbb7a6 --- /dev/null +++ b/dataflow.py @@ -0,0 +1,374 @@ +from lang import * +from abc import ABC, abstractmethod + + +class DataFlowEq(ABC): + """ + A class that implements a data-flow equation. The key trait of a data-flow + equation is an `eval` method, which evaluates that equation. The evaluation + of an equation might change the environment that associates data-flow facts + with identifiers. + + Attributes: + num_evals the number of times that constraints have been evaluated. + Remember to zero this attribute once you start a new static + analysis, so that you can correctly count how many times each + equation had to be evaluated to solve the analysis. + """ + + num_evals = 0 + + def __init__(self, instruction): + """ + Every data-flow equation is produced out of a program instruction. The + initialization of the data-flow equation verifies if, indeed, the input + object is an instruction. + """ + assert isinstance(instruction, Inst) + self.inst = instruction + + @classmethod + @abstractmethod + def name(self) -> str: + """ + The name of a data-flow equation is used to retrieve the data-flow + facts associated with that equation in the environment. For instance, + imagine that we have an equation like this one below: + + "OUT[p] = (v, p) + (IN[p] - (v, _))" + + This equation affects OUT[p]. We store OUT[p] in a dictionary. The name + of the equation is used as the key in this dictionary. For instance, + the name of the equation could be 'OUT_p'. + """ + raise NotImplementedError + + @classmethod + @abstractmethod + def deps(self) -> list: + """ + A list with the name of all the constraints that this equation depends + upon. For instance, if the equation is like: + + "OUT[p] = (v, p) + (IN[p] - (v, _))" + + Then, self.deps() == ['IN_p'] + """ + raise NotImplementedError + + @classmethod + @abstractmethod + def eval_aux(self, data_flow_env) -> set: + """ + This method determines how each concrete equation evaluates itself. + In a way, this design implements the 'template method' pattern. In other + words, the DataFlowEq class implements a concrete method eval, which + calls the abstract method eval_aux. It is the concrete implementation of + eval_aux that determines how the environment is affected by the + evaluation of a given equation. + """ + raise NotImplementedError + + def eval(self, data_flow_env) -> bool: + """ + This method implements the abstract evaluation of a data-flow equation. + Notice that the actual semantics of this evaluation will be implemented + by the `èval_aux` method, which is abstract. + """ + DataFlowEq.num_evals += 1 + old_env = data_flow_env[self.name()] + data_flow_env[self.name()] = self.eval_aux(data_flow_env) + return True if data_flow_env[self.name()] != old_env else False + + +def name_in(ID): + """ + The name of an IN set is always ID + _IN. Eg.: + >>> Inst.next_index = 0 + >>> add = Add('x', 'a', 'b') + >>> name_in(add.ID) + 'IN_0' + """ + return f"IN_{ID}" + + +class IN_Eq(DataFlowEq): + """ + This abstract class represents all the equations that affect the IN set + related to some program point. + """ + + def name(self): + return name_in(self.inst.ID) + + +def name_out(ID): + """ + The name of an OUT set is always ID + _OUT. Eg.: + >>> Inst.next_index = 0 + >>> add = Add('x', 'a', 'b') + >>> name_out(add.ID) + 'OUT_0' + """ + return f"OUT_{ID}" + + +class OUT_Eq(DataFlowEq): + """ + This abstract class represents all the equations that affect the OUT set + related to some program point. + """ + + def name(self): + return name_out(self.inst.ID) + + +class ReachingDefs_Bin_OUT_Eq(OUT_Eq): + """ + This concrete class implements the equations that affect OUT facts of the + reaching-definitions analysis for binary instructions. These instructions + have three fields: dst, src0 and src1; however, only the former is of + interest for these equations. + """ + + def eval_aux(self, data_flow_env): + """ + Evaluates this equation, where: + OUT[p] = (v, p) + (IN[p] - (v, _)) + + Example: + >>> Inst.next_index = 0 + >>> i0 = Add('x', 'a', 'b') + >>> df = ReachingDefs_Bin_OUT_Eq(i0) + >>> sorted(df.eval_aux({'IN_0': {('x', 1), ('y', 2)}})) + [('x', 0), ('y', 2)] + """ + in_set = data_flow_env[name_in(self.inst.ID)] + new_set = {(v, p) for (v, p) in in_set if v != self.inst.dst} + return new_set.union([(self.inst.dst, self.inst.ID)]) + + def deps(self): + """ + The list of dependencies of this equation. Ex.: + >>> Inst.next_index = 0 + >>> add = Add('x', 'a', 'b') + >>> df = ReachingDefs_Bin_OUT_Eq(add) + >>> df.deps() + ['IN_0'] + """ + return [name_in(self.inst.ID)] + + def __str__(self): + """ + A string representation of a reaching-defs equation representing + a binary instruction. Eg.: + >>> Inst.next_index = 0 + >>> add = Add('x', 'a', 'b') + >>> df = ReachingDefs_Bin_OUT_Eq(add) + >>> str(df) + 'OUT_0: (x, 0) + (IN_0 - (x, _))' + """ + kill_set = f" + ({name_in(self.inst.ID)} - ({self.inst.dst}, _))" + gen_set = f"({self.inst.dst}, {self.inst.ID})" + return f"{self.name()}: {gen_set}{kill_set}" + + +class ReachingDefs_Bt_OUT_Eq(OUT_Eq): + """ + This concrete class implements the equations that affect OUT facts of the + reaching-definitions analysis for branch instructions. These instructions + do not affect reaching definitions at all. Therefore, their equations are + mostly treated as identity functions. + """ + + def eval_aux(self, data_flow_env): + """ + Evaluates this equation. Notice that the reaching definition equation + for a branch instruction is simply the identity function. + OUT[p] = IN[p] + + Example: + >>> Inst.next_index = 0 + >>> i0 = Bt('x') + >>> df = ReachingDefs_Bt_OUT_Eq(i0) + >>> sorted(df.eval_aux({'IN_0': {('x', 1), ('y', 2)}})) + [('x', 1), ('y', 2)] + """ + return data_flow_env[name_in(self.inst.ID)] + + def deps(self): + """ + The list of dependencies of this equation. Ex.: + >>> Inst.next_index = 0 + >>> i = Bt('x') + >>> df = ReachingDefs_Bt_OUT_Eq(i) + >>> df.deps() + ['IN_0'] + """ + return [name_in(self.inst.ID)] + + def __str__(self): + """ + A string representation of a reaching-defs equation representing a + branch. Eg.: + >>> Inst.next_index = 0 + >>> i = Bt('x') + >>> df = ReachingDefs_Bt_OUT_Eq(i) + >>> str(df) + 'OUT_0: IN_0' + """ + kill_set = f"{name_in(self.inst.ID)}" + gen_set = f"" + return f"{self.name()}: {gen_set}{kill_set}" + + +class ReachingDefs_IN_Eq(IN_Eq): + """ + This concrete class implements the meet operation for reaching-definition + analysis. The meet operation produces the IN set of a program point. This + IN set is the union of the OUT set of the predecessors of this point. + """ + + def eval_aux(self, data_flow_env): + """ + The evaluation of the meet operation over reaching definitions is the + union of the OUT sets of the predecessors of the instruction. + + Example: + >>> Inst.next_index = 0 + >>> i0 = Add('x', 'a', 'b') + >>> i1 = Add('x', 'c', 'd') + >>> i2 = Add('y', 'x', 'x') + >>> i0.add_next(i2) + >>> i1.add_next(i2) + >>> df = ReachingDefs_IN_Eq(i2) + >>> sorted(df.eval_aux({'OUT_0': {('x', 0)}, 'OUT_1': {('x', 1)}})) + [('x', 0), ('x', 1)] + """ + solution = set() + for inst in self.inst.preds: + solution = solution.union(data_flow_env[name_out(inst.ID)]) + return solution + + def deps(self): + """ + The list of dependencies of this equation. Ex.: + >>> Inst.next_index = 0 + >>> i0 = Add('x', 'a', 'b') + >>> i1 = Add('x', 'c', 'd') + >>> i2 = Add('y', 'x', 'x') + >>> i0.add_next(i2) + >>> i1.add_next(i2) + >>> df = ReachingDefs_IN_Eq(i2) + >>> sorted(df.deps()) + ['OUT_0', 'OUT_1'] + """ + # TODO: Implement this method + return [] + + def __str__(self): + """ + The name of an IN set is always ID + _IN. + + Example: + >>> Inst.next_index = 0 + >>> i0 = Add('x', 'a', 'b') + >>> i1 = Add('x', 'c', 'd') + >>> i2 = Add('y', 'x', 'x') + >>> i0.add_next(i2) + >>> i1.add_next(i2) + >>> df = ReachingDefs_IN_Eq(i2) + >>> str(df) + 'IN_2: Union( OUT_0, OUT_1 )' + """ + succs = ", ".join([name_out(pred.ID) for pred in self.inst.preds]) + return f"{self.name()}: Union( {succs} )" + + +def reaching_defs_constraint_gen(insts): + """ + Builds a list of equations to solve Reaching-Definition Analysis for the + given set of instructions. + + Example: + >>> Inst.next_index = 0 + >>> i0 = Add('c', 'a', 'b') + >>> i1 = Mul('d', 'c', 'a') + >>> i2 = Lth('e', 'c', 'd') + >>> i0.add_next(i2) + >>> i1.add_next(i2) + >>> insts = [i0, i1, i2] + >>> sol = [str(eq) for eq in reaching_defs_constraint_gen(insts)] + >>> sol[0] + " " + sol[-1] + 'OUT_0: (c, 0) + (IN_0 - (c, _)) IN_2: Union( OUT_0, OUT_1 )' + """ + in0 = [ReachingDefs_Bin_OUT_Eq(i) for i in insts if isinstance(i, BinOp)] + in1 = [ReachingDefs_Bt_OUT_Eq(i) for i in insts if isinstance(i, Bt)] + out = [ReachingDefs_IN_Eq(i) for i in insts] + return in0 + in1 + out + + +def abstract_interp(equations): + """ + This function iterates on the equations, solving them in the order in which + they appear. It returns an environment with the solution to the data-flow + analysis. + + Example for reaching-definition analysis: + >>> Inst.next_index = 0 + >>> i0 = Add('c', 'a', 'b') + >>> i1 = Mul('d', 'c', 'a') + >>> i0.add_next(i1) + >>> eqs = reaching_defs_constraint_gen([i0, i1]) + >>> (sol, num_evals) = abstract_interp(eqs) + >>> f"OUT_0: {sorted(sol['OUT_0'])}, Num Evals: {num_evals}" + "OUT_0: [('c', 0)], Num Evals: 12" + """ + from functools import reduce + + DataFlowEq.num_evals = 0 + env = {eq.name(): set() for eq in equations} + changed = True + while changed: + changed = reduce(lambda acc, eq: eq.eval(env) or acc, equations, False) + return (env, DataFlowEq.num_evals) + +def build_dependence_graph(equations): + """ + This function builds the dependence graph of equations. + + Example: + >>> Inst.next_index = 0 + >>> i0 = Add('c', 'a', 'b') + >>> i1 = Mul('d', 'c', 'a') + >>> i0.add_next(i1) + >>> eqs = reaching_defs_constraint_gen([i0, i1]) + >>> deps = build_dependence_graph(eqs) + >>> [eq.name() for eq in deps['IN_0']] + ['OUT_0'] + """ + # TODO: implement this method + dep_graph = {eq.name(): [] for eq in equations} + return dep_graph + +def abstract_interp_worklist(equations): + """ + This function solves the system of equations using a worklist. Once an + equation E is evaluated, and the evaluation changes the environment, only + the dependencies of E are pushed onto the worklist. + + Example for reaching-definition analysis: + >>> Inst.next_index = 0 + >>> i0 = Add('c', 'a', 'b') + >>> i1 = Mul('d', 'c', 'a') + >>> i0.add_next(i1) + >>> eqs = reaching_defs_constraint_gen([i0, i1]) + >>> (sol, num_evals) = abstract_interp_worklist(eqs) + >>> f"OUT_0: {sorted(sol['OUT_0'])}, Num Evals: {num_evals}" + "OUT_0: [('c', 0)], Num Evals: 6" + """ + # TODO: implement this method + from collections import defaultdict + DataFlowEq.num_evals = 0 + env = defaultdict(list) + return (env, DataFlowEq.num_evals) \ No newline at end of file diff --git a/driver.py b/driver.py new file mode 100644 index 0000000..f893608 --- /dev/null +++ b/driver.py @@ -0,0 +1,30 @@ +import sys +import lang +import parser +import dataflow + +from lang import interp + + +def chaotic_solver(program): + equations = dataflow.reaching_defs_constraint_gen(program) + return dataflow.abstract_interp(equations) + + +def worklist_solver(program): + equations = dataflow.reaching_defs_constraint_gen(program) + return dataflow.abstract_interp_worklist(equations) + + +if __name__ == "__main__": + """ + This function reads a program, and solves reaching definition analysis + for it, using either chaotic iterations or the worklist-based algorithm. + """ + lang.Inst.next_index = 0 + lines = sys.stdin.readlines() + env, program = parser.file2cfg_and_env(lines) + (env_chaotic, n_chaotic) = chaotic_solver(program) + (env_worklist, n_worklist) = worklist_solver(program) + print(f"Are the environments the same? {env_chaotic == env_worklist}") + print(f"Does it iterate less than chaotic-sol? {n_worklist <= n_chaotic}") \ No newline at end of file diff --git a/lang.py b/lang.py new file mode 100644 index 0000000..638d307 --- /dev/null +++ b/lang.py @@ -0,0 +1,292 @@ +""" +This file contains the implementation of a simple interpreter of low-level +instructions. The interpreter takes a program, represented as its first +instruction, plus an environment, which is a stack of bindings. Bindings are +pairs of variable names and values. New bindings are added to the stack +whenever new variables are defined. Bindings are never removed from the stack. +In this way, we can inspect the history of state transformations caused by the +interpretation of a program. + +This file uses doctests all over. To test it, just run python 3 as follows: +"python3 -m doctest main.py". The program uses syntax that is excluive of +Python 3. It will not work with standard Python 2. +""" + +from collections import deque +from abc import ABC, abstractmethod + + +class Env: + """ + A table that associates variables with values. The environment is + implemented as a stack, so that previous bindings of a variable V remain + available in the environment if V is overassigned. + + Example: + >>> e = Env() + >>> e.set("a", 2) + >>> e.set("a", 3) + >>> e.get("a") + 3 + + >>> e = Env({"b": 5}) + >>> e.set("a", 2) + >>> e.get("a") + e.get("b") + 7 + """ + + def __init__(s, initial_args={}): + s.env = deque() + for var, value in initial_args.items(): + s.env.appendleft((var, value)) + + def get(self, var): + """ + Finds the first occurrence of variable 'var' in the environment stack, + and returns the value associated with it. + """ + val = next((value for (e_var, value) in self.env if e_var == var), None) + if val is not None: + return val + else: + raise LookupError(f"Absent key {val}") + + def set(s, var, value): + """ + This method adds 'var' to the environment, by placing the binding + '(var, value)' onto the top of the environment stack. + """ + s.env.appendleft((var, value)) + + def dump(s): + """ + Prints the contents of the environment. This method is mostly used for + debugging purposes. + """ + for var, value in s.env: + print(f"{var}: {value}") + + +class Inst(ABC): + """ + The representation of instructions. All that an instruction has, that is + common among all the instructions, is the next_inst attribute. This + attribute determines the next instruction that will be fetched after this + instruction runs. Also, every instruction has an index, which is always + different. The index is incremented whenever a new instruction is created. + """ + + next_index = 0 + + def __init__(self): + self.nexts = [] + self.preds = [] + self.ID = Inst.next_index + Inst.next_index += 1 + + def add_next(self, next_inst): + self.nexts.append(next_inst) + next_inst.preds.append(self) + + @classmethod + @abstractmethod + def definition(self): + raise NotImplementedError + + @classmethod + @abstractmethod + def uses(self): + raise NotImplementedError + + def get_next(self): + if len(self.nexts) > 0: + return self.nexts[0] + else: + return None + + +class BinOp(Inst): + """ + The general class of binary instructions. These instructions define a + value, and use two values. As such, it contains a routine to extract the + defined value, and the list of used values. + """ + + def __init__(s, dst, src0, src1): + s.dst = dst + s.src0 = src0 + s.src1 = src1 + super().__init__() + + @classmethod + @abstractmethod + def get_opcode(self): + raise NotImplementedError + + def definition(s): + return set([s.dst]) + + def uses(s): + return set([s.src0, s.src1]) + + def __str__(self): + op = self.get_opcode() + inst_s = f"{self.ID}: {self.dst} = {self.src0}{op}{self.src1}" + pred_s = f"\n P: {', '.join([str(inst.ID) for inst in self.preds])}" + next_s = f"\n N: {self.nexts[0].ID if len(self.nexts) > 0 else ''}" + return inst_s + pred_s + next_s + + +class Add(BinOp): + """ + Example: + >>> a = Add("a", "b0", "b1") + >>> e = Env({"b0":2, "b1":3}) + >>> a.eval(e) + >>> e.get("a") + 5 + + >>> a = Add("a", "b0", "b1") + >>> a.get_next() == None + True + """ + + def eval(self, env): + env.set(self.dst, env.get(self.src0) + env.get(self.src1)) + + def get_opcode(self): + return "+" + + +class Mul(BinOp): + """ + Example: + >>> a = Mul("a", "b0", "b1") + >>> e = Env({"b0":2, "b1":3}) + >>> a.eval(e) + >>> e.get("a") + 6 + """ + + def eval(s, env): + env.set(s.dst, env.get(s.src0) * env.get(s.src1)) + + def get_opcode(self): + return "*" + + +class Lth(BinOp): + """ + Example: + >>> a = Lth("a", "b0", "b1") + >>> e = Env({"b0":2, "b1":3}) + >>> a.eval(e) + >>> e.get("a") + True + """ + + def eval(s, env): + env.set(s.dst, env.get(s.src0) < env.get(s.src1)) + + def get_opcode(self): + return "<" + + +class Geq(BinOp): + """ + Example: + >>> a = Geq("a", "b0", "b1") + >>> e = Env({"b0":2, "b1":3}) + >>> a.eval(e) + >>> e.get("a") + False + """ + + def eval(s, env): + env.set(s.dst, env.get(s.src0) >= env.get(s.src1)) + + def get_opcode(self): + return ">=" + + +class Bt(Inst): + """ + This is a Branch-If-True instruction, which diverts the control flow to the + 'true_dst' if the predicate 'pred' is true, and to the 'false_dst' + otherwise. + + Example: + >>> e = Env({"t": True, "x": 0}) + >>> a = Add("x", "x", "x") + >>> m = Mul("x", "x", "x") + >>> b = Bt("t", a, m) + >>> b.eval(e) + >>> b.get_next() == a + True + """ + + def __init__(s, cond, true_dst=None, false_dst=None): + super().__init__() + s.cond = cond + s.nexts = [true_dst, false_dst] + if true_dst != None: + true_dst.preds.append(s) + if false_dst != None: + false_dst.preds.append(s) + + def definition(s): + return set() + + def uses(s): + return set([s.cond]) + + def add_true_next(s, true_dst): + s.nexts[0] = true_dst + true_dst.preds.append(s) + + def add_next(s, false_dst): + s.nexts[1] = false_dst + false_dst.preds.append(s) + + def eval(s, env): + """ + The evaluation of the condition sets the next_iter to the instruction. + This value determines which successor instruction is to be evaluated. + Any values greater than 0 are evaluated as True, while 0 corresponds to + False. + """ + if env.get(s.cond): + s.next_iter = 0 + else: + s.next_iter = 1 + + def get_next(s): + return s.nexts[s.next_iter] + + def __str__(self): + inst_s = f"{self.ID}: bt {self.cond}" + pred_s = f"\n P: {', '.join([str(inst.ID) for inst in self.preds])}" + next_s = f"\n NT:{self.nexts[0].ID} NF:{self.nexts[1].ID}" + return inst_s + pred_s + next_s + + +def interp(instruction, environment): + """ + This function evaluates a program until there is no more instructions to + evaluate. + + Example: + >>> env = Env({"m": 3, "n": 2, "zero": 0}) + >>> m_min = Add("answer", "m", "zero") + >>> n_min = Add("answer", "n", "zero") + >>> p = Lth("p", "n", "m") + >>> b = Bt("p", n_min, m_min) + >>> p.add_next(b) + >>> interp(p, env).get("answer") + 2 + """ + if instruction: + instruction.eval(environment) + return interp(instruction.get_next(), environment) + else: + return environment \ No newline at end of file diff --git a/parser.py b/parser.py new file mode 100644 index 0000000..31c33ed --- /dev/null +++ b/parser.py @@ -0,0 +1,62 @@ +""" +This file implements a parser: a function that reads a text file, and returns +a control-flow graph of instructions plus an environment mapping variables to +integer values. +""" + +from lang import * + + +def line2env(line): + """ + Maps a string (the line) to a dictionary in python. This function will be + useful to read the first line of the text file. This line contains the + initial environment of the program that will be created. If you don't like + the function, feel free to drop it off. + + Example + >>> line2env('{"zero": 0, "one": 1, "three": 3, "iter": 9}').get('one') + 1 + """ + import json + + env_dict = json.loads(line) + env_lang = Env() + for k, v in env_dict.items(): + env_lang.set(k, v) + return env_lang + + +def file2cfg_and_env(lines): + """ + Builds a control-flow graph representation for the strings stored in + `lines`. The first string represents the environment. The other strings + represent instructions. + + Example: + >>> l0 = '{"a": 0, "b": 3}' + >>> l1 = 'bt a 1' + >>> l2 = 'x = add a b' + >>> env, prog = file2cfg_and_env([l0, l1, l2]) + >>> interp(prog[0], env).get("x") + 3 + + >>> l0 = '{"a": 1, "b": 3, "x": 42, "z": 0}' + >>> l1 = 'bt a 2' + >>> l2 = 'x = add a b' + >>> l3 = 'x = add x z' + >>> env, prog = file2cfg_and_env([l0, l1, l2, l3]) + >>> interp(prog[0], env).get("x") + 42 + + >>> l0 = '{"a": 1, "b": 3, "c": 5}' + >>> l1 = 'x = add a b' + >>> l2 = 'x = add x c' + >>> env, prog = file2cfg_and_env([l0, l1, l2]) + >>> interp(prog[0], env).get("x") + 9 + """ + # TODO: Imlement this method. + env = line2env(lines[0]) + insts = [] + return (env, insts) \ No newline at end of file