Initial commit

This commit is contained in:
Vinicius Silva 2024-04-20 14:06:48 -03:00
commit ee6afb763b
5 changed files with 758 additions and 0 deletions

BIN
Worklist Algorithms.zip Normal file

Binary file not shown.

374
dataflow.py Normal file
View File

@ -0,0 +1,374 @@
from lang import *
from abc import ABC, abstractmethod
class DataFlowEq(ABC):
    """
    A class that implements a data-flow equation. The key trait of a data-flow
    equation is an `eval` method, which evaluates that equation. The evaluation
    of an equation might change the environment that associates data-flow facts
    with identifiers.

    Attributes:
        num_evals: class-level counter with the number of times that equations
            have been evaluated. Remember to zero this attribute once you
            start a new static analysis, so that you can correctly count how
            many evaluations were needed to solve the analysis.
    """

    num_evals = 0

    def __init__(self, instruction):
        """
        Every data-flow equation is produced out of a program instruction. The
        initialization of the data-flow equation verifies if, indeed, the input
        object is an instruction.
        """
        assert isinstance(instruction, Inst)
        self.inst = instruction

    # NOTE: the three abstract methods below are plain instance methods: every
    # concrete implementation reads `self.inst`, so they must not be wrapped
    # in `classmethod`.
    @abstractmethod
    def name(self) -> str:
        """
        The name of a data-flow equation is used to retrieve the data-flow
        facts associated with that equation in the environment. For instance,
        imagine that we have an equation like this one below:
            "OUT[p] = (v, p) + (IN[p] - (v, _))"
        This equation affects OUT[p]. We store OUT[p] in a dictionary. The name
        of the equation is used as the key in this dictionary. For instance,
        the name of the equation could be 'OUT_p'.
        """
        raise NotImplementedError

    @abstractmethod
    def deps(self) -> list:
        """
        A list with the name of all the constraints that this equation depends
        upon. For instance, if the equation is like:
            "OUT[p] = (v, p) + (IN[p] - (v, _))"
        Then, self.deps() == ['IN_p']
        """
        raise NotImplementedError

    @abstractmethod
    def eval_aux(self, data_flow_env) -> set:
        """
        This method determines how each concrete equation evaluates itself.
        In a way, this design implements the 'template method' pattern. In other
        words, the DataFlowEq class implements a concrete method eval, which
        calls the abstract method eval_aux. It is the concrete implementation of
        eval_aux that determines how the environment is affected by the
        evaluation of a given equation.
        """
        raise NotImplementedError

    def eval(self, data_flow_env) -> bool:
        """
        This method implements the abstract evaluation of a data-flow equation.
        Notice that the actual semantics of this evaluation will be implemented
        by the `eval_aux` method, which is abstract.

        The entry `data_flow_env[self.name()]` is overwritten with the result
        of `eval_aux`, and the global evaluation counter is incremented.

        Returns:
            True if the stored facts differ from the ones that were in the
            environment before the evaluation, False otherwise.

        Raises:
            KeyError: if the environment has no entry for `self.name()`.
        """
        DataFlowEq.num_evals += 1
        key = self.name()
        old_facts = data_flow_env[key]
        new_facts = self.eval_aux(data_flow_env)
        data_flow_env[key] = new_facts
        return new_facts != old_facts
def name_in(ID):
    """
    Builds the environment key of the IN set of an instruction: the prefix
    'IN_' followed by the instruction's ID. Eg.:
        >>> Inst.next_index = 0
        >>> add = Add('x', 'a', 'b')
        >>> name_in(add.ID)
        'IN_0'
    """
    return "IN_{}".format(ID)
class IN_Eq(DataFlowEq):
    """
    Abstract base of all the equations whose result is stored in the IN set
    related to some program point.
    """

    def name(self):
        # The environment key is derived from the ID of the wrapped instruction.
        inst_id = self.inst.ID
        return name_in(inst_id)
def name_out(ID):
    """
    Builds the environment key of the OUT set of an instruction: the prefix
    'OUT_' followed by the instruction's ID. Eg.:
        >>> Inst.next_index = 0
        >>> add = Add('x', 'a', 'b')
        >>> name_out(add.ID)
        'OUT_0'
    """
    return "OUT_{}".format(ID)
class OUT_Eq(DataFlowEq):
    """
    Abstract base of all the equations whose result is stored in the OUT set
    related to some program point.
    """

    def name(self):
        # The environment key is derived from the ID of the wrapped instruction.
        inst_id = self.inst.ID
        return name_out(inst_id)
class ReachingDefs_Bin_OUT_Eq(OUT_Eq):
    """
    Reaching-definitions equation for the OUT facts of a binary instruction.
    Binary instructions carry three fields (dst, src0 and src1), but only
    `dst` matters here: it is the variable that the instruction defines.
    """

    def eval_aux(self, data_flow_env):
        """
        Computes the right-hand side of:
            OUT[p] = (v, p) + (IN[p] - (v, _))
        That is: every definition in IN[p] of a variable other than `dst`
        survives, and the definition of `dst` at this instruction is added.
        Example:
            >>> Inst.next_index = 0
            >>> i0 = Add('x', 'a', 'b')
            >>> df = ReachingDefs_Bin_OUT_Eq(i0)
            >>> sorted(df.eval_aux({'IN_0': {('x', 1), ('y', 2)}}))
            [('x', 0), ('y', 2)]
        """
        defined_var = self.inst.dst
        surviving = set()
        for var, point in data_flow_env[name_in(self.inst.ID)]:
            # Older definitions of the variable written here are killed.
            if var != defined_var:
                surviving.add((var, point))
        surviving.add((defined_var, self.inst.ID))
        return surviving

    def deps(self):
        """
        The OUT set of an instruction reads only the IN set of that same
        instruction. Ex.:
            >>> Inst.next_index = 0
            >>> add = Add('x', 'a', 'b')
            >>> df = ReachingDefs_Bin_OUT_Eq(add)
            >>> df.deps()
            ['IN_0']
        """
        in_key = name_in(self.inst.ID)
        return [in_key]

    def __str__(self):
        """
        Textual form of the equation: the generated definition plus the IN
        set minus the killed definitions. Eg.:
            >>> Inst.next_index = 0
            >>> add = Add('x', 'a', 'b')
            >>> df = ReachingDefs_Bin_OUT_Eq(add)
            >>> str(df)
            'OUT_0: (x, 0) + (IN_0 - (x, _))'
        """
        dst = self.inst.dst
        generated = "({}, {})".format(dst, self.inst.ID)
        preserved = "({} - ({}, _))".format(name_in(self.inst.ID), dst)
        return "{}: {} + {}".format(self.name(), generated, preserved)
class ReachingDefs_Bt_OUT_Eq(OUT_Eq):
    """
    Reaching-definitions equation for the OUT facts of a branch instruction.
    A branch defines no variable, so its equation is the identity function
    over the IN set of the same instruction.
    """

    def eval_aux(self, data_flow_env):
        """
        Computes the right-hand side of:
            OUT[p] = IN[p]
        The set stored under the IN key is returned as is (no copy is made).
        Example:
            >>> Inst.next_index = 0
            >>> i0 = Bt('x')
            >>> df = ReachingDefs_Bt_OUT_Eq(i0)
            >>> sorted(df.eval_aux({'IN_0': {('x', 1), ('y', 2)}}))
            [('x', 1), ('y', 2)]
        """
        in_key = name_in(self.inst.ID)
        return data_flow_env[in_key]

    def deps(self):
        """
        The OUT set of a branch reads only the IN set of that same branch.
        Ex.:
            >>> Inst.next_index = 0
            >>> i = Bt('x')
            >>> df = ReachingDefs_Bt_OUT_Eq(i)
            >>> df.deps()
            ['IN_0']
        """
        in_key = name_in(self.inst.ID)
        return [in_key]

    def __str__(self):
        """
        Textual form of the equation: the name of the OUT set followed by
        the name of the IN set it copies. Eg.:
            >>> Inst.next_index = 0
            >>> i = Bt('x')
            >>> df = ReachingDefs_Bt_OUT_Eq(i)
            >>> str(df)
            'OUT_0: IN_0'
        """
        return "{}: {}".format(self.name(), name_in(self.inst.ID))
class ReachingDefs_IN_Eq(IN_Eq):
    """
    This concrete class implements the meet operation for reaching-definition
    analysis. The meet operation produces the IN set of a program point. This
    IN set is the union of the OUT sets of the predecessors of this point.
    """

    def eval_aux(self, data_flow_env):
        """
        The evaluation of the meet operation over reaching definitions is the
        union of the OUT sets of the predecessors of the instruction. An
        instruction without predecessors yields the empty set.
        Example:
            >>> Inst.next_index = 0
            >>> i0 = Add('x', 'a', 'b')
            >>> i1 = Add('x', 'c', 'd')
            >>> i2 = Add('y', 'x', 'x')
            >>> i0.add_next(i2)
            >>> i1.add_next(i2)
            >>> df = ReachingDefs_IN_Eq(i2)
            >>> sorted(df.eval_aux({'OUT_0': {('x', 0)}, 'OUT_1': {('x', 1)}}))
            [('x', 0), ('x', 1)]
        """
        pred_outs = [data_flow_env[name_out(p.ID)] for p in self.inst.preds]
        # `set().union()` with no arguments is the empty set, which covers
        # instructions that have no predecessor.
        return set().union(*pred_outs)

    def deps(self):
        """
        The list of dependencies of this equation: the names of the OUT sets
        of every predecessor of the instruction, in predecessor order. A
        predecessor that was registered more than once is listed only once.
        Ex.:
            >>> Inst.next_index = 0
            >>> i0 = Add('x', 'a', 'b')
            >>> i1 = Add('x', 'c', 'd')
            >>> i2 = Add('y', 'x', 'x')
            >>> i0.add_next(i2)
            >>> i1.add_next(i2)
            >>> df = ReachingDefs_IN_Eq(i2)
            >>> sorted(df.deps())
            ['OUT_0', 'OUT_1']
        """
        # dict.fromkeys removes duplicates while keeping the first-seen order.
        return list(dict.fromkeys(name_out(p.ID) for p in self.inst.preds))

    def __str__(self):
        """
        A string representation of the meet equation: the name of the IN set
        followed by the union of the OUT sets of the predecessors.
        Example:
            >>> Inst.next_index = 0
            >>> i0 = Add('x', 'a', 'b')
            >>> i1 = Add('x', 'c', 'd')
            >>> i2 = Add('y', 'x', 'x')
            >>> i0.add_next(i2)
            >>> i1.add_next(i2)
            >>> df = ReachingDefs_IN_Eq(i2)
            >>> str(df)
            'IN_2: Union( OUT_0, OUT_1 )'
        """
        preds = ", ".join(name_out(pred.ID) for pred in self.inst.preds)
        return f"{self.name()}: Union( {preds} )"
def reaching_defs_constraint_gen(insts):
    """
    Produces the list of equations that solve Reaching-Definition Analysis
    for the given instructions. The result holds, in this order: the OUT
    equations of the binary instructions, the OUT equations of the branches,
    and one IN equation for every instruction.
    Example:
        >>> Inst.next_index = 0
        >>> i0 = Add('c', 'a', 'b')
        >>> i1 = Mul('d', 'c', 'a')
        >>> i2 = Lth('e', 'c', 'd')
        >>> i0.add_next(i2)
        >>> i1.add_next(i2)
        >>> insts = [i0, i1, i2]
        >>> sol = [str(eq) for eq in reaching_defs_constraint_gen(insts)]
        >>> sol[0] + " " + sol[-1]
        'OUT_0: (c, 0) + (IN_0 - (c, _)) IN_2: Union( OUT_0, OUT_1 )'
    """
    equations = []
    # First pass: OUT equations of the binary instructions.
    for inst in insts:
        if isinstance(inst, BinOp):
            equations.append(ReachingDefs_Bin_OUT_Eq(inst))
    # Second pass: OUT equations of the branches.
    for inst in insts:
        if isinstance(inst, Bt):
            equations.append(ReachingDefs_Bt_OUT_Eq(inst))
    # Third pass: the meet (IN) equation of every instruction.
    for inst in insts:
        equations.append(ReachingDefs_IN_Eq(inst))
    return equations
def abstract_interp(equations):
    """
    Solves the system by chaotic iteration: all the equations are evaluated,
    in the order in which they appear, round after round, until a whole round
    changes nothing. Returns the pair (environment with the solution, number
    of equation evaluations).
    Example for reaching-definition analysis:
        >>> Inst.next_index = 0
        >>> i0 = Add('c', 'a', 'b')
        >>> i1 = Mul('d', 'c', 'a')
        >>> i0.add_next(i1)
        >>> eqs = reaching_defs_constraint_gen([i0, i1])
        >>> (sol, num_evals) = abstract_interp(eqs)
        >>> f"OUT_0: {sorted(sol['OUT_0'])}, Num Evals: {num_evals}"
        "OUT_0: [('c', 0)], Num Evals: 12"
    """
    DataFlowEq.num_evals = 0
    # Every equation starts from the empty set of facts.
    env = {}
    for eq in equations:
        env[eq.name()] = set()
    while True:
        any_update = False
        for eq in equations:
            # Each equation is evaluated in every round, even when an earlier
            # one has already reported a change.
            if eq.eval(env):
                any_update = True
        if not any_update:
            break
    return (env, DataFlowEq.num_evals)
def build_dependence_graph(equations):
    """
    This function builds the dependence graph of equations. The graph is a
    dictionary that maps the name N of a set of facts to the list of
    equations that read N, i.e., the equations E such that N is in E.deps().
    These are the equations that must be re-evaluated once the facts stored
    under N change.

    Every equation name gets an entry, even if no equation reads it. An
    equation is listed at most once per name, in the order in which the
    equations appear in the input.

    Example:
        >>> Inst.next_index = 0
        >>> i0 = Add('c', 'a', 'b')
        >>> i1 = Mul('d', 'c', 'a')
        >>> i0.add_next(i1)
        >>> eqs = reaching_defs_constraint_gen([i0, i1])
        >>> deps = build_dependence_graph(eqs)
        >>> [eq.name() for eq in deps['IN_0']]
        ['OUT_0']
    """
    dep_graph = {eq.name(): [] for eq in equations}
    for eq in equations:
        # dict.fromkeys drops repeated names, so that an equation is not
        # registered twice as a reader of the same set of facts.
        for dep_name in dict.fromkeys(eq.deps()):
            dep_graph.setdefault(dep_name, []).append(eq)
    return dep_graph


def abstract_interp_worklist(equations):
    """
    This function solves the system of equations using a worklist. Once an
    equation E is evaluated, and the evaluation changes the environment, only
    the equations that depend on E are pushed onto the worklist.

    The worklist is a FIFO queue that initially holds every equation once.
    The result is only correct if the `deps` method of every equation lists
    all the names that the equation reads.

    Returns the pair (environment with the solution, number of equation
    evaluations).

    Example for reaching-definition analysis:
        >>> Inst.next_index = 0
        >>> i0 = Add('c', 'a', 'b')
        >>> i1 = Mul('d', 'c', 'a')
        >>> i0.add_next(i1)
        >>> eqs = reaching_defs_constraint_gen([i0, i1])
        >>> (sol, num_evals) = abstract_interp_worklist(eqs)
        >>> f"OUT_0: {sorted(sol['OUT_0'])}, Num Evals: {num_evals}"
        "OUT_0: [('c', 0)], Num Evals: 6"
    """
    from collections import deque

    DataFlowEq.num_evals = 0
    # Same starting point as the chaotic solver: empty facts everywhere.
    env = {eq.name(): set() for eq in equations}
    dependents = build_dependence_graph(equations)
    worklist = deque(equations)
    while worklist:
        eq = worklist.popleft()
        if eq.eval(env):
            # The facts named eq.name() changed: whoever reads them must be
            # evaluated again.
            worklist.extend(dependents[eq.name()])
    return (env, DataFlowEq.num_evals)

30
driver.py Normal file
View File

@ -0,0 +1,30 @@
import sys
import lang
import parser
import dataflow
from lang import interp
def chaotic_solver(program):
    """
    Generates the reaching-definitions equations of `program` and hands them
    to the chaotic-iteration solver of the dataflow module, whose result is
    returned unchanged.
    """
    return dataflow.abstract_interp(
        dataflow.reaching_defs_constraint_gen(program)
    )
def worklist_solver(program):
    """
    Generates the reaching-definitions equations of `program` and hands them
    to the worklist-based solver of the dataflow module, whose result is
    returned unchanged.
    """
    return dataflow.abstract_interp_worklist(
        dataflow.reaching_defs_constraint_gen(program)
    )
if __name__ == "__main__":
    # Script entry point: reads a program from the standard input and solves
    # reaching-definition analysis for it twice, once with chaotic iterations
    # and once with the worklist-based algorithm, then compares the results.
    lang.Inst.next_index = 0
    source_lines = sys.stdin.readlines()
    env, program = parser.file2cfg_and_env(source_lines)
    env_chaotic, n_chaotic = chaotic_solver(program)
    env_worklist, n_worklist = worklist_solver(program)
    same_solution = env_chaotic == env_worklist
    fewer_evals = n_worklist <= n_chaotic
    print(f"Are the environments the same? {same_solution}")
    print(f"Does it iterate less than chaotic-sol? {fewer_evals}")

292
lang.py Normal file
View File

@ -0,0 +1,292 @@
"""
This file contains the implementation of a simple interpreter of low-level
instructions. The interpreter takes a program, represented as its first
instruction, plus an environment, which is a stack of bindings. Bindings are
pairs of variable names and values. New bindings are added to the stack
whenever new variables are defined. Bindings are never removed from the stack.
In this way, we can inspect the history of state transformations caused by the
interpretation of a program.
This file uses doctests all over. To test it, just run Python 3 as follows:
"python3 -m doctest lang.py". The program uses syntax that is exclusive to
Python 3. It will not work with standard Python 2.
"""
from collections import deque
from abc import ABC, abstractmethod
class Env:
    """
    A table that associates variables with values. The environment is
    implemented as a stack, so that previous bindings of a variable V remain
    available in the environment if V is overassigned.

    Example:
        >>> e = Env()
        >>> e.set("a", 2)
        >>> e.set("a", 3)
        >>> e.get("a")
        3

        >>> e = Env({"b": 5})
        >>> e.set("a", 2)
        >>> e.get("a") + e.get("b")
        7
    """

    def __init__(self, initial_args=None):
        """
        Creates the environment, optionally seeded with the bindings of the
        dictionary `initial_args`. The default is None (rather than a shared
        mutable `{}`), which means "no initial bindings".
        """
        self.env = deque()
        if initial_args is not None:
            for var, value in initial_args.items():
                self.env.appendleft((var, value))

    def get(self, var):
        """
        Finds the first occurrence of variable 'var' in the environment stack,
        and returns the value associated with it. The most recent binding is
        at the front of the stack, so it is the one that is found.

        Raises:
            LookupError: if 'var' has no binding in the environment.
        """
        for e_var, value in self.env:
            if e_var == var:
                # Returning from inside the loop keeps a variable that is
                # bound to None distinguishable from an absent variable.
                return value
        raise LookupError(f"Absent key {var}")

    def set(self, var, value):
        """
        This method adds 'var' to the environment, by placing the binding
        '(var, value)' onto the top of the environment stack.
        """
        self.env.appendleft((var, value))

    def dump(self):
        """
        Prints the contents of the environment. This method is mostly used for
        debugging purposes.
        """
        for var, value in self.env:
            print(f"{var}: {value}")
class Inst(ABC):
    """
    The representation of instructions. What is common among all the
    instructions is the list of successors (`nexts`), which determines the
    next instruction that will be fetched after this instruction runs, and
    the list of predecessors (`preds`). Also, every instruction has an index
    (`ID`), which is always different. The index is taken from the class-level
    counter `next_index`, which is incremented whenever a new instruction is
    created.
    """

    next_index = 0

    def __init__(self):
        self.nexts = []
        self.preds = []
        self.ID = Inst.next_index
        Inst.next_index += 1

    def add_next(self, next_inst):
        """
        Makes `next_inst` a successor of this instruction, and registers this
        instruction as a predecessor of `next_inst`.
        """
        self.nexts.append(next_inst)
        next_inst.preds.append(self)

    # NOTE: `definition` and `uses` are plain instance methods: the concrete
    # implementations read fields of `self`, so they must not be wrapped in
    # `classmethod`.
    @abstractmethod
    def definition(self):
        """The variables that this instruction defines."""
        raise NotImplementedError

    @abstractmethod
    def uses(self):
        """The variables that this instruction reads."""
        raise NotImplementedError

    def get_next(self):
        """
        Returns the first successor of this instruction, or None if the
        instruction has no successor.
        """
        return self.nexts[0] if self.nexts else None
class BinOp(Inst):
    """
    The general class of binary instructions. These instructions define a
    value, and use two values. As such, it contains a routine to extract the
    defined value, and the list of used values.
    """

    def __init__(self, dst, src0, src1):
        self.dst = dst
        self.src0 = src0
        self.src1 = src1
        super().__init__()

    # NOTE: a plain instance method; concrete instructions override it, and
    # `__str__` calls it on the instance.
    @abstractmethod
    def get_opcode(self):
        """The textual operator of the instruction, used by `__str__`."""
        raise NotImplementedError

    def definition(self):
        """The set with the single variable defined by this instruction."""
        return {self.dst}

    def uses(self):
        """The set with the variables read by this instruction."""
        return {self.src0, self.src1}

    def __str__(self):
        """
        Renders the instruction, followed by one line with the IDs of its
        predecessors and one line with the ID of its first successor (left
        blank if there is none).
        """
        op = self.get_opcode()
        inst_s = f"{self.ID}: {self.dst} = {self.src0}{op}{self.src1}"
        pred_s = f"\n P: {', '.join(str(inst.ID) for inst in self.preds)}"
        next_s = f"\n N: {self.nexts[0].ID if self.nexts else ''}"
        return inst_s + pred_s + next_s
class Add(BinOp):
    """
    Binary instruction that binds `dst` to the sum of its two sources.
    Example:
        >>> a = Add("a", "b0", "b1")
        >>> e = Env({"b0":2, "b1":3})
        >>> a.eval(e)
        >>> e.get("a")
        5

        >>> a = Add("a", "b0", "b1")
        >>> a.get_next() == None
        True
    """

    def get_opcode(self):
        return "+"

    def eval(self, env):
        # Both operands are read from the environment, left one first.
        lhs = env.get(self.src0)
        rhs = env.get(self.src1)
        env.set(self.dst, lhs + rhs)
class Mul(BinOp):
    """
    Binary instruction that binds `dst` to the product of its two sources.
    Example:
        >>> a = Mul("a", "b0", "b1")
        >>> e = Env({"b0":2, "b1":3})
        >>> a.eval(e)
        >>> e.get("a")
        6
    """

    def get_opcode(self):
        return "*"

    def eval(self, env):
        # Both operands are read from the environment, left one first.
        lhs = env.get(self.src0)
        rhs = env.get(self.src1)
        env.set(self.dst, lhs * rhs)
class Lth(BinOp):
    """
    Binary instruction that binds `dst` to the result of the comparison
    "first source is less than second source".
    Example:
        >>> a = Lth("a", "b0", "b1")
        >>> e = Env({"b0":2, "b1":3})
        >>> a.eval(e)
        >>> e.get("a")
        True
    """

    def get_opcode(self):
        return "<"

    def eval(self, env):
        # Both operands are read from the environment, left one first.
        lhs = env.get(self.src0)
        rhs = env.get(self.src1)
        env.set(self.dst, lhs < rhs)
class Geq(BinOp):
    """
    Binary instruction that binds `dst` to the result of the comparison
    "first source is greater than or equal to second source".
    Example:
        >>> a = Geq("a", "b0", "b1")
        >>> e = Env({"b0":2, "b1":3})
        >>> a.eval(e)
        >>> e.get("a")
        False
    """

    def get_opcode(self):
        return ">="

    def eval(self, env):
        # Both operands are read from the environment, left one first.
        lhs = env.get(self.src0)
        rhs = env.get(self.src1)
        env.set(self.dst, lhs >= rhs)
class Bt(Inst):
    """
    This is a Branch-If-True instruction, which diverts the control flow to the
    'true_dst' if the predicate 'cond' is true, and to the 'false_dst'
    otherwise. The successors are kept in `nexts` as the two-element list
    [true_dst, false_dst]; a successor that is not known yet is None.

    Example:
        >>> e = Env({"t": True, "x": 0})
        >>> a = Add("x", "x", "x")
        >>> m = Mul("x", "x", "x")
        >>> b = Bt("t", a, m)
        >>> b.eval(e)
        >>> b.get_next() == a
        True
    """

    def __init__(self, cond, true_dst=None, false_dst=None):
        super().__init__()
        self.cond = cond
        self.nexts = [true_dst, false_dst]
        # Only the successors that were actually given get this branch as a
        # predecessor.
        for dst in (true_dst, false_dst):
            if dst is not None:
                dst.preds.append(self)

    def definition(self):
        """A branch defines no variable."""
        return set()

    def uses(self):
        """A branch reads only its condition."""
        return {self.cond}

    def add_true_next(self, true_dst):
        """Sets the successor taken when the condition is true."""
        self.nexts[0] = true_dst
        true_dst.preds.append(self)

    def add_next(self, false_dst):
        """
        Sets the successor taken when the condition is false. This overrides
        `Inst.add_next`, so that chaining instructions in program order wires
        the fall-through path of the branch.
        """
        self.nexts[1] = false_dst
        false_dst.preds.append(self)

    def eval(self, env):
        """
        The evaluation of the condition sets the `next_iter` attribute of the
        instruction: 0 (the true successor) if the value bound to the
        condition is truthy, and 1 (the false successor) otherwise. This value
        determines which successor `get_next` returns.
        """
        self.next_iter = 0 if env.get(self.cond) else 1

    def get_next(self):
        """
        Returns the successor selected by the last call to `eval`. The
        attribute `next_iter` only exists after `eval` has run, so calling
        this method before `eval` raises AttributeError.
        """
        return self.nexts[self.next_iter]

    def __str__(self):
        """
        Renders the branch, followed by one line with the IDs of its
        predecessors and one line with the IDs of its true (NT) and false (NF)
        successors. A successor that is still None is left blank, instead of
        failing on the access to its ID.
        """
        true_id = "" if self.nexts[0] is None else self.nexts[0].ID
        false_id = "" if self.nexts[1] is None else self.nexts[1].ID
        inst_s = f"{self.ID}: bt {self.cond}"
        pred_s = f"\n P: {', '.join(str(inst.ID) for inst in self.preds)}"
        next_s = f"\n NT:{true_id} NF:{false_id}"
        return inst_s + pred_s + next_s
def interp(instruction, environment):
    """
    This function evaluates a program until there is no more instructions to
    evaluate. Each instruction is evaluated against `environment`, and then
    asked for its successor via `get_next`; the evaluation stops when that
    successor is None (or any other falsy value).

    The evaluation is a loop rather than a recursion, so that programs that
    execute more instructions than Python's recursion limit still run.

    Returns the same `environment` object that was passed in.

    Example:
        >>> env = Env({"m": 3, "n": 2, "zero": 0})
        >>> m_min = Add("answer", "m", "zero")
        >>> n_min = Add("answer", "n", "zero")
        >>> p = Lth("p", "n", "m")
        >>> b = Bt("p", n_min, m_min)
        >>> p.add_next(b)
        >>> interp(p, env).get("answer")
        2
    """
    while instruction:
        instruction.eval(environment)
        instruction = instruction.get_next()
    return environment

62
parser.py Normal file
View File

@ -0,0 +1,62 @@
"""
This file implements a parser: a function that reads a text file, and returns
a control-flow graph of instructions plus an environment mapping variables to
integer values.
"""
from lang import *
def line2env(line):
    """
    Converts a string holding a JSON object into an environment of the
    language: every key/value pair of the object becomes one binding. This
    function is meant for the first line of the text file, which contains
    the initial environment of the program that will be created.
    Example
        >>> line2env('{"zero": 0, "one": 1, "three": 3, "iter": 9}').get('one')
        1
    """
    import json

    parsed = json.loads(line)
    result = Env()
    for pair in parsed.items():
        name, value = pair
        result.set(name, value)
    return result
def file2cfg_and_env(lines):
    """
    Builds a control-flow graph representation for the strings stored in
    `lines`. The first string represents the environment. The other strings
    represent instructions, in one of two forms:

        dst = op src0 src1      where op is one of add, mul, lth, geq
        bt cond target          where target is the index of an instruction

    Instruction indices start at zero and count only instruction lines (the
    environment line and blank lines are not counted). Every instruction
    falls through to the instruction on the next line; a branch additionally
    jumps to `target` when `cond` is true.

    Returns the pair (environment, list of instructions in program order).

    Raises:
        ValueError: if a line is not a well-formed instruction, or if a
            branch target is not the index of an instruction.

    Example:
        >>> l0 = '{"a": 0, "b": 3}'
        >>> l1 = 'bt a 1'
        >>> l2 = 'x = add a b'
        >>> env, prog = file2cfg_and_env([l0, l1, l2])
        >>> interp(prog[0], env).get("x")
        3

        >>> l0 = '{"a": 1, "b": 3, "x": 42, "z": 0}'
        >>> l1 = 'bt a 2'
        >>> l2 = 'x = add a b'
        >>> l3 = 'x = add x z'
        >>> env, prog = file2cfg_and_env([l0, l1, l2, l3])
        >>> interp(prog[0], env).get("x")
        42

        >>> l0 = '{"a": 1, "b": 3, "c": 5}'
        >>> l1 = 'x = add a b'
        >>> l2 = 'x = add x c'
        >>> env, prog = file2cfg_and_env([l0, l1, l2])
        >>> interp(prog[0], env).get("x")
        9
    """
    binary_ops = {"add": Add, "mul": Mul, "lth": Lth, "geq": Geq}

    env = line2env(lines[0])
    insts = []
    # Branch targets may point forward, so they are resolved only after all
    # the instructions have been created.
    pending_branches = []
    for line in lines[1:]:
        tokens = line.split()
        if not tokens:
            # Blank lines (e.g., a trailing newline in the input) are skipped.
            continue
        if len(tokens) == 5 and tokens[1] == "=":
            dst, _, opcode, src0, src1 = tokens
            if opcode not in binary_ops:
                raise ValueError(f"Unknown opcode in instruction: {line!r}")
            inst = binary_ops[opcode](dst, src0, src1)
        elif len(tokens) == 3 and tokens[0] == "bt":
            inst = Bt(tokens[1])
            pending_branches.append((inst, int(tokens[2])))
        else:
            raise ValueError(f"Malformed instruction: {line!r}")
        insts.append(inst)

    # Fall-through edges: for a branch, `add_next` sets the false successor.
    for current, following in zip(insts, insts[1:]):
        current.add_next(following)

    for branch, target in pending_branches:
        if not 0 <= target < len(insts):
            raise ValueError(f"Branch target out of range: {target}")
        branch.add_true_next(insts[target])

    return (env, insts)