374 lines
12 KiB
Python
374 lines
12 KiB
Python
from lang import *
|
|
from abc import ABC, abstractmethod
|
|
|
|
|
|
class DataFlowEq(ABC):
|
|
"""
|
|
A class that implements a data-flow equation. The key trait of a data-flow
|
|
equation is an `eval` method, which evaluates that equation. The evaluation
|
|
of an equation might change the environment that associates data-flow facts
|
|
with identifiers.
|
|
|
|
Attributes:
|
|
num_evals the number of times that constraints have been evaluated.
|
|
Remember to zero this attribute once you start a new static
|
|
analysis, so that you can correctly count how many times each
|
|
equation had to be evaluated to solve the analysis.
|
|
"""
|
|
|
|
num_evals = 0
|
|
|
|
def __init__(self, instruction):
|
|
"""
|
|
Every data-flow equation is produced out of a program instruction. The
|
|
initialization of the data-flow equation verifies if, indeed, the input
|
|
object is an instruction.
|
|
"""
|
|
assert isinstance(instruction, Inst)
|
|
self.inst = instruction
|
|
|
|
@classmethod
|
|
@abstractmethod
|
|
def name(self) -> str:
|
|
"""
|
|
The name of a data-flow equation is used to retrieve the data-flow
|
|
facts associated with that equation in the environment. For instance,
|
|
imagine that we have an equation like this one below:
|
|
|
|
"OUT[p] = (v, p) + (IN[p] - (v, _))"
|
|
|
|
This equation affects OUT[p]. We store OUT[p] in a dictionary. The name
|
|
of the equation is used as the key in this dictionary. For instance,
|
|
the name of the equation could be 'OUT_p'.
|
|
"""
|
|
raise NotImplementedError
|
|
|
|
@classmethod
|
|
@abstractmethod
|
|
def deps(self) -> list:
|
|
"""
|
|
A list with the name of all the constraints that this equation depends
|
|
upon. For instance, if the equation is like:
|
|
|
|
"OUT[p] = (v, p) + (IN[p] - (v, _))"
|
|
|
|
Then, self.deps() == ['IN_p']
|
|
"""
|
|
raise NotImplementedError
|
|
|
|
@classmethod
|
|
@abstractmethod
|
|
def eval_aux(self, data_flow_env) -> set:
|
|
"""
|
|
This method determines how each concrete equation evaluates itself.
|
|
In a way, this design implements the 'template method' pattern. In other
|
|
words, the DataFlowEq class implements a concrete method eval, which
|
|
calls the abstract method eval_aux. It is the concrete implementation of
|
|
eval_aux that determines how the environment is affected by the
|
|
evaluation of a given equation.
|
|
"""
|
|
raise NotImplementedError
|
|
|
|
def eval(self, data_flow_env) -> bool:
|
|
"""
|
|
This method implements the abstract evaluation of a data-flow equation.
|
|
Notice that the actual semantics of this evaluation will be implemented
|
|
by the `èval_aux` method, which is abstract.
|
|
"""
|
|
DataFlowEq.num_evals += 1
|
|
old_env = data_flow_env[self.name()]
|
|
data_flow_env[self.name()] = self.eval_aux(data_flow_env)
|
|
return True if data_flow_env[self.name()] != old_env else False
|
|
|
|
|
|
def name_in(ID):
|
|
"""
|
|
The name of an IN set is always ID + _IN. Eg.:
|
|
>>> Inst.next_index = 0
|
|
>>> add = Add('x', 'a', 'b')
|
|
>>> name_in(add.ID)
|
|
'IN_0'
|
|
"""
|
|
return f"IN_{ID}"
|
|
|
|
|
|
class IN_Eq(DataFlowEq):
|
|
"""
|
|
This abstract class represents all the equations that affect the IN set
|
|
related to some program point.
|
|
"""
|
|
|
|
def name(self):
|
|
return name_in(self.inst.ID)
|
|
|
|
|
|
def name_out(ID):
|
|
"""
|
|
The name of an OUT set is always ID + _OUT. Eg.:
|
|
>>> Inst.next_index = 0
|
|
>>> add = Add('x', 'a', 'b')
|
|
>>> name_out(add.ID)
|
|
'OUT_0'
|
|
"""
|
|
return f"OUT_{ID}"
|
|
|
|
|
|
class OUT_Eq(DataFlowEq):
|
|
"""
|
|
This abstract class represents all the equations that affect the OUT set
|
|
related to some program point.
|
|
"""
|
|
|
|
def name(self):
|
|
return name_out(self.inst.ID)
|
|
|
|
|
|
class ReachingDefs_Bin_OUT_Eq(OUT_Eq):
|
|
"""
|
|
This concrete class implements the equations that affect OUT facts of the
|
|
reaching-definitions analysis for binary instructions. These instructions
|
|
have three fields: dst, src0 and src1; however, only the former is of
|
|
interest for these equations.
|
|
"""
|
|
|
|
def eval_aux(self, data_flow_env):
|
|
"""
|
|
Evaluates this equation, where:
|
|
OUT[p] = (v, p) + (IN[p] - (v, _))
|
|
|
|
Example:
|
|
>>> Inst.next_index = 0
|
|
>>> i0 = Add('x', 'a', 'b')
|
|
>>> df = ReachingDefs_Bin_OUT_Eq(i0)
|
|
>>> sorted(df.eval_aux({'IN_0': {('x', 1), ('y', 2)}}))
|
|
[('x', 0), ('y', 2)]
|
|
"""
|
|
in_set = data_flow_env[name_in(self.inst.ID)]
|
|
new_set = {(v, p) for (v, p) in in_set if v != self.inst.dst}
|
|
return new_set.union([(self.inst.dst, self.inst.ID)])
|
|
|
|
def deps(self):
|
|
"""
|
|
The list of dependencies of this equation. Ex.:
|
|
>>> Inst.next_index = 0
|
|
>>> add = Add('x', 'a', 'b')
|
|
>>> df = ReachingDefs_Bin_OUT_Eq(add)
|
|
>>> df.deps()
|
|
['IN_0']
|
|
"""
|
|
return [name_in(self.inst.ID)]
|
|
|
|
def __str__(self):
|
|
"""
|
|
A string representation of a reaching-defs equation representing
|
|
a binary instruction. Eg.:
|
|
>>> Inst.next_index = 0
|
|
>>> add = Add('x', 'a', 'b')
|
|
>>> df = ReachingDefs_Bin_OUT_Eq(add)
|
|
>>> str(df)
|
|
'OUT_0: (x, 0) + (IN_0 - (x, _))'
|
|
"""
|
|
kill_set = f" + ({name_in(self.inst.ID)} - ({self.inst.dst}, _))"
|
|
gen_set = f"({self.inst.dst}, {self.inst.ID})"
|
|
return f"{self.name()}: {gen_set}{kill_set}"
|
|
|
|
|
|
class ReachingDefs_Bt_OUT_Eq(OUT_Eq):
|
|
"""
|
|
This concrete class implements the equations that affect OUT facts of the
|
|
reaching-definitions analysis for branch instructions. These instructions
|
|
do not affect reaching definitions at all. Therefore, their equations are
|
|
mostly treated as identity functions.
|
|
"""
|
|
|
|
def eval_aux(self, data_flow_env):
|
|
"""
|
|
Evaluates this equation. Notice that the reaching definition equation
|
|
for a branch instruction is simply the identity function.
|
|
OUT[p] = IN[p]
|
|
|
|
Example:
|
|
>>> Inst.next_index = 0
|
|
>>> i0 = Bt('x')
|
|
>>> df = ReachingDefs_Bt_OUT_Eq(i0)
|
|
>>> sorted(df.eval_aux({'IN_0': {('x', 1), ('y', 2)}}))
|
|
[('x', 1), ('y', 2)]
|
|
"""
|
|
return data_flow_env[name_in(self.inst.ID)]
|
|
|
|
def deps(self):
|
|
"""
|
|
The list of dependencies of this equation. Ex.:
|
|
>>> Inst.next_index = 0
|
|
>>> i = Bt('x')
|
|
>>> df = ReachingDefs_Bt_OUT_Eq(i)
|
|
>>> df.deps()
|
|
['IN_0']
|
|
"""
|
|
return [name_in(self.inst.ID)]
|
|
|
|
def __str__(self):
|
|
"""
|
|
A string representation of a reaching-defs equation representing a
|
|
branch. Eg.:
|
|
>>> Inst.next_index = 0
|
|
>>> i = Bt('x')
|
|
>>> df = ReachingDefs_Bt_OUT_Eq(i)
|
|
>>> str(df)
|
|
'OUT_0: IN_0'
|
|
"""
|
|
kill_set = f"{name_in(self.inst.ID)}"
|
|
gen_set = f""
|
|
return f"{self.name()}: {gen_set}{kill_set}"
|
|
|
|
|
|
class ReachingDefs_IN_Eq(IN_Eq):
|
|
"""
|
|
This concrete class implements the meet operation for reaching-definition
|
|
analysis. The meet operation produces the IN set of a program point. This
|
|
IN set is the union of the OUT set of the predecessors of this point.
|
|
"""
|
|
|
|
def eval_aux(self, data_flow_env):
|
|
"""
|
|
The evaluation of the meet operation over reaching definitions is the
|
|
union of the OUT sets of the predecessors of the instruction.
|
|
|
|
Example:
|
|
>>> Inst.next_index = 0
|
|
>>> i0 = Add('x', 'a', 'b')
|
|
>>> i1 = Add('x', 'c', 'd')
|
|
>>> i2 = Add('y', 'x', 'x')
|
|
>>> i0.add_next(i2)
|
|
>>> i1.add_next(i2)
|
|
>>> df = ReachingDefs_IN_Eq(i2)
|
|
>>> sorted(df.eval_aux({'OUT_0': {('x', 0)}, 'OUT_1': {('x', 1)}}))
|
|
[('x', 0), ('x', 1)]
|
|
"""
|
|
solution = set()
|
|
for inst in self.inst.preds:
|
|
solution = solution.union(data_flow_env[name_out(inst.ID)])
|
|
return solution
|
|
|
|
def deps(self):
|
|
"""
|
|
The list of dependencies of this equation. Ex.:
|
|
>>> Inst.next_index = 0
|
|
>>> i0 = Add('x', 'a', 'b')
|
|
>>> i1 = Add('x', 'c', 'd')
|
|
>>> i2 = Add('y', 'x', 'x')
|
|
>>> i0.add_next(i2)
|
|
>>> i1.add_next(i2)
|
|
>>> df = ReachingDefs_IN_Eq(i2)
|
|
>>> sorted(df.deps())
|
|
['OUT_0', 'OUT_1']
|
|
"""
|
|
# TODO: Implement this method
|
|
return []
|
|
|
|
def __str__(self):
|
|
"""
|
|
The name of an IN set is always ID + _IN.
|
|
|
|
Example:
|
|
>>> Inst.next_index = 0
|
|
>>> i0 = Add('x', 'a', 'b')
|
|
>>> i1 = Add('x', 'c', 'd')
|
|
>>> i2 = Add('y', 'x', 'x')
|
|
>>> i0.add_next(i2)
|
|
>>> i1.add_next(i2)
|
|
>>> df = ReachingDefs_IN_Eq(i2)
|
|
>>> str(df)
|
|
'IN_2: Union( OUT_0, OUT_1 )'
|
|
"""
|
|
succs = ", ".join([name_out(pred.ID) for pred in self.inst.preds])
|
|
return f"{self.name()}: Union( {succs} )"
|
|
|
|
|
|
def reaching_defs_constraint_gen(insts):
|
|
"""
|
|
Builds a list of equations to solve Reaching-Definition Analysis for the
|
|
given set of instructions.
|
|
|
|
Example:
|
|
>>> Inst.next_index = 0
|
|
>>> i0 = Add('c', 'a', 'b')
|
|
>>> i1 = Mul('d', 'c', 'a')
|
|
>>> i2 = Lth('e', 'c', 'd')
|
|
>>> i0.add_next(i2)
|
|
>>> i1.add_next(i2)
|
|
>>> insts = [i0, i1, i2]
|
|
>>> sol = [str(eq) for eq in reaching_defs_constraint_gen(insts)]
|
|
>>> sol[0] + " " + sol[-1]
|
|
'OUT_0: (c, 0) + (IN_0 - (c, _)) IN_2: Union( OUT_0, OUT_1 )'
|
|
"""
|
|
in0 = [ReachingDefs_Bin_OUT_Eq(i) for i in insts if isinstance(i, BinOp)]
|
|
in1 = [ReachingDefs_Bt_OUT_Eq(i) for i in insts if isinstance(i, Bt)]
|
|
out = [ReachingDefs_IN_Eq(i) for i in insts]
|
|
return in0 + in1 + out
|
|
|
|
|
|
def abstract_interp(equations):
|
|
"""
|
|
This function iterates on the equations, solving them in the order in which
|
|
they appear. It returns an environment with the solution to the data-flow
|
|
analysis.
|
|
|
|
Example for reaching-definition analysis:
|
|
>>> Inst.next_index = 0
|
|
>>> i0 = Add('c', 'a', 'b')
|
|
>>> i1 = Mul('d', 'c', 'a')
|
|
>>> i0.add_next(i1)
|
|
>>> eqs = reaching_defs_constraint_gen([i0, i1])
|
|
>>> (sol, num_evals) = abstract_interp(eqs)
|
|
>>> f"OUT_0: {sorted(sol['OUT_0'])}, Num Evals: {num_evals}"
|
|
"OUT_0: [('c', 0)], Num Evals: 12"
|
|
"""
|
|
from functools import reduce
|
|
|
|
DataFlowEq.num_evals = 0
|
|
env = {eq.name(): set() for eq in equations}
|
|
changed = True
|
|
while changed:
|
|
changed = reduce(lambda acc, eq: eq.eval(env) or acc, equations, False)
|
|
return (env, DataFlowEq.num_evals)
|
|
|
|
def build_dependence_graph(equations):
|
|
"""
|
|
This function builds the dependence graph of equations.
|
|
|
|
Example:
|
|
>>> Inst.next_index = 0
|
|
>>> i0 = Add('c', 'a', 'b')
|
|
>>> i1 = Mul('d', 'c', 'a')
|
|
>>> i0.add_next(i1)
|
|
>>> eqs = reaching_defs_constraint_gen([i0, i1])
|
|
>>> deps = build_dependence_graph(eqs)
|
|
>>> [eq.name() for eq in deps['IN_0']]
|
|
['OUT_0']
|
|
"""
|
|
# TODO: implement this method
|
|
dep_graph = {eq.name(): [] for eq in equations}
|
|
return dep_graph
|
|
|
|
def abstract_interp_worklist(equations):
|
|
"""
|
|
This function solves the system of equations using a worklist. Once an
|
|
equation E is evaluated, and the evaluation changes the environment, only
|
|
the dependencies of E are pushed onto the worklist.
|
|
|
|
Example for reaching-definition analysis:
|
|
>>> Inst.next_index = 0
|
|
>>> i0 = Add('c', 'a', 'b')
|
|
>>> i1 = Mul('d', 'c', 'a')
|
|
>>> i0.add_next(i1)
|
|
>>> eqs = reaching_defs_constraint_gen([i0, i1])
|
|
>>> (sol, num_evals) = abstract_interp_worklist(eqs)
|
|
>>> f"OUT_0: {sorted(sol['OUT_0'])}, Num Evals: {num_evals}"
|
|
"OUT_0: [('c', 0)], Num Evals: 6"
|
|
"""
|
|
# TODO: implement this method
|
|
from collections import defaultdict
|
|
DataFlowEq.num_evals = 0
|
|
env = defaultdict(list)
|
|
return (env, DataFlowEq.num_evals) |