From ee6afb763b151f0feb62d7b9f851bf81840c0cda Mon Sep 17 00:00:00 2001 From: Vinicius Silva Date: Sat, 20 Apr 2024 14:06:48 -0300 Subject: [PATCH] Initial commit --- Worklist Algorithms.zip | Bin 0 -> 6671 bytes dataflow.py | 374 ++++++++++++++++++++++++++++++++++++++++ driver.py | 30 ++++ lang.py | 292 +++++++++++++++++++++++++++++++ parser.py | 62 +++++++ 5 files changed, 758 insertions(+) create mode 100644 Worklist Algorithms.zip create mode 100644 dataflow.py create mode 100644 driver.py create mode 100644 lang.py create mode 100644 parser.py diff --git a/Worklist Algorithms.zip b/Worklist Algorithms.zip new file mode 100644 index 0000000000000000000000000000000000000000..3e8739acc39619d31ac056a13a2cc582e74c676b GIT binary patch literal 6671 zcmZ`;WlS90wjC($(xJG!mw_V1-J!tX?$$wzyUP@JD=vlNR;*Cm-QBIg;0~|%zU02# zFL~$u*f}Rzzt+yq+H0vQyg_&ifCr!e!X1;e2}^;4NpJu_?Hd396#xL3JKMNfII}x? z_NocRE^}h6KC_^xV8wx=Vx)reJK0uS(FHS|BbJgBm~I2}U)z)H=~6fMylQAKgoK3? zQhBU{?g&WMVn9P0<1DtcKbmmyANbq2zs^@DNiImRM!J5^u%yNACX(3d>g5zkbSn>& zbK>{0=@fqO?dlPu&Z-P)$HTHp86*;H>B^Uz66>cbB9dzViWbHTk7F z+7tD`ZZGX6oLDP*eb^Lx43&L{G)s9xpoy^E<)pam^YX>3=N`%nwFHtb6DbYlpX=BF zF7ZPqaUceLTBw3B+L%MUS>A!`TaIC&1|6bhe;7-rp- z`@_Z4BZZ|h$5nA{M$ibJ0r3P=&xj8_ z4zT^~O2LWS>zBq`jo2>x&H7AbPW2HD#)eR2eA6V}WZ1Rq@$A71oa%p3$27j*{}Ubn zI79kh>Ksj+UH(m7JGcw-m%8>x7L>C%j{#`kAr@vFY1s8d_k@Cj)Fku!Wb0%)iG<~P ziC5o3`x2*lfF3q_Tm(*;jPQk=+%)&kC1z|j8wBtyF$;HBaDXmfEJQiQenf!Lj?!KV z5T0-D-)RuhPfw{Rx18tH#{fS!hhTO6uIz-yKC86Jn2vEf*p50TT$dNY1TY%xJm)B1 zDrvw{7l^!>q-}GBW1p|si7Bu&gZUsRSGFc8pchE+Rsm7(-G)m7EuH+tkg-^r*@s$B z_^VC)wig^8EhQ<(1g8a4jB!Tv&#BCs^egy4v}^SR-9(Ay^ql8{DPm2+N#fYrNuQKu zI4FJcC6y_c!P=sGsYI0UX-uxAqhyGhG$P$q?mIHk6V~f0+sCz{a2^W`?L9}T&A|_< zx}dGLTQ>c_w0XkgeF}Ds(6>Fb$Bde^$m!X*%-)EX$rKshy=aImFggSYWR7i`!otsCv##G)x+TAmGUsD9)*pM# z#HhHNI{pfq&c^Qd7GEu#jDl3MxeZTh$cOu(!}404&UcW6R!R5~-6O#N&;f%o}XllluSupjec z^&_Z|aVW)ozTNcMSYgMJ?=tgO;WW3BFhrqy&Vo+6vT(PEdjx{Wa$Jnp5P+-4JUEl# zWN96jJcYJu4Y$r`ShkueFBr)j*r3_O&(2Bo*i4q7`=Ud*F=gE_*_H!hGw$mo$Twc8 
zp2{a(T1Xqu;sK+V)8b#0&B*7=x->4Q+@~ZDjA`fiOY>_MvC97p%wE4|=xdNA#u9XT zi9AlI67?Ijdn-x6snGyWAQ)kw9TQE+frc~092Ddx>!wE~rZ7s2(yJPs9gt2Ff|sl z3plfl-H`(baHS#(C{UAh6L+H}>zM!`hG2}sX=7GZU|1)L%Z(m7c1v=_;xZd&tMqXN z7%}VvuiTMz)naCgS&yw_X2oL3*nyu>Q|e+cx==85CBMDL#js&F5rCR#B=iVf)oWxC zBPiPtI~<2uHkCH9HnXQO_+-*C@!5;3?oq}%R)%$auSWzut)cM_YkBf|-JUwQcp%4a z{rrLG$echs$O6Ie=J*D|L!#J;{^h<=bU;nZu3FlL@JGqqA)(@uWah(#f@)lI$bwVM ziLdJz)EU~PtWpHuge5FNK!$Bmx)7VA~1>Wt2` zx3y7?_X^d7LZYym-P)ug1NIWjF<9BoY{KzZowl|yFMuM_M4;*p11 z)4T}SkS^^5Uj^m;8Ye|J=O%E%KVsQ#&e)DSOI+gGQ{ck)F%6cSu>Cfet=W~1zW=*uR5}#SHb6GYN)1(uT2Q?PdL==n%Jdsw zKaPIUR7_xOF*qS#rb!=P#Ze6k$s(aXl$6TgWMy`d?NOjv;E?x)WZvqs2pN>Pvs94% zsavRfr~RBEU0vXi?rA=YKcQ~wDhKD%aZdTzBf`iZXPp_fk5kFLFTv}W(SyZlqAE{@ zT~1uTRXW91vn?P^2q0RUkpvvQ$=A$T|JKWH_!yd*R2 zHeU)TYhs7}NaYJhC(@nOxe z_Vu0lhlyPUSFE$El0xgE5nGRT1NaQpUYtF7(;PHtDXgY*m~ld>7|dnlmVU0j$e$j0 zqY?8RfW%8pgSDwE!Z(_GZgT(lu7gyAKDr?PX&Bo%8CH)4*S}40qsywsDj{mMNG*tL zExC8SzbkrsI#2@$5e@gd|2BHn!K+MqMB=D~ncN-Ug7gi$ptHf)lCZgh%u4Y5x&r9J z92J0D_Yv6A^zlG>*#|uk@>qv$rQ1L-F9DDI1IO;DTF89ipkML*OgUcp%~Fxl`17!m zyh(08VYboK?QE`GAiurZ2I)*J6%)qRxZ~B(SHtUq&oo!ADWR)l*I-xF*`4n=ufZa{ zG2~T$^m~;>;)7y8k-1cQCoC064)qPEDB!&EEsOp%;QI!B8`*;Em_$cU$|xc{X^fgL z6L$=>TtSCCKICW2!@RDS=J`iZaz=mAY3LchPp^}^$z-NO`Fx#(*Z({YVvf`hYAKGa z9o%N!l1?w)znMUtkpfzFpGOq(nI+2gj1=qx+Ycn$VXYwxtt?SYz3XBJUvo(KP{Pqm zja0N!uY8lolzE;~4@dQ6oLzh;?k*KYBNThJV(~@j_oF-O;wCK@;?x>ZM{^*(PTC3F zl^UWYc%FD}CVfq3M2M_0u9!mnDA}r5NL5%%uj*YpBT0sZUDU7E<(I5M=p7|eFFt7P zi4$ZSdt#3!e9rWyB3BX5rcL`)VdxzUrbf;`L1K@G*nv0d*h}lZXZwNw`nIOvqiuM_ zw{40RL0CS???1^VJJYBx>g(9d-s4*D_-XV@th}y^uK0L78c+A+K%*GkJD0q4e@TCy z*-2f4U_hUC!DdhrnLl!l<`@)4j6KF;)MVm_|$^kUK`0T>5}s3wXKwz}^6Q|euTg(vVTZ{4dzAgVv26TS~` z*Oi5sbsz+3iy?*i@Z9!L9L!dLKe^o^9ScznK~(&jKN7F$SLTMRWT0 zY(wY%EPHeHP@VR|2BdVG>m;!sa%)bOh(s$gRXU=&25Q*FJwslB7uX1cL?~Yw(l)=u zQ-1%jB7CRHj-nmH&;iOVXcuVH83=1qIW4Ab5p?_suT1}wvyV4+GLMw7GU)B%B6#I-xfqbWHWj#UhCs*{cFdL7j4CJ#XaZRb~Scr@_ z02Z@xRpuz`$>YXFb}sI4WPQG^g^jeG{W|nuNYZcz!F=h@EI@iq7V;s48crVY`wB6i 
zy&z{`MX9)M@@lrt{cs(d{V99JKxS9Ob_l`HkEnFi`Ek|dx%{Zu^b`#YXWuXwEz6tG zmq?F;MGJNNhiK9?lo*O@hpZW8WKU@8AJyw|(WY+iTp)N(^Moqpepg9iE2KPiVVH(1 zV5Kdcb?nP5bc*b4`lwu;13T$t-`&u4Ip{VZaGK@fI;klkZ4uW{2sex;MJ} zH-y$?8=jzOHy!sfMG}s` zuOws<@P;_JThI-DL$dRc)GG76xLI8S>qXU_e{CLyZGdu3EvM32JyF+zB3bz`{iY{N8o3rY15QHBonEr4 zA$W_q6=ODwA)x!O03JpuM!#Eq2+DXsRuzAFk9}?frA4n64|Hm&>s$UBJ>yqZ{H`Wo zxhnZ!ljc=DtLio>urbdaT0zLN@=%+;9V#LcXbHsDNifm@!=q~?WTz)d4GnZuM8>^k zm6z)}J+Irb=?N#`UPosymQ2#cAWaI&n5zA*&3q~3^IeRJMEfCyz?+7v;E;Z6XQm~O zNWQ_;(CvwR=t;SJag5((fvAVsOKI=nQ{Nn|oJW^M0u*t0Uj6qcxi3*5r8X}%!1?}G z4EPbNlPby$oIcBhqA`c$1luXjUyiH#9b*<==aO-0#DiZ?RFM}fq!`VK6T$tKEcTwZV;OtlCPyicf`YOCME+Hv0AgQb*E6Zk(Pzk?&@+O~(N5K@}y3 z`JjuAiip|a84`O12am3+JMUs&rmM#+f6FDKA^3I-Nq&+$9>2T$ZD)5(X3}+~L5R3v z{P`|WnE#VX!k{@HN!xAhU}i-rpt z7K!!XzBc=;E?JcV!j;gf3MOs(2Cr1w{ikJ`!3{VlZ(Ym)79Oy?Q-HGrAz|o!8%Md~ zLG>XEraD*>F*I?i0>OftC(tYf-nkw{Gm?tO7MoTzZ=qAc#r1L_;*4;~gxC*rmxl!f z3S%Y@c&xm+6WCI3^${P${ADIA>YL1~t9Z3#0#3QV46sfcJN&`qBqOwc7YG%!n3D>W zW46n9Lz(9jz$+e3<7zN|l@pv;1;1K1ay~so^X=uCqu|97Ga}I`Q{{^IgvZtXO9kG4?7>Vn&|bjfx=s90_d&_A1MrEz3Qz^Sx0L@TU>98%lgu z6LJkUf{qd$r13S&MUN}aj0D_Xk@87e>e?WdJ)?>gySXYWP{`2II0sJkJa-OAOdp z`vM)Ti$)-%zM{dy$_q3@$Ve8%x1z>M+0CI?AL@r+ zZkdfE){8MhpNyh5x~Dmt_sD>r(xE(ddm~m9ZbD&sW0u3Q=+ozxj< zu>Vp#t3e}Di|_!<(>s8XrSqH*ovU9Ac^Kq8QO8EF&P9H*IeoZu?u2)#ZJXgKI&I(= zL`bNeVN(gR^~6GKj_kN$RS?k?SjeV`~1 zN9zKm!Dfzu3MAhWMQLqQ98+Nlxa#iU-jkZ{b7B_lx^|=WmnwOoED9;>ifUtHi=Et| zUbKmR1o`>8=t~dFt0h6ihchMH=m-d+Bzglx)XC_p;PY&XES6K? 
zZw2XK@m}jv4zM91oq4H;i^b1#N8bcec1oYgiLJl79bn=&imD&7uIY|cWJ`fhpHhe} zvn9`jhOBno2NI&K>I%SalfT`~+dlfUQQ#gmVJE3Pg^^xiCO>G5owuJS-WM@6 zx$eg~q*J?!RKECmx-D?Um9EstXQCB=We5mEWYoOe+ul!*Zbak;$BsL>9+ya}tvszE z$Efm@tYS48G~T0a3=&p{F>d?TRwX_~c~DTuF6odQv*aBfjhT2M`nk87(dGLyu?pKh zbe!NANxCw<47MC7vymZLDXr|f`UnR{v5Ot{J=NQFl&m*pYnru2H-h#L*l<%__wX6V zX1b-)B6CisxRu-~ffj}=e8wDf!8mOnN{AL*Jzr&9q8CD_kS`~>^~vNgn}mX5%5k~= z+)=PjKO32-SZ!G{&vCj=9QRF84EAI1Q&6@q1XL*DkTQgpJr37iO+JqNLeAR$x(22O z+#eF%>+n}EnP#iZ{z5#ok(0_U_J}v-qw8Mei0kQFn|-gJTV8G*WF(L;_YtF>8JT8O zOzBz{rmb<}y!SiOZC3yD z6Gc&aJ^6JkaWLBxSzYj{Q|b*Qmu2L+V5~oYJDPu~P^|6Q0MbV@7dc!~ee-r~=Hm=& zwUKitl2PZ`;WcFpJ%5L^PmSJZAGhdYk>AE_%ER|O%{OryZpeN@I{MhUnT$3r_;lc9 zvK<|6J#!d5@YkPJx!Lj!FLb=r@%c$`IuqOx_D33F4ua2+3xzbfZd3m|fu#^9^^=db`pmE>zW2w0&=~Es+I8nvuDByFICu zn+yMp>0?z6#|@hStAys&j)bBe9X@cQ&|`&ptsAXZ z_6X?<9?vz=^O8E67gSKI7eDC#z&EuHRuYi3 zq3NZ0oEF{-1>H)+a8TCylU_aVLf3LleRcQv6-EgXVs3J=b@%bkV#!I)gQ?8)pKDBR zL{t^v;6>j2|MAFQ$^Gxs82GR9pRnYguzyD8UtJZW-b3Q>i-G*r{np5uuO#i l_@Dm-{c|PwKTs9se=P{A3W$G`0ssKv?_K#@$%y{-`X5i&evkkF literal 0 HcmV?d00001 diff --git a/dataflow.py b/dataflow.py new file mode 100644 index 0000000..ffbb7a6 --- /dev/null +++ b/dataflow.py @@ -0,0 +1,374 @@ +from lang import * +from abc import ABC, abstractmethod + + +class DataFlowEq(ABC): + """ + A class that implements a data-flow equation. The key trait of a data-flow + equation is an `eval` method, which evaluates that equation. The evaluation + of an equation might change the environment that associates data-flow facts + with identifiers. + + Attributes: + num_evals the number of times that constraints have been evaluated. + Remember to zero this attribute once you start a new static + analysis, so that you can correctly count how many times each + equation had to be evaluated to solve the analysis. + """ + + num_evals = 0 + + def __init__(self, instruction): + """ + Every data-flow equation is produced out of a program instruction. 
The + initialization of the data-flow equation verifies if, indeed, the input + object is an instruction. + """ + assert isinstance(instruction, Inst) + self.inst = instruction + + @classmethod + @abstractmethod + def name(self) -> str: + """ + The name of a data-flow equation is used to retrieve the data-flow + facts associated with that equation in the environment. For instance, + imagine that we have an equation like this one below: + + "OUT[p] = (v, p) + (IN[p] - (v, _))" + + This equation affects OUT[p]. We store OUT[p] in a dictionary. The name + of the equation is used as the key in this dictionary. For instance, + the name of the equation could be 'OUT_p'. + """ + raise NotImplementedError + + @classmethod + @abstractmethod + def deps(self) -> list: + """ + A list with the name of all the constraints that this equation depends + upon. For instance, if the equation is like: + + "OUT[p] = (v, p) + (IN[p] - (v, _))" + + Then, self.deps() == ['IN_p'] + """ + raise NotImplementedError + + @classmethod + @abstractmethod + def eval_aux(self, data_flow_env) -> set: + """ + This method determines how each concrete equation evaluates itself. + In a way, this design implements the 'template method' pattern. In other + words, the DataFlowEq class implements a concrete method eval, which + calls the abstract method eval_aux. It is the concrete implementation of + eval_aux that determines how the environment is affected by the + evaluation of a given equation. + """ + raise NotImplementedError + + def eval(self, data_flow_env) -> bool: + """ + This method implements the abstract evaluation of a data-flow equation. + Notice that the actual semantics of this evaluation will be implemented + by the `eval_aux` method, which is abstract. 
+ """ + DataFlowEq.num_evals += 1 + old_env = data_flow_env[self.name()] + data_flow_env[self.name()] = self.eval_aux(data_flow_env) + return True if data_flow_env[self.name()] != old_env else False + + +def name_in(ID): + """ + The name of an IN set is always IN_ + ID. Eg.: + >>> Inst.next_index = 0 + >>> add = Add('x', 'a', 'b') + >>> name_in(add.ID) + 'IN_0' + """ + return f"IN_{ID}" + + +class IN_Eq(DataFlowEq): + """ + This abstract class represents all the equations that affect the IN set + related to some program point. + """ + + def name(self): + return name_in(self.inst.ID) + + +def name_out(ID): + """ + The name of an OUT set is always OUT_ + ID. Eg.: + >>> Inst.next_index = 0 + >>> add = Add('x', 'a', 'b') + >>> name_out(add.ID) + 'OUT_0' + """ + return f"OUT_{ID}" + + +class OUT_Eq(DataFlowEq): + """ + This abstract class represents all the equations that affect the OUT set + related to some program point. + """ + + def name(self): + return name_out(self.inst.ID) + + +class ReachingDefs_Bin_OUT_Eq(OUT_Eq): + """ + This concrete class implements the equations that affect OUT facts of the + reaching-definitions analysis for binary instructions. These instructions + have three fields: dst, src0 and src1; however, only the former is of + interest for these equations. + """ + + def eval_aux(self, data_flow_env): + """ + Evaluates this equation, where: + OUT[p] = (v, p) + (IN[p] - (v, _)) + + Example: + >>> Inst.next_index = 0 + >>> i0 = Add('x', 'a', 'b') + >>> df = ReachingDefs_Bin_OUT_Eq(i0) + >>> sorted(df.eval_aux({'IN_0': {('x', 1), ('y', 2)}})) + [('x', 0), ('y', 2)] + """ + in_set = data_flow_env[name_in(self.inst.ID)] + new_set = {(v, p) for (v, p) in in_set if v != self.inst.dst} + return new_set.union([(self.inst.dst, self.inst.ID)]) + + def deps(self): + """ + The list of dependencies of this equation. 
Ex.: + >>> Inst.next_index = 0 + >>> add = Add('x', 'a', 'b') + >>> df = ReachingDefs_Bin_OUT_Eq(add) + >>> df.deps() + ['IN_0'] + """ + return [name_in(self.inst.ID)] + + def __str__(self): + """ + A string representation of a reaching-defs equation representing + a binary instruction. Eg.: + >>> Inst.next_index = 0 + >>> add = Add('x', 'a', 'b') + >>> df = ReachingDefs_Bin_OUT_Eq(add) + >>> str(df) + 'OUT_0: (x, 0) + (IN_0 - (x, _))' + """ + kill_set = f" + ({name_in(self.inst.ID)} - ({self.inst.dst}, _))" + gen_set = f"({self.inst.dst}, {self.inst.ID})" + return f"{self.name()}: {gen_set}{kill_set}" + + +class ReachingDefs_Bt_OUT_Eq(OUT_Eq): + """ + This concrete class implements the equations that affect OUT facts of the + reaching-definitions analysis for branch instructions. These instructions + do not affect reaching definitions at all. Therefore, their equations are + mostly treated as identity functions. + """ + + def eval_aux(self, data_flow_env): + """ + Evaluates this equation. Notice that the reaching definition equation + for a branch instruction is simply the identity function. + OUT[p] = IN[p] + + Example: + >>> Inst.next_index = 0 + >>> i0 = Bt('x') + >>> df = ReachingDefs_Bt_OUT_Eq(i0) + >>> sorted(df.eval_aux({'IN_0': {('x', 1), ('y', 2)}})) + [('x', 1), ('y', 2)] + """ + return data_flow_env[name_in(self.inst.ID)] + + def deps(self): + """ + The list of dependencies of this equation. Ex.: + >>> Inst.next_index = 0 + >>> i = Bt('x') + >>> df = ReachingDefs_Bt_OUT_Eq(i) + >>> df.deps() + ['IN_0'] + """ + return [name_in(self.inst.ID)] + + def __str__(self): + """ + A string representation of a reaching-defs equation representing a + branch. 
Eg.: + >>> Inst.next_index = 0 + >>> i = Bt('x') + >>> df = ReachingDefs_Bt_OUT_Eq(i) + >>> str(df) + 'OUT_0: IN_0' + """ + kill_set = f"{name_in(self.inst.ID)}" + gen_set = f"" + return f"{self.name()}: {gen_set}{kill_set}" + + +class ReachingDefs_IN_Eq(IN_Eq): + """ + This concrete class implements the meet operation for reaching-definition + analysis. The meet operation produces the IN set of a program point. This + IN set is the union of the OUT sets of the predecessors of this point. + """ + + def eval_aux(self, data_flow_env): + """ + The evaluation of the meet operation over reaching definitions is the + union of the OUT sets of the predecessors of the instruction. + + Example: + >>> Inst.next_index = 0 + >>> i0 = Add('x', 'a', 'b') + >>> i1 = Add('x', 'c', 'd') + >>> i2 = Add('y', 'x', 'x') + >>> i0.add_next(i2) + >>> i1.add_next(i2) + >>> df = ReachingDefs_IN_Eq(i2) + >>> sorted(df.eval_aux({'OUT_0': {('x', 0)}, 'OUT_1': {('x', 1)}})) + [('x', 0), ('x', 1)] + """ + solution = set() + for inst in self.inst.preds: + solution = solution.union(data_flow_env[name_out(inst.ID)]) + return solution + + def deps(self): + """ + The list of dependencies of this equation. Ex.: + >>> Inst.next_index = 0 + >>> i0 = Add('x', 'a', 'b') + >>> i1 = Add('x', 'c', 'd') + >>> i2 = Add('y', 'x', 'x') + >>> i0.add_next(i2) + >>> i1.add_next(i2) + >>> df = ReachingDefs_IN_Eq(i2) + >>> sorted(df.deps()) + ['OUT_0', 'OUT_1'] + """ + # TODO: Implement this method + return [] + + def __str__(self): + """ + A string representation of the meet equation of a program point. 
+ + Example: + >>> Inst.next_index = 0 + >>> i0 = Add('x', 'a', 'b') + >>> i1 = Add('x', 'c', 'd') + >>> i2 = Add('y', 'x', 'x') + >>> i0.add_next(i2) + >>> i1.add_next(i2) + >>> df = ReachingDefs_IN_Eq(i2) + >>> str(df) + 'IN_2: Union( OUT_0, OUT_1 )' + """ + succs = ", ".join([name_out(pred.ID) for pred in self.inst.preds]) + return f"{self.name()}: Union( {succs} )" + + +def reaching_defs_constraint_gen(insts): + """ + Builds a list of equations to solve Reaching-Definition Analysis for the + given set of instructions. + + Example: + >>> Inst.next_index = 0 + >>> i0 = Add('c', 'a', 'b') + >>> i1 = Mul('d', 'c', 'a') + >>> i2 = Lth('e', 'c', 'd') + >>> i0.add_next(i2) + >>> i1.add_next(i2) + >>> insts = [i0, i1, i2] + >>> sol = [str(eq) for eq in reaching_defs_constraint_gen(insts)] + >>> sol[0] + " " + sol[-1] + 'OUT_0: (c, 0) + (IN_0 - (c, _)) IN_2: Union( OUT_0, OUT_1 )' + """ + in0 = [ReachingDefs_Bin_OUT_Eq(i) for i in insts if isinstance(i, BinOp)] + in1 = [ReachingDefs_Bt_OUT_Eq(i) for i in insts if isinstance(i, Bt)] + out = [ReachingDefs_IN_Eq(i) for i in insts] + return in0 + in1 + out + + +def abstract_interp(equations): + """ + This function iterates on the equations, solving them in the order in which + they appear. It returns an environment with the solution to the data-flow + analysis. 
+ + Example for reaching-definition analysis: + >>> Inst.next_index = 0 + >>> i0 = Add('c', 'a', 'b') + >>> i1 = Mul('d', 'c', 'a') + >>> i0.add_next(i1) + >>> eqs = reaching_defs_constraint_gen([i0, i1]) + >>> (sol, num_evals) = abstract_interp(eqs) + >>> f"OUT_0: {sorted(sol['OUT_0'])}, Num Evals: {num_evals}" + "OUT_0: [('c', 0)], Num Evals: 12" + """ + from functools import reduce + + DataFlowEq.num_evals = 0 + env = {eq.name(): set() for eq in equations} + changed = True + while changed: + changed = reduce(lambda acc, eq: eq.eval(env) or acc, equations, False) + return (env, DataFlowEq.num_evals) + +def build_dependence_graph(equations): + """ + This function builds the dependence graph of equations. + + Example: + >>> Inst.next_index = 0 + >>> i0 = Add('c', 'a', 'b') + >>> i1 = Mul('d', 'c', 'a') + >>> i0.add_next(i1) + >>> eqs = reaching_defs_constraint_gen([i0, i1]) + >>> deps = build_dependence_graph(eqs) + >>> [eq.name() for eq in deps['IN_0']] + ['OUT_0'] + """ + # TODO: implement this method + dep_graph = {eq.name(): [] for eq in equations} + return dep_graph + +def abstract_interp_worklist(equations): + """ + This function solves the system of equations using a worklist. Once an + equation E is evaluated, and the evaluation changes the environment, only + the dependencies of E are pushed onto the worklist. 
+ + Example for reaching-definition analysis: + >>> Inst.next_index = 0 + >>> i0 = Add('c', 'a', 'b') + >>> i1 = Mul('d', 'c', 'a') + >>> i0.add_next(i1) + >>> eqs = reaching_defs_constraint_gen([i0, i1]) + >>> (sol, num_evals) = abstract_interp_worklist(eqs) + >>> f"OUT_0: {sorted(sol['OUT_0'])}, Num Evals: {num_evals}" + "OUT_0: [('c', 0)], Num Evals: 6" + """ + # TODO: implement this method + from collections import defaultdict + DataFlowEq.num_evals = 0 + env = defaultdict(list) + return (env, DataFlowEq.num_evals) \ No newline at end of file diff --git a/driver.py b/driver.py new file mode 100644 index 0000000..f893608 --- /dev/null +++ b/driver.py @@ -0,0 +1,30 @@ +import sys +import lang +import parser +import dataflow + +from lang import interp + + +def chaotic_solver(program): + equations = dataflow.reaching_defs_constraint_gen(program) + return dataflow.abstract_interp(equations) + + +def worklist_solver(program): + equations = dataflow.reaching_defs_constraint_gen(program) + return dataflow.abstract_interp_worklist(equations) + + +if __name__ == "__main__": + """ + This function reads a program, and solves reaching definition analysis + for it, using either chaotic iterations or the worklist-based algorithm. + """ + lang.Inst.next_index = 0 + lines = sys.stdin.readlines() + env, program = parser.file2cfg_and_env(lines) + (env_chaotic, n_chaotic) = chaotic_solver(program) + (env_worklist, n_worklist) = worklist_solver(program) + print(f"Are the environments the same? {env_chaotic == env_worklist}") + print(f"Does it iterate less than chaotic-sol? {n_worklist <= n_chaotic}") \ No newline at end of file diff --git a/lang.py b/lang.py new file mode 100644 index 0000000..638d307 --- /dev/null +++ b/lang.py @@ -0,0 +1,292 @@ +""" +This file contains the implementation of a simple interpreter of low-level +instructions. The interpreter takes a program, represented as its first +instruction, plus an environment, which is a stack of bindings. 
Bindings are +pairs of variable names and values. New bindings are added to the stack +whenever new variables are defined. Bindings are never removed from the stack. +In this way, we can inspect the history of state transformations caused by the +interpretation of a program. + +This file uses doctests all over. To test it, just run python 3 as follows: +"python3 -m doctest lang.py". The program uses syntax that is exclusive to +Python 3. It will not work with standard Python 2. +""" + +from collections import deque +from abc import ABC, abstractmethod + + +class Env: + """ + A table that associates variables with values. The environment is + implemented as a stack, so that previous bindings of a variable V remain + available in the environment if V is overassigned. + + Example: + >>> e = Env() + >>> e.set("a", 2) + >>> e.set("a", 3) + >>> e.get("a") + 3 + + >>> e = Env({"b": 5}) + >>> e.set("a", 2) + >>> e.get("a") + e.get("b") + 7 + """ + + def __init__(s, initial_args={}): + s.env = deque() + for var, value in initial_args.items(): + s.env.appendleft((var, value)) + + def get(self, var): + """ + Finds the first occurrence of variable 'var' in the environment stack, + and returns the value associated with it. + """ + val = next((value for (e_var, value) in self.env if e_var == var), None) + if val is not None: + return val + else: + raise LookupError(f"Absent key {val}") + + def set(s, var, value): + """ + This method adds 'var' to the environment, by placing the binding + '(var, value)' onto the top of the environment stack. + """ + s.env.appendleft((var, value)) + + def dump(s): + """ + Prints the contents of the environment. This method is mostly used for + debugging purposes. + """ + for var, value in s.env: + print(f"{var}: {value}") + + +class Inst(ABC): + """ + The representation of instructions. All that an instruction has, that is + common among all the instructions, is the next_inst attribute. 
This + attribute determines the next instruction that will be fetched after this + instruction runs. Also, every instruction has an index, which is always + different. The index is incremented whenever a new instruction is created. + """ + + next_index = 0 + + def __init__(self): + self.nexts = [] + self.preds = [] + self.ID = Inst.next_index + Inst.next_index += 1 + + def add_next(self, next_inst): + self.nexts.append(next_inst) + next_inst.preds.append(self) + + @classmethod + @abstractmethod + def definition(self): + raise NotImplementedError + + @classmethod + @abstractmethod + def uses(self): + raise NotImplementedError + + def get_next(self): + if len(self.nexts) > 0: + return self.nexts[0] + else: + return None + + +class BinOp(Inst): + """ + The general class of binary instructions. These instructions define a + value, and use two values. As such, it contains a routine to extract the + defined value, and the list of used values. + """ + + def __init__(s, dst, src0, src1): + s.dst = dst + s.src0 = src0 + s.src1 = src1 + super().__init__() + + @classmethod + @abstractmethod + def get_opcode(self): + raise NotImplementedError + + def definition(s): + return set([s.dst]) + + def uses(s): + return set([s.src0, s.src1]) + + def __str__(self): + op = self.get_opcode() + inst_s = f"{self.ID}: {self.dst} = {self.src0}{op}{self.src1}" + pred_s = f"\n P: {', '.join([str(inst.ID) for inst in self.preds])}" + next_s = f"\n N: {self.nexts[0].ID if len(self.nexts) > 0 else ''}" + return inst_s + pred_s + next_s + + +class Add(BinOp): + """ + Example: + >>> a = Add("a", "b0", "b1") + >>> e = Env({"b0":2, "b1":3}) + >>> a.eval(e) + >>> e.get("a") + 5 + + >>> a = Add("a", "b0", "b1") + >>> a.get_next() == None + True + """ + + def eval(self, env): + env.set(self.dst, env.get(self.src0) + env.get(self.src1)) + + def get_opcode(self): + return "+" + + +class Mul(BinOp): + """ + Example: + >>> a = Mul("a", "b0", "b1") + >>> e = Env({"b0":2, "b1":3}) + >>> a.eval(e) + >>> 
e.get("a") + 6 + """ + + def eval(s, env): + env.set(s.dst, env.get(s.src0) * env.get(s.src1)) + + def get_opcode(self): + return "*" + + +class Lth(BinOp): + """ + Example: + >>> a = Lth("a", "b0", "b1") + >>> e = Env({"b0":2, "b1":3}) + >>> a.eval(e) + >>> e.get("a") + True + """ + + def eval(s, env): + env.set(s.dst, env.get(s.src0) < env.get(s.src1)) + + def get_opcode(self): + return "<" + + +class Geq(BinOp): + """ + Example: + >>> a = Geq("a", "b0", "b1") + >>> e = Env({"b0":2, "b1":3}) + >>> a.eval(e) + >>> e.get("a") + False + """ + + def eval(s, env): + env.set(s.dst, env.get(s.src0) >= env.get(s.src1)) + + def get_opcode(self): + return ">=" + + +class Bt(Inst): + """ + This is a Branch-If-True instruction, which diverts the control flow to the + 'true_dst' if the predicate 'pred' is true, and to the 'false_dst' + otherwise. + + Example: + >>> e = Env({"t": True, "x": 0}) + >>> a = Add("x", "x", "x") + >>> m = Mul("x", "x", "x") + >>> b = Bt("t", a, m) + >>> b.eval(e) + >>> b.get_next() == a + True + """ + + def __init__(s, cond, true_dst=None, false_dst=None): + super().__init__() + s.cond = cond + s.nexts = [true_dst, false_dst] + if true_dst != None: + true_dst.preds.append(s) + if false_dst != None: + false_dst.preds.append(s) + + def definition(s): + return set() + + def uses(s): + return set([s.cond]) + + def add_true_next(s, true_dst): + s.nexts[0] = true_dst + true_dst.preds.append(s) + + def add_next(s, false_dst): + s.nexts[1] = false_dst + false_dst.preds.append(s) + + def eval(s, env): + """ + The evaluation of the condition sets the next_iter to the instruction. + This value determines which successor instruction is to be evaluated. + Any values greater than 0 are evaluated as True, while 0 corresponds to + False. 
+ """ + if env.get(s.cond): + s.next_iter = 0 + else: + s.next_iter = 1 + + def get_next(s): + return s.nexts[s.next_iter] + + def __str__(self): + inst_s = f"{self.ID}: bt {self.cond}" + pred_s = f"\n P: {', '.join([str(inst.ID) for inst in self.preds])}" + next_s = f"\n NT:{self.nexts[0].ID} NF:{self.nexts[1].ID}" + return inst_s + pred_s + next_s + + +def interp(instruction, environment): + """ + This function evaluates a program until there is no more instructions to + evaluate. + + Example: + >>> env = Env({"m": 3, "n": 2, "zero": 0}) + >>> m_min = Add("answer", "m", "zero") + >>> n_min = Add("answer", "n", "zero") + >>> p = Lth("p", "n", "m") + >>> b = Bt("p", n_min, m_min) + >>> p.add_next(b) + >>> interp(p, env).get("answer") + 2 + """ + if instruction: + instruction.eval(environment) + return interp(instruction.get_next(), environment) + else: + return environment \ No newline at end of file diff --git a/parser.py b/parser.py new file mode 100644 index 0000000..31c33ed --- /dev/null +++ b/parser.py @@ -0,0 +1,62 @@ +""" +This file implements a parser: a function that reads a text file, and returns +a control-flow graph of instructions plus an environment mapping variables to +integer values. +""" + +from lang import * + + +def line2env(line): + """ + Maps a string (the line) to a dictionary in python. This function will be + useful to read the first line of the text file. This line contains the + initial environment of the program that will be created. If you don't like + the function, feel free to drop it off. + + Example + >>> line2env('{"zero": 0, "one": 1, "three": 3, "iter": 9}').get('one') + 1 + """ + import json + + env_dict = json.loads(line) + env_lang = Env() + for k, v in env_dict.items(): + env_lang.set(k, v) + return env_lang + + +def file2cfg_and_env(lines): + """ + Builds a control-flow graph representation for the strings stored in + `lines`. The first string represents the environment. The other strings + represent instructions. 
+ + Example: + >>> l0 = '{"a": 0, "b": 3}' + >>> l1 = 'bt a 1' + >>> l2 = 'x = add a b' + >>> env, prog = file2cfg_and_env([l0, l1, l2]) + >>> interp(prog[0], env).get("x") + 3 + + >>> l0 = '{"a": 1, "b": 3, "x": 42, "z": 0}' + >>> l1 = 'bt a 2' + >>> l2 = 'x = add a b' + >>> l3 = 'x = add x z' + >>> env, prog = file2cfg_and_env([l0, l1, l2, l3]) + >>> interp(prog[0], env).get("x") + 42 + + >>> l0 = '{"a": 1, "b": 3, "c": 5}' + >>> l1 = 'x = add a b' + >>> l2 = 'x = add x c' + >>> env, prog = file2cfg_and_env([l0, l1, l2]) + >>> interp(prog[0], env).get("x") + 9 + """ + # TODO: Implement this method. + env = line2env(lines[0]) + insts = [] + return (env, insts) \ No newline at end of file