optimize.c

/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Optimization module for BPF code intermediate representation.
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <pcap-types.h>

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <string.h>
#include <errno.h>

#include "pcap-int.h"

#include "gencode.h"
#include "optimize.h"

#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

#ifdef BDEBUG
/*
 * The internal "debug printout" flag for the filter expression optimizer.
 * The code to print that stuff is present only if BDEBUG is defined, so
 * the flag, and the routine to set it, are defined only if BDEBUG is
 * defined.
 */
static int pcap_optimizer_debug;

/*
 * Routine to set that flag.
 *
 * This is intended for libpcap developers, not for general use.
 * If you want to set these in a program, you'll have to declare this
 * routine yourself, with the appropriate DLL import attribute on Windows;
 * it's not declared in any header file, and won't be declared in any
 * header file provided by libpcap.
 */
PCAP_API void pcap_set_optimizer_debug(int value);

PCAP_API_DEF void
pcap_set_optimizer_debug(int value)
{
	pcap_optimizer_debug = value;
}

/*
 * The internal "print dot graph" flag for the filter expression optimizer.
 * The code to print that stuff is present only if BDEBUG is defined, so
 * the flag, and the routine to set it, are defined only if BDEBUG is
 * defined.
 */
static int pcap_print_dot_graph;

/*
 * Routine to set that flag.
 *
 * This is intended for libpcap developers, not for general use.
 * If you want to set these in a program, you'll have to declare this
 * routine yourself, with the appropriate DLL import attribute on Windows;
 * it's not declared in any header file, and won't be declared in any
 * header file provided by libpcap.
 */
PCAP_API void pcap_set_print_dot_graph(int value);

PCAP_API_DEF void
pcap_set_print_dot_graph(int value)
{
	pcap_print_dot_graph = value;
}

#endif
/*
 * lowest_set_bit().
 *
 * Takes a 32-bit integer as an argument.
 *
 * If handed a non-zero value, returns the index of the lowest set bit,
 * counting upwards from zero.
 *
 * If handed zero, the results are platform- and compiler-dependent.
 * Keep it out of the light, don't give it any water, don't feed it
 * after midnight, and don't pass zero to it.
 *
 * This is the same as the count of trailing zeroes in the word.
 */
#if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
/*
 * GCC 3.4 and later; we have __builtin_ctz().
 */
#define lowest_set_bit(mask) __builtin_ctz(mask)
#elif defined(_MSC_VER)
/*
 * Visual Studio; we support only 2005 and later, so use
 * _BitScanForward().
 */
#include <intrin.h>

#ifndef __clang__
#pragma intrinsic(_BitScanForward)
#endif

static __forceinline int
lowest_set_bit(int mask)
{
	unsigned long bit;

	/*
	 * Don't sign-extend mask if long is longer than int.
	 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
	 */
	if (_BitScanForward(&bit, (unsigned int)mask) == 0)
		return -1;	/* mask is zero */
	return (int)bit;
}
#elif defined(MSDOS) && defined(__DJGPP__)
/*
 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
 * we've already included.
 */
#define lowest_set_bit(mask)	(ffs((mask)) - 1)
#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
/*
 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
 * or some other platform (UN*X conforming to a sufficiently recent version
 * of the Single UNIX Specification).
 */
#include <strings.h>
#define lowest_set_bit(mask)	(ffs((mask)) - 1)
#else
/*
 * None of the above.
 * Use a perfect-hash-function-based function.
 */
static int
lowest_set_bit(int mask)
{
	unsigned int v = (unsigned int)mask;

	static const int MultiplyDeBruijnBitPosition[32] = {
		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
	};

	/*
	 * We strip off all but the lowermost set bit (v & -v),
	 * and perform a minimal perfect hash on it to look up the
	 * number of low-order zero bits in a table.
	 *
	 * See:
	 *
	 *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
	 *
	 *	http://supertech.csail.mit.edu/papers/debruijn.pdf
	 */
	return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
}
#endif
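
/*
 * Illustrative sketch, not part of the original source: whichever
 * definition of lowest_set_bit() was selected above behaves as a
 * trailing-zero count for non-zero arguments.  That is what lets
 * opt_j() below walk the set bits of an edge-dominator word with a
 * loop of this shape (example_walk_bits is a hypothetical name):
 */
#if 0
static void
example_walk_bits(bpf_u_int32 x)
{
	while (x != 0) {
		int k = lowest_set_bit(x);	/* index of the lowest 1 bit */

		x &= ~((bpf_u_int32)1 << k);	/* clear it and continue */
		/* ... use bit index k ... */
	}
}
#endif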
/*
 * Represents a deleted instruction.
 */
#define NOP -1

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS
/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)

struct vmapinfo {
	int is_const;
	bpf_int32 const_val;
};
typedef struct {
	/*
	 * A flag to indicate that further optimization is needed.
	 * Iterative passes are continued until a given pass yields no
	 * branch movement.
	 */
	int done;

	int n_blocks;
	struct block **blocks;
	int n_edges;
	struct edge **edges;

	/*
	 * A bit vector set representation of the dominators.
	 * We round up the set size to the next power of two.
	 */
	int nodewords;
	int edgewords;
	struct block **levels;
	bpf_u_int32 *space;

#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if 'a' is in uset {p}
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}

	uset all_dom_sets;
	uset all_closure_sets;
	uset all_edge_sets;

#define MODULUS 213
	struct valnode *hashtbl[MODULUS];
	int curval;
	int maxval;

	struct vmapinfo *vmap;
	struct valnode *vnode_base;
	struct valnode *next_vnode;
} opt_state_t;
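
/*
 * Illustrative sketch, not part of the original source: the SET_*
 * macros above treat an array of bpf_u_int32 as a bit vector with one
 * bit per node or edge, so a set over n_blocks nodes needs
 * nodewords = ceil(n_blocks / BITS_PER_WORD) words.  The hypothetical
 * example_uset() below shows how membership, insertion and
 * intersection compose:
 */
#if 0
static void
example_uset(void)
{
	bpf_u_int32 a[2] = { 0, 0 };	/* room for members 0..63 */
	bpf_u_int32 b[2] = { 0, 0 };

	SET_INSERT(a, 3);		/* a = { 3 } */
	SET_INSERT(a, 40);		/* a = { 3, 40 }; bit 8 of a[1] */
	SET_INSERT(b, 40);		/* b = { 40 } */
	SET_INTERSECT(a, b, 2);		/* a = { 40 } */
	if (SET_MEMBER(a, 40)) {
		/* reached: 40 survived the intersection */
	}
}
#endif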
typedef struct {
	/*
	 * Some pointers used to convert the basic block form of the code,
	 * into the array form that BPF requires.  'fstart' will point to
	 * the malloc'd array while 'ftail' is used during the recursive
	 * traversal.
	 */
	struct bpf_insn *fstart;
	struct bpf_insn *ftail;
} conv_state_t;

static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
static void opt_cleanup(opt_state_t *);

static void intern_blocks(opt_state_t *, struct icode *);

static void find_inedges(opt_state_t *, struct block *);
#ifdef BDEBUG
static void opt_dump(compiler_state_t *, struct icode *);
#endif

#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif
static void
find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
{
	int level;

	if (isMarked(ic, b))
		return;

	Mark(ic, b);
	b->link = 0;

	if (JT(b)) {
		find_levels_r(opt_state, ic, JT(b));
		find_levels_r(opt_state, ic, JF(b));
		level = MAX(JT(b)->level, JF(b)->level) + 1;
	} else
		level = 0;
	b->level = level;
	b->link = opt_state->levels[level];
	opt_state->levels[level] = b;
}

/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The opt_state->levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
static void
find_levels(opt_state_t *opt_state, struct icode *ic)
{
	memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
	unMarkAll(ic);
	find_levels_r(opt_state, ic, ic->root);
}
/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(opt_state_t *opt_state, struct block *root)
{
	int i;
	struct block *b;
	bpf_u_int32 *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = opt_state->all_dom_sets;
	i = opt_state->n_blocks * opt_state->nodewords;
	while (--i >= 0)
		*x++ = 0xFFFFFFFFU;
	/* Root starts off empty. */
	for (i = opt_state->nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level now found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
		}
	}
}
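
/*
 * Illustrative note, not part of the original source: the propagation
 * above depends on visiting levels from the root downward.  Every
 * dominator set starts as "all nodes", the root's set is reset to
 * empty, and each node intersects its own set (plus itself) into both
 * successors, so a node's final set is the set of nodes appearing on
 * every path from the root to it.
 */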
static void
propedom(opt_state_t *opt_state, struct edge *ep)
{
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
	}
}

/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(opt_state_t *opt_state, struct block *root)
{
	int i;
	uset x;
	struct block *b;

	x = opt_state->all_edge_sets;
	for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
		x[i] = 0xFFFFFFFFU;

	/* root->level is the highest level now found. */
	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			propedom(opt_state, &b->et);
			propedom(opt_state, &b->ef);
		}
	}
}
/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(opt_state_t *opt_state, struct block *root)
{
	int i;
	struct block *b;

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)opt_state->all_closure_sets, 0,
	    opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));

	/* root->level is the highest level now found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);
			if (JT(b) == 0)
				continue;
			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
		}
	}
}
/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(struct stmt *s)
{
	register int c = s->code;

	if (c == NOP)
		return -1;

	switch (BPF_CLASS(c)) {

	case BPF_RET:
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

	case BPF_LD:
	case BPF_LDX:
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

	case BPF_ST:
		return A_ATOM;

	case BPF_STX:
		return X_ATOM;

	case BPF_JMP:
	case BPF_ALU:
		if (BPF_SRC(c) == BPF_X)
			return AX_ATOM;
		return A_ATOM;

	case BPF_MISC:
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
	abort();
	/* NOTREACHED */
}
/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(struct stmt *s)
{
	if (s->code == NOP)
		return -1;

	switch (BPF_CLASS(s->code)) {

	case BPF_LD:
	case BPF_ALU:
		return A_ATOM;

	case BPF_LDX:
		return X_ATOM;

	case BPF_ST:
	case BPF_STX:
		return s->k;

	case BPF_MISC:
		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
	return -1;
}
/*
 * Compute the sets of registers used, defined, and killed by 'b'.
 *
 * "Used" means that a statement in 'b' uses the register before any
 * statement in 'b' defines it, i.e. it uses the value left in
 * that register by a predecessor block of this block.
 * "Defined" means that a statement in 'b' defines it.
 * "Killed" means that a statement in 'b' defines it before any
 * statement in 'b' uses it, i.e. it kills the value left in that
 * register by a predecessor block of this block.
 */
static void
compute_local_ud(struct block *b)
{
	struct slist *s;
	atomset def = 0, use = 0, killed = 0;
	int atom;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
			continue;
		atom = atomuse(&s->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
		atom = atomdef(&s->s);
		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
				killed |= ATOMMASK(atom);
			def |= ATOMMASK(atom);
		}
	}
	if (BPF_CLASS(b->s.code) == BPF_JMP) {
		/*
		 * XXX - what about RET?
		 */
		atom = atomuse(&b->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
	}

	b->def = def;
	b->kill = killed;
	b->in_use = use;
}
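
/*
 * Illustrative note, not part of the original source: for a block whose
 * statements are
 *
 *	ld  [14]	A is defined before any use
 *	add x		X is used before any definition
 *	st  M[2]	M[2] is defined before any use
 *
 * compute_local_ud() records in_use = {X}, def = {A, M[2]} and
 * kill = {A, M[2]}: the A and M[2] values left by predecessor blocks
 * are overwritten here, while the incoming X value is live into the
 * block.
 */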
/*
 * Assume graph is already leveled.
 */
static void
find_ud(opt_state_t *opt_state, struct block *root)
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level now found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}
static void
init_val(opt_state_t *opt_state)
{
	opt_state->curval = 0;
	opt_state->next_vnode = opt_state->vnode_base;
	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
}

/* Because we really don't have an IR, this stuff is a little messy. */
static int
F(opt_state_t *opt_state, int code, int v0, int v1)
{
	u_int hash;
	int val;
	struct valnode *p;

	hash = (u_int)code ^ ((u_int)v0 << 4) ^ ((u_int)v1 << 8);
	hash %= MODULUS;

	for (p = opt_state->hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++opt_state->curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		opt_state->vmap[val].const_val = v0;
		opt_state->vmap[val].is_const = 1;
	}
	p = opt_state->next_vnode++;
	p->val = val;
	p->code = code;
	p->v0 = v0;
	p->v1 = v1;
	p->next = opt_state->hashtbl[hash];
	opt_state->hashtbl[hash] = p;

	return val;
}
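
/*
 * Illustrative sketch, not part of the original source: F() hands
 * identical (code, v0, v1) triples the same value number, which is how
 * the optimizer recognizes recomputations of a value.  The hypothetical
 * example_value_numbering() shows the effect:
 */
#if 0
static void
example_value_numbering(opt_state_t *opt_state)
{
	int v1, v2;

	/* Two "ld #4" expressions yield the same value number... */
	v1 = F(opt_state, BPF_LD|BPF_IMM|BPF_W, 4, 0);
	v2 = F(opt_state, BPF_LD|BPF_IMM|BPF_W, 4, 0);
	/*
	 * ...so v1 == v2 here, and vmap[v1].is_const is set with
	 * const_val 4, which is what later constant folding keys off.
	 */
}
#endif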
static inline void
vstore(struct stmt *s, int *valp, int newval, int alter)
{
	if (alter && newval != VAL_UNKNOWN && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}

/*
 * Do constant-folding on binary operators.
 * (Unary operators are handled elsewhere.)
 */
static void
fold_op(compiler_state_t *cstate, opt_state_t *opt_state,
    struct stmt *s, int v0, int v1)
{
	bpf_u_int32 a, b;

	a = opt_state->vmap[v0].const_val;
	b = opt_state->vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			bpf_error(cstate, "division by zero");
		a /= b;
		break;

	case BPF_MOD:
		if (b == 0)
			bpf_error(cstate, "modulus by zero");
		a %= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_XOR:
		a ^= b;
		break;

	case BPF_LSH:
		a <<= b;
		break;

	case BPF_RSH:
		a >>= b;
		break;

	default:
		abort();
	}
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	opt_state->done = 0;
}
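
/*
 * Illustrative note, not part of the original source: when both operands
 * are known, fold_op() rewrites the ALU statement into a load of the
 * folded result.  For example, if the A register is known to hold 2 and
 * the statement is "add #3", the statement becomes "ld #5" and
 * opt_state->done is cleared so another optimization pass runs over the
 * simplified code.
 */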
static inline struct slist *
this_op(struct slist *s)
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}

static void
opt_not(struct block *b)
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}

static void
opt_peep(opt_state_t *opt_state, struct block *b)
{
	struct slist *s;
	struct slist *next, *last;
	int val;

	s = b->stmts;
	if (s == 0)
		return;

	last = s;
	for (/*empty*/; /*empty*/; s = next) {
		/*
		 * Skip over nops.
		 */
		s = this_op(s);
		if (s == 0)
			break;	/* nothing left in the block */

		/*
		 * Find the next real instruction after that one
		 * (skipping nops).
		 */
		next = this_op(s->next);
		if (next == 0)
			break;	/* no next instruction */
		last = next;

		/*
		 * st  M[k]	-->	st  M[k]
		 * ldx M[k]		tax
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			opt_state->done = 0;
			next->s.code = BPF_MISC|BPF_TAX;
		}
		/*
		 * ld  #k	-->	ldx  #k
		 * tax			txa
		 */
		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;
			opt_state->done = 0;
		}
		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				continue;

			/*
			 * Check that the instruction following the ldi
			 * is an addx, or it's an ldxms with an addx
			 * following it (with 0 or more nops between the
			 * ldxms and addx).
			 */
			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				continue;

			/*
			 * Check that a tax follows that (with 0 or more
			 * nops between them).
			 */
			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				continue;

			/*
			 * Check that an ild follows that (with 0 or more
			 * nops between them).
			 */
			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				continue;
			/*
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2		{s}
			 * (005) ldxms   [14]		{next}  -- optional
			 * (006) addx			{add}
			 * (007) tax			{tax}
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 * XXX We need to check that X is not
			 * subsequently used, because we want to change
			 * what'll be in it after this sequence.
			 *
			 * We know we can eliminate the accumulator
			 * modifications earlier in the sequence since
			 * it is defined by the last stmt of this sequence
			 * (i.e., the last statement of the sequence loads
			 * a value into the accumulator, so we can eliminate
			 * earlier operations on the accumulator).
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			opt_state->done = 0;
		}
	}
	/*
	 * If the comparison at the end of a block is an equality
	 * comparison against a constant, and nobody uses the value
	 * we leave in the A register at the end of a block, and
	 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometimes optimize it away.
	 */
	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		/*
		 * We can optimize away certain subtractions of the
		 * X register.
		 */
		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
			val = b->val[X_ATOM];
			if (opt_state->vmap[val].is_const) {
				/*
				 * If we have a subtract to do a comparison,
				 * and the X register is a known constant,
				 * we can merge this value into the
				 * comparison:
				 *
				 * sub x  ->	nop
				 * jeq #y	jeq #(x+y)
				 */
				b->s.k += opt_state->vmap[val].const_val;
				last->s.code = NOP;
				opt_state->done = 0;
			} else if (b->s.k == 0) {
				/*
				 * If the X register isn't a constant,
				 * and the comparison in the test is
				 * against 0, we can compare with the
				 * X register, instead:
				 *
				 * sub x  ->	nop
				 * jeq #0	jeq x
				 */
				last->s.code = NOP;
				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
				opt_state->done = 0;
			}
		}
		/*
		 * Likewise, a constant subtract can be simplified:
		 *
		 * sub #x ->	nop
		 * jeq #y ->	jeq #(x+y)
		 */
		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
			last->s.code = NOP;
			b->s.k += last->s.k;
			opt_state->done = 0;
		}
		/*
		 * And, similarly, a constant AND can be simplified
		 * if we're testing against 0, i.e.:
		 *
		 * and #k	nop
		 * jeq #0  ->	jset #k
		 */
		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
		    b->s.k == 0) {
			b->s.k = last->s.k;
			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
			last->s.code = NOP;
			opt_state->done = 0;
			opt_not(b);
		}
	}
	/*
	 * jset #0        ->   never
	 * jset #ffffffff ->   always
	 */
	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
		if (b->s.k == 0)
			JT(b) = JF(b);
		if ((u_int)b->s.k == 0xffffffffU)
			JF(b) = JT(b);
	}
	/*
	 * If we're comparing against the index register, and the index
	 * register is a known constant, we can just compare against that
	 * constant.
	 */
	val = b->val[X_ATOM];
	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
		bpf_int32 v = opt_state->vmap[val].const_val;
		b->s.code &= ~BPF_X;
		b->s.k = v;
	}
	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		bpf_int32 v = opt_state->vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

		case BPF_JEQ:
			v = v == b->s.k;
			break;

		case BPF_JGT:
			v = (unsigned)v > (unsigned)b->s.k;
			break;

		case BPF_JGE:
			v = (unsigned)v >= (unsigned)b->s.k;
			break;

		case BPF_JSET:
			v &= b->s.k;
			break;

		default:
			abort();
		}
		if (JF(b) != JT(b))
			opt_state->done = 0;
		if (v)
			JF(b) = JT(b);
		else
			JT(b) = JF(b);
	}
}
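
/*
 * Illustrative note, not part of the original source: the first peephole
 * rewrite above turns a store that is immediately reloaded into the
 * index register into a register-to-register transfer:
 *
 *	st  M[3]		st  M[3]
 *	ldx M[3]	-->	tax
 *
 * The store itself is kept (a later block may still load M[3]); only
 * the memory round trip into X is avoided.
 */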
/*
 * Compute the symbolic value of expression of 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
static void
opt_stmt(compiler_state_t *cstate, opt_state_t *opt_state,
    struct stmt *s, int val[], int alter)
{
	int op;
	int v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(opt_state, s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += opt_state->vmap[v].const_val;
			v = F(opt_state, s->code, s->k, 0L);
			opt_state->done = 0;
		}
		else
			v = F(opt_state, s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(opt_state, s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(opt_state, s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -opt_state->vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_MOD|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_XOR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				/*
				 * don't optimize away "sub #0"
				 * as it may be needed later to
				 * fixup the generated math code
				 */
				if (op == BPF_ADD ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR || op == BPF_XOR) {
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (opt_state->vmap[val[A_ATOM]].is_const) {
				fold_op(cstate, opt_state, s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_MOD|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_XOR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
			if (opt_state->vmap[val[A_ATOM]].is_const) {
				fold_op(cstate, opt_state, s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = opt_state->vmap[val[X_ATOM]].const_val;
				opt_state->done = 0;
				val[A_ATOM] =
					F(opt_state, s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, etc.
		 */
		if (alter && opt_state->vmap[val[A_ATOM]].is_const
		    && opt_state->vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = opt_state->vmap[v].const_val;
			opt_state->done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = opt_state->vmap[v].const_val;
			opt_state->done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}
static void
deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		if (atom == AX_ATOM) {
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			opt_state->done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}

static void
opt_deadstores(opt_state_t *opt_state, register struct block *b)
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(opt_state, &s->s, last);
	deadstmt(opt_state, &b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			opt_state->done = 0;
		}
}
static void
opt_blk(compiler_state_t *cstate, opt_state_t *opt_state,
    struct block *b, int do_stmts)
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval, xval;

#if 0
	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
			do_stmts = 0;
			break;
		}
#endif

	/*
	 * Initialize the atom values.
	 */
	p = b->in_edges;
	if (p == 0) {
		/*
		 * We have no predecessors, so everything is undefined
		 * upon entry to this block.
		 */
		memset((char *)b->val, 0, sizeof(b->val));
	} else {
		/*
		 * Inherit values from our predecessors.
		 *
		 * First, get the values from the predecessor along the
		 * first edge leading to this node.
		 */
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		/*
		 * Now look at all the other nodes leading to this node.
		 * If, for the predecessor along that edge, a register
		 * has a different value from the one we have (i.e.,
		 * control paths are merging, and the merging paths
		 * assign different values to that register), give the
		 * register the undefined value of 0.
		 */
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	aval = b->val[A_ATOM];
	xval = b->val[X_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(cstate, opt_state, &s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
	if (do_stmts &&
	    ((b->out_use == 0 &&
	      aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
	      xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			opt_state->done = 0;
		}
	} else {
		opt_peep(opt_state, b);
		opt_deadstores(opt_state, b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}
/*
 * Return true if any register that is used on exit from 'succ' has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(struct block *b, struct block *succ)
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}

static struct block *
fold_edge(struct block *child, struct edge *ep)
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands of the branch instructions are
		 * identical, so the result is true if a true
		 * branch was taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
		return JF(child);

	return 0;
}
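
/*
 * Illustrative note, not part of the original source: fold_edge() is
 * what lets chains of identical tests collapse.  If an edge dominator
 * performed the same test on the same A value and we arrived along its
 * true edge, the child's test must also be true, so opt_j() can point
 * the edge straight at JT(child); arriving along the false edge of the
 * identical test likewise forces JF(child).  The one asymmetric case is
 * the true edge of a "jeq #k": it pins the accumulator to k, so any
 * equality test against a different constant must be false.
 */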
static void
opt_j(opt_state_t *opt_state, struct edge *ep)
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			opt_state->done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
 top:
	for (i = 0; i < opt_state->edgewords; ++i) {
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			k = lowest_set_bit(x);
			x &=~ (1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, opt_state->edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				opt_state->done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}
static void
or_pullup(opt_state_t *opt_state, struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	for (;;) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	samep = &JF(*diffp);
	for (;;) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	opt_state->done = 0;
}
static void
and_pullup(opt_state_t *opt_state, struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	for (;;) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	samep = &JT(*diffp);
	for (;;) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	opt_state->done = 0;
}
static void
opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
    int do_stmts)
{
	int i, maxlevel;
	struct block *p;

	init_val(opt_state);
	maxlevel = ic->root->level;

	find_inedges(opt_state, ic->root);
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link)
			opt_blk(cstate, opt_state, p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			opt_j(opt_state, &p->et);
			opt_j(opt_state, &p->ef);
		}
	}

	find_inedges(opt_state, ic->root);
	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			or_pullup(opt_state, p);
			and_pullup(opt_state, p);
		}
	}
}

static inline void
link_inedge(struct edge *parent, struct block *child)
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}

static void
find_inedges(opt_state_t *opt_state, struct block *root)
{
	int i;
	struct block *b;

	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors. Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}
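
#ifdef notdef
/*
 * Debugging sketch added for illustration (not part of the original
 * source): after find_inedges() has run, each block's predecessors can be
 * walked through the in_edges list that link_inedge() threads together.
 */
static void
print_inedges(opt_state_t *opt_state)
{
	int i;
	struct edge *ep;

	for (i = 0; i < opt_state->n_blocks; ++i) {
		printf("block %d preds:", opt_state->blocks[i]->id);
		for (ep = opt_state->blocks[i]->in_edges; ep != 0; ep = ep->next)
			printf(" %d", ep->pred->id);
		putchar('\n');
	}
}
#endif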

static void
opt_root(struct block **b)
{
	struct slist *tmp, *s;

	s = (*b)->stmts;
	(*b)->stmts = 0;
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	tmp = (*b)->stmts;
	if (tmp != 0)
		sappend(s, tmp);
	(*b)->stmts = s;

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}

static void
opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
    int do_stmts)
{
#ifdef BDEBUG
	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(cstate, ic);
	}
#endif
	do {
		opt_state->done = 1;
		find_levels(opt_state, ic);
		find_dom(opt_state, ic->root);
		find_closure(opt_state, ic->root);
		find_ud(opt_state, ic->root);
		find_edom(opt_state, ic->root);
		opt_blks(cstate, opt_state, ic, do_stmts);
#ifdef BDEBUG
		if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
			opt_dump(cstate, ic);
		}
#endif
	} while (!opt_state->done);
}

/*
 * Optimize the filter code in its dag representation.
 */
void
bpf_optimize(compiler_state_t *cstate, struct icode *ic)
{
	opt_state_t opt_state;

	opt_init(cstate, &opt_state, ic);
	opt_loop(cstate, &opt_state, ic, 0);
	opt_loop(cstate, &opt_state, ic, 1);
	intern_blocks(&opt_state, ic);
#ifdef BDEBUG
	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
		printf("after intern_blocks()\n");
		opt_dump(cstate, ic);
	}
#endif
	opt_root(&ic->root);
#ifdef BDEBUG
	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
		printf("after opt_root()\n");
		opt_dump(cstate, ic);
	}
#endif
	opt_cleanup(&opt_state);
}

static void
make_marks(struct icode *ic, struct block *p)
{
	if (!isMarked(ic, p)) {
		Mark(ic, p);
		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(ic, JT(p));
			make_marks(ic, JF(p));
		}
	}
}

/*
 * Mark code array such that isMarked(ic, p) is true
 * only for nodes that are alive.
 */
static void
mark_code(struct icode *ic)
{
	ic->cur_mark += 1;
	make_marks(ic, ic->root);
}
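
/*
 * Added note (not part of the original source): marking is done with a
 * generation counter rather than a per-node flag.  Roughly, Mark() stamps
 * a node with the current value of ic->cur_mark and isMarked() compares
 * against it, so bumping the counter invalidates every existing mark
 * without having to visit the nodes.
 */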

/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
static int
eq_slist(struct slist *x, struct slist *y)
{
	for (;;) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}

static inline int
eq_blk(struct block *b0, struct block *b1)
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}
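
#ifdef notdef
/*
 * Illustrative sketch added for this writeup (not part of the original
 * source): eq_slist() skips NOPs, so a list in which an earlier pass has
 * turned a statement into a NOP still compares equal to its shorter twin.
 * The statements below are made up for illustration only.
 */
static void
eq_slist_example(void)
{
	struct slist a[2], b[1];

	memset(a, 0, sizeof(a));
	memset(b, 0, sizeof(b));
	a[0].s.code = NOP;			/* e.g. a store elided earlier */
	a[0].next = &a[1];
	a[1].s.code = BPF_LD|BPF_H|BPF_ABS;
	a[1].s.k = 12;
	b[0].s.code = BPF_LD|BPF_H|BPF_ABS;
	b[0].s.k = 12;
	/* eq_slist(a, b) should return 1: the NOP is ignored. */
}
#endif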

static void
intern_blocks(opt_state_t *opt_state, struct icode *ic)
{
	struct block *p;
	int i, j;
	int done1; /* don't shadow global */
 top:
	done1 = 1;
	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->link = 0;

	mark_code(ic);

	for (i = opt_state->n_blocks - 1; --i >= 0; ) {
		if (!isMarked(ic, opt_state->blocks[i]))
			continue;
		for (j = i + 1; j < opt_state->n_blocks; ++j) {
			if (!isMarked(ic, opt_state->blocks[j]))
				continue;
			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
					opt_state->blocks[j]->link : opt_state->blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < opt_state->n_blocks; ++i) {
		p = opt_state->blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done1 = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done1 = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done1)
		goto top;
}
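
/*
 * Added note (not part of the original source): intern_blocks() uses the
 * 'link' field as a forwarding pointer from a block to an equivalent block
 * (or to that block's own representative), then redirects every edge that
 * enters a forwarded block.  Since redirecting edges can make two blocks
 * that previously differed compare equal, the pass repeats from 'top'
 * until a full round makes no changes.
 */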

static void
opt_cleanup(opt_state_t *opt_state)
{
	free((void *)opt_state->vnode_base);
	free((void *)opt_state->vmap);
	free((void *)opt_state->edges);
	free((void *)opt_state->space);
	free((void *)opt_state->levels);
	free((void *)opt_state->blocks);
}

/*
 * Return the number of stmts in 's'.
 */
static u_int
slength(struct slist *s)
{
	u_int n = 0;

	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}

/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(struct icode *ic, struct block *p)
{
	if (p == 0 || isMarked(ic, p))
		return 0;
	Mark(ic, p);
	return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
}

/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
static void
number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
{
	int n;

	if (p == 0 || isMarked(ic, p))
		return;

	Mark(ic, p);
	n = opt_state->n_blocks++;
	p->id = n;
	opt_state->blocks[n] = p;

	number_blks_r(opt_state, ic, JT(p));
	number_blks_r(opt_state, ic, JF(p));
}

/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 *
 * Note that "stmts" means "instructions", and that this includes
 *
 *	side-effect statements in 'p' (slength(p->stmts));
 *
 *	statements in the true branch from 'p' (count_stmts(JT(p)));
 *
 *	statements in the false branch from 'p' (count_stmts(JF(p)));
 *
 *	the conditional jump itself (1);
 *
 *	an extra long jump if the true branch requires it (p->longjt);
 *
 *	an extra long jump if the false branch requires it (p->longjf).
 */
static u_int
count_stmts(struct icode *ic, struct block *p)
{
	u_int n;

	if (p == 0 || isMarked(ic, p))
		return 0;
	Mark(ic, p);
	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
}
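
/*
 * Worked example added for illustration: a branch node with three non-NOP
 * statements, longjt = 1 and longjf = 0, whose two successors are
 * statement-less return blocks, counts as 3 + 1 + 1 + 0 = 5 instructions
 * for the node itself plus 1 for each RET, i.e. 7 in total.
 */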

/*
 * Allocate memory. All allocation is done before optimization
 * is begun. A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
static void
opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
{
	bpf_u_int32 *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block. Then, put the blocks into the array.
	 */
	unMarkAll(ic);
	n = count_blocks(ic, ic->root);
	opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
	if (opt_state->blocks == NULL)
		bpf_error(cstate, "malloc");
	unMarkAll(ic);
	opt_state->n_blocks = 0;
	number_blks_r(opt_state, ic, ic->root);

	opt_state->n_edges = 2 * opt_state->n_blocks;
	opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
	if (opt_state->edges == NULL)
		bpf_error(cstate, "malloc");

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
	if (opt_state->levels == NULL)
		bpf_error(cstate, "malloc");

	opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	/* XXX */
	opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
	    + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
	if (opt_state->space == NULL)
		bpf_error(cstate, "malloc");
	p = opt_state->space;
	opt_state->all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->dom = p;
		p += opt_state->nodewords;
	}
	opt_state->all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->closure = p;
		p += opt_state->nodewords;
	}
	opt_state->all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = opt_state->blocks[i];

		b->et.edom = p;
		p += opt_state->edgewords;
		b->ef.edom = p;
		p += opt_state->edgewords;
		b->et.id = i;
		opt_state->edges[i] = &b->et;
		b->ef.id = opt_state->n_blocks + i;
		opt_state->edges[opt_state->n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	opt_state->maxval = 3 * max_stmts;
	opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
	opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
	if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
		bpf_error(cstate, "malloc");
}
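
/*
 * Worked example added for illustration: with n_blocks = 10 the graph has
 * n_edges = 20, so nodewords = 10/32 + 1 = 1 and edgewords = 20/32 + 1 = 1
 * (32-bit words).  The single allocation above then supplies
 * 2*10*1 + 20*1 = 40 bit-vector words: a dom set and a closure set for
 * each block, plus an edom set for each of the two edges of every block.
 */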

/*
 * This is only used when supporting optimizer debugging. It is
 * global state, so do *not* do more than one compile in parallel
 * and expect it to provide meaningful information.
 */
#ifdef BDEBUG
int bids[NBIDS];
#endif

/*
 * Returns true if successful. Returns false if a branch has
 * an offset that is too large. If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
static int
convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
    struct icode *ic, struct block *p)
{
	struct bpf_insn *dst;
	struct slist *src;
	u_int slen;
	u_int off;
	u_int extrajmps;	/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(ic, p))
		return (1);
	Mark(ic, p);

	if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
		return (0);
	if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
		return (0);

	slen = slength(p->stmts);
	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = (int)(dst - conv_state->fstart);

	/* generate offset[] for convenience */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			bpf_error(cstate, "not enough core");
			/*NOTREACHED*/
		}
	}
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				bpf_error(cstate, "illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

		{
			u_int i;
			int jt, jf;
			const char ljerr[] = "%s for block-local relative jump: off=%d";

#if 0
			printf("code=%x off=%d %x %x\n", src->s.code,
			    off, src->s.jt, src->s.jf);
#endif

			if (!src->s.jt || !src->s.jf) {
				bpf_error(cstate, ljerr, "no jmp destination", off);
				/*NOTREACHED*/
			}

			jt = jf = 0;
			for (i = 0; i < slen; i++) {
				if (offset[i] == src->s.jt) {
					if (jt) {
						bpf_error(cstate, ljerr, "multiple matches", off);
						/*NOTREACHED*/
					}
					if (i - off - 1 >= 256) {
						bpf_error(cstate, ljerr, "out-of-range jump", off);
						/*NOTREACHED*/
					}
					dst->jt = (u_char)(i - off - 1);
					jt++;
				}
				if (offset[i] == src->s.jf) {
					if (jf) {
						bpf_error(cstate, ljerr, "multiple matches", off);
						/*NOTREACHED*/
					}
					if (i - off - 1 >= 256) {
						bpf_error(cstate, ljerr, "out-of-range jump", off);
						/*NOTREACHED*/
					}
					dst->jf = (u_char)(i - off - 1);
					jf++;
				}
			}
			if (!jt || !jf) {
				bpf_error(cstate, ljerr, "no destination found", off);
				/*NOTREACHED*/
			}
		}
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	if (dst - conv_state->fstart < NBIDS)
		bids[dst - conv_state->fstart] = p->id + 1;
#endif
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
			/* offset too large for branch, must add a jump */
			if (p->longjt == 0) {
				/* mark this instruction and retry */
				p->longjt++;
				return(0);
			}
			/* branch if T to following jump */
			if (extrajmps >= 256) {
				bpf_error(cstate, "too many extra jumps");
				/*NOTREACHED*/
			}
			dst->jt = (u_char)extrajmps;
			extrajmps++;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;
		}
		else
			dst->jt = (u_char)off;

		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
			/* offset too large for branch, must add a jump */
			if (p->longjf == 0) {
				/* mark this instruction and retry */
				p->longjf++;
				return(0);
			}
			/* branch if F to following jump */
			/* if two jumps are inserted, F goes to second one */
			if (extrajmps >= 256) {
				bpf_error(cstate, "too many extra jumps");
				/*NOTREACHED*/
			}
			dst->jf = (u_char)extrajmps;
			extrajmps++;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;
		}
		else
			dst->jf = (u_char)off;
	}
	return (1);
}
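
/*
 * Added sketch (not part of the original source): when a conditional
 * branch cannot reach its target within the 8-bit jt/jf field, the code
 * above marks the block (p->longjt/p->longjf) and returns 0 so the caller
 * retries.  On the retry the block reserves room for an unconditional
 * jump, so roughly
 *
 *	jeq #k, jt <too far>, jf <near>
 *
 * becomes
 *
 *	jeq #k, jt 0, jf <near>		; true case falls to the next insn
 *	ja <remaining distance>		; which covers the long hop
 */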

/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation. Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp. It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory. (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.) Change the program to use pcap_freecode() when it's
 * done with the filter program. See the pcap man page.
 */
struct bpf_insn *
icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
    struct block *root, u_int *lenp)
{
	u_int n;
	struct bpf_insn *fp;
	conv_state_t conv_state;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	for (;;) {
		unMarkAll(ic);
		n = *lenp = count_stmts(ic, root);

		fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
		if (fp == NULL)
			bpf_error(cstate, "malloc");
		memset((char *)fp, 0, sizeof(*fp) * n);
		conv_state.fstart = fp;
		conv_state.ftail = fp + n;

		unMarkAll(ic);
		if (convert_code_r(cstate, &conv_state, ic, root))
			break;
		free(fp);
	}

	return fp;
}
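
#ifdef notdef
/*
 * Usage sketch added for illustration (not part of the original source):
 * per the comment above, the program that asked for the filter owns the
 * generated instructions and must release them with pcap_freecode().
 * Error handling is omitted.
 */
static void
compile_and_free_example(pcap_t *p)
{
	struct bpf_program prog;

	if (pcap_compile(p, &prog, "ip and tcp", 1, PCAP_NETMASK_UNKNOWN) == -1)
		return;
	/* ... install or inspect prog.bf_insns / prog.bf_len here ... */
	pcap_freecode(&prog);	/* releases what icode_to_fcode() allocated */
}
#endif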

/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
int
install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
	size_t prog_size;

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
		    "BPF program is not valid");
		return (-1);
	}

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
		    errno, "malloc");
		return (-1);
	}
	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
	return (0);
}
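
#ifdef notdef
/*
 * Usage sketch added for illustration (not part of the original source):
 * a capture module that filters packets in userland can implement its
 * setfilter operation by handing the program to install_bpf_program().
 * The function name is hypothetical.
 */
static int
example_setfilter(pcap_t *p, struct bpf_program *fp)
{
	return (install_bpf_program(p, fp));
}
#endif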

#ifdef BDEBUG
static void
dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
    FILE *out)
{
	int icount, noffset;
	int i;

	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
	noffset = min(block->offset + icount, (int)prog->bf_len);

	fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
	for (i = block->offset; i < noffset; i++) {
		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
	}
	fprintf(out, "\" tooltip=\"");
	for (i = 0; i < BPF_MEMWORDS; i++)
		if (block->val[i] != VAL_UNKNOWN)
			fprintf(out, "val[%d]=%d ", i, block->val[i]);
	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
	fprintf(out, "val[X]=%d", block->val[X_ATOM]);
	fprintf(out, "\"");
	if (JT(block) == NULL)
		fprintf(out, ", peripheries=2");
	fprintf(out, "];\n");

	dot_dump_node(ic, JT(block), prog, out);
	dot_dump_node(ic, JF(block), prog, out);
}

static void
dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
{
	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	if (JT(block)) {
		fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
		    block->id, JT(block)->id);
		fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
		    block->id, JF(block)->id);
	}
	dot_dump_edge(ic, JT(block), out);
	dot_dump_edge(ic, JF(block), out);
}

/* Output the block CFG using graphviz/DOT language.
 * In the CFG, each block's code, the value index of each register at EXIT,
 * and the jump relationships are shown.
 *
 * example DOT for BPF `ip src host 1.1.1.1' is:
    digraph BPF {
	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
	"block0":se -> "block1":n [label="T"];
	"block0":sw -> "block3":n [label="F"];
	"block1":se -> "block2":n [label="T"];
	"block1":sw -> "block3":n [label="F"];
    }
 *
 * After installing graphviz from http://www.graphviz.org/, save the output
 * as bpf.dot and run `dot -Tpng -O bpf.dot' to draw the graph.
 */
static void
dot_dump(compiler_state_t *cstate, struct icode *ic)
{
	struct bpf_program f;
	FILE *out = stdout;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);

	fprintf(out, "digraph BPF {\n");
	unMarkAll(ic);
	dot_dump_node(ic, ic->root, &f, out);
	unMarkAll(ic);
	dot_dump_edge(ic, ic->root, out);
	fprintf(out, "}\n");

	free((char *)f.bf_insns);
}

static void
plain_dump(compiler_state_t *cstate, struct icode *ic)
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}

static void
opt_dump(compiler_state_t *cstate, struct icode *ic)
{
	/*
	 * If the CFG, in DOT format, is requested, output it rather than
	 * the code that would be generated from that graph.
	 */
	if (pcap_print_dot_graph)
		dot_dump(cstate, ic);
	else
		plain_dump(cstate, ic);
}
#endif