pppd/optimize.c

   1 /*      From NetBSD: optimize.c,v 1.3 1995/04/29 05:42:28 cgd Exp */
   2
   3 /*
   4  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994
   5  *      The Regents of the University of California.  All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that: (1) source code distributions
   9  * retain the above copyright notice and this paragraph in its entirety, (2)
  10  * distributions including binary code include the above copyright notice and
  11  * this paragraph in its entirety in the documentation or other materials
  12  * provided with the distribution, and (3) all advertising materials mentioning
  13  * features or use of this software display the following acknowledgement:
  14  * ``This product includes software developed by the University of California,
  15  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
  16  * the University nor the names of its contributors may be used to endorse
  17  * or promote products derived from this software without specific prior
  18  * written permission.
  19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  20  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  21  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  22  *
  23  *  Optimization module for tcpdump intermediate representation.
  24  */
  25 #ifndef lint
  26 static char rcsid[] =
  27     "@(#) Header: optimize.c,v 1.45 94/06/20 19:07:55 leres Exp (LBL)";
  28 #endif
  29
  30 #include <sys/types.h>
  31 #include <sys/time.h>
  32
  33 #include <net/bpf.h>
  34 #include <net/ppp_defs.h>
  35
  36 #include <stdio.h>
  37 #ifdef __osf__
  38 #include <stdlib.h>
  39 #include <malloc.h>
  40 #endif
  41 #ifdef __NetBSD__
  42 #include <stdlib.h>
  43 #endif
  44 #include <memory.h>
  45
  46 #include "gencode.h"
  47
  48 #ifndef __GNUC__
  49 #define inline
  50 #endif
  51
  52 #define A_ATOM BPF_MEMWORDS
  53 #define X_ATOM (BPF_MEMWORDS+1)
  54
  55 #define NOP -1
  56
  57 /*
  58  * This define is used to represent *both* the accumulator and
  59  * x register in use-def computations.
  60  * Currently, the use-def code assumes only one definition per instruction.
  61  */
  62 #define AX_ATOM N_ATOMS
  63
  64 /*
  65  * A flag to indicate that further optimization is needed.
  66  * Iterative passes are continued until a given pass yields no
  67  * branch movement.
  68  */
  69 static int done;
  70
  71 /*
  72  * A block is marked if only if its mark equals the current mark.
  73  * Rather than traverse the code array, marking each item, 'cur_mark' is
  74  * incremented.  This automatically makes each element unmarked.
  75  */
  76 static int cur_mark;
  77 #define isMarked(p) ((p)->mark == cur_mark)
  78 #define unMarkAll() cur_mark += 1
  79 #define Mark(p) ((p)->mark = cur_mark)
  80
  81 static void opt_init(struct block *);
  82 static void opt_cleanup(void);
  83
  84 static void make_marks(struct block *);
  85 static void mark_code(struct block *);
  86
  87 static void intern_blocks(struct block *);
  88
  89 static int eq_slist(struct slist *, struct slist *);
  90
  91 static void find_levels_r(struct block *);
  92
  93 static void find_levels(struct block *);
  94 static void find_dom(struct block *);
  95 static void propedom(struct edge *);
  96 static void find_edom(struct block *);
  97 static void find_closure(struct block *);
  98 static int atomuse(struct stmt *);
  99 static int atomdef(struct stmt *);
 100 static void compute_local_ud(struct block *);
 101 static void find_ud(struct block *);
 102 static void init_val(void);
 103 static long F(int, long, long);
 104 static inline void vstore(struct stmt *, long *, long, int);
 105 static void opt_blk(struct block *, int);
 106 static int use_conflict(struct block *, struct block *);
 107 static void opt_j(struct edge *);
 108 static void or_pullup(struct block *);
 109 static void and_pullup(struct block *);
 110 static void opt_blks(struct block *, int);
 111 static inline void link_inedge(struct edge *, struct block *);
 112 static void find_inedges(struct block *);
 113 static void opt_root(struct block **);
 114 static void opt_loop(struct block *, int);
 115 static void fold_op(struct stmt *, long, long);
 116 static inline struct slist *this_op(struct slist *);
 117 static void opt_not(struct block *);
 118 static void opt_peep(struct block *);
 119 static void opt_stmt(struct stmt *, long[], int);
 120 static void deadstmt(struct stmt *, struct stmt *[]);
 121 static void opt_deadstores(struct block *);
 122 static void opt_blk(struct block *, int);
 123 static int use_conflict(struct block *, struct block *);
 124 static void opt_j(struct edge *);
 125 static struct block *fold_edge(struct block *, struct edge *);
 126 static inline int eq_blk(struct block *, struct block *);
 127 static int slength(struct slist *);
 128 static int count_blocks(struct block *);
 129 static void number_blks_r(struct block *);
 130 static int count_stmts(struct block *);
 131 static void convert_code_r(struct block *);
 132
 133 static int n_blocks;
 134 struct block **blocks;
 135 static int n_edges;
 136 struct edge **edges;
 137
 138 /*
 139  * A bit vector set representation of the dominators.
 140  * We round up the set size to the next power of two.
 141  */
 142 static int nodewords;
 143 static int edgewords;
 144 struct block **levels;
 145 u_long *space;
 146 #define BITS_PER_WORD (8*sizeof(u_long))
 147 /*
 148  * True if a is in uset {p}
 149  */
 150 #define SET_MEMBER(p, a) \
 151 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
 152
 153 /*
 154  * Add 'a' to uset p.
 155  */
 156 #define SET_INSERT(p, a) \
 157 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
 158
 159 /*
 160  * Delete 'a' from uset p.
 161  */
 162 #define SET_DELETE(p, a) \
 163 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
 164
 165 /*
 166  * a := a intersect b
 167  */
 168 #define SET_INTERSECT(a, b, n)\
 169 {\
 170         register u_long *_x = a, *_y = b;\
 171         register int _n = n;\
 172         while (--_n >= 0) *_x++ &= *_y++;\
 173 }
 174
 175 /*
 176  * a := a - b
 177  */
 178 #define SET_SUBTRACT(a, b, n)\
 179 {\
 180         register u_long *_x = a, *_y = b;\
 181         register int _n = n;\
 182         while (--_n >= 0) *_x++ &=~ *_y++;\
 183 }
 184
 185 /*
 186  * a := a union b
 187  */
 188 #define SET_UNION(a, b, n)\
 189 {\
 190         register u_long *_x = a, *_y = b;\
 191         register int _n = n;\
 192         while (--_n >= 0) *_x++ |= *_y++;\
 193 }
 194
 195 static uset all_dom_sets;
 196 static uset all_closure_sets;
 197 static uset all_edge_sets;
 198
 199 #ifndef MAX
 200 #define MAX(a,b) ((a)>(b)?(a):(b))
 201 #endif
 202
 203 static void
 204 find_levels_r(b)
 205         struct block *b;
 206 {
 207         int level;
 208
 209         if (isMarked(b))
 210                 return;
 211
 212         Mark(b);
 213         b->link = 0;
 214
 215         if (JT(b)) {
 216                 find_levels_r(JT(b));
 217                 find_levels_r(JF(b));
 218                 level = MAX(JT(b)->level, JF(b)->level) + 1;
 219         } else
 220                 level = 0;
 221         b->level = level;
 222         b->link = levels[level];
 223         levels[level] = b;
 224 }
 225
 226 /*
 227  * Level graph.  The levels go from 0 at the leaves to
 228  * N_LEVELS at the root.  The levels[] array points to the
 229  * first node of the level list, whose elements are linked
 230  * with the 'link' field of the struct block.
 231  */
 232 static void
 233 find_levels(root)
 234         struct block *root;
 235 {
 236         memset((char *)levels, 0, n_blocks * sizeof(*levels));
 237         unMarkAll();
 238         find_levels_r(root);
 239 }
 240
 241 /*
 242  * Find dominator relationships.
 243  * Assumes graph has been leveled.
 244  */
 245 static void
 246 find_dom(root)
 247         struct block *root;
 248 {
 249         int i;
 250         struct block *b;
 251         u_long *x;
 252
 253         /*
 254          * Initialize sets to contain all nodes.
 255          */
 256         x = all_dom_sets;
 257         i = n_blocks * nodewords;
 258         while (--i >= 0)
 259                 *x++ = ~0;
 260         /* Root starts off empty. */
 261         for (i = nodewords; --i >= 0;)
 262                 root->dom[i] = 0;
 263
 264         /* root->level is the highest level no found. */
 265         for (i = root->level; i >= 0; --i) {
 266                 for (b = levels[i]; b; b = b->link) {
 267                         SET_INSERT(b->dom, b->id);
 268                         if (JT(b) == 0)
 269                                 continue;
 270                         SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
 271                         SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
 272                 }
 273         }
 274 }
 275
 276 static void
 277 propedom(ep)
 278         struct edge *ep;
 279 {
 280         SET_INSERT(ep->edom, ep->id);
 281         if (ep->succ) {
 282                 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
 283                 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
 284         }
 285 }
 286
 287 /*
 288  * Compute edge dominators.
 289  * Assumes graph has been leveled and predecessors established.
 290  */
 291 static void
 292 find_edom(root)
 293         struct block *root;
 294 {
 295         int i;
 296         uset x;
 297         struct block *b;
 298
 299         x = all_edge_sets;
 300         for (i = n_edges * edgewords; --i >= 0; )
 301                 x[i] = ~0;
 302
 303         /* root->level is the highest level no found. */
 304         memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
 305         memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
 306         for (i = root->level; i >= 0; --i) {
 307                 for (b = levels[i]; b != 0; b = b->link) {
 308                         propedom(&b->et);
 309                         propedom(&b->ef);
 310                 }
 311         }
 312 }
 313
 314 /*
 315  * Find the backwards transitive closure of the flow graph.  These sets
 316  * are backwards in the sense that we find the set of nodes that reach
 317  * a given node, not the set of nodes that can be reached by a node.
 318  *
 319  * Assumes graph has been leveled.
 320  */
 321 static void
 322 find_closure(root)
 323         struct block *root;
 324 {
 325         int i;
 326         struct block *b;
 327
 328         /*
 329          * Initialize sets to contain no nodes.
 330          */
 331         memset((char *)all_closure_sets, 0,
 332               n_blocks * nodewords * sizeof(*all_closure_sets));
 333
 334         /* root->level is the highest level no found. */
 335         for (i = root->level; i >= 0; --i) {
 336                 for (b = levels[i]; b; b = b->link) {
 337                         SET_INSERT(b->closure, b->id);
 338                         if (JT(b) == 0)
 339                                 continue;
 340                         SET_UNION(JT(b)->closure, b->closure, nodewords);
 341                         SET_UNION(JF(b)->closure, b->closure, nodewords);
 342                 }
 343         }
 344 }
 345
 346 /*
 347  * Return the register number that is used by s.  If A and X are both
 348  * used, return AX_ATOM.  If no register is used, return -1.
 349  *
 350  * The implementation should probably change to an array access.
 351  */
 352 static int
 353 atomuse(s)
 354         struct stmt *s;
 355 {
 356         register int c = s->code;
 357
 358         if (c == NOP)
 359                 return -1;
 360
 361         switch (BPF_CLASS(c)) {
 362
 363         case BPF_RET:
 364                 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
 365                         (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
 366
 367         case BPF_LD:
 368         case BPF_LDX:
 369                 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
 370                         (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
 371
 372         case BPF_ST:
 373                 return A_ATOM;
 374
 375         case BPF_STX:
 376                 return X_ATOM;
 377
 378         case BPF_JMP:
 379         case BPF_ALU:
 380                 if (BPF_SRC(c) == BPF_X)
 381                         return AX_ATOM;
 382                 return A_ATOM;
 383
 384         case BPF_MISC:
 385                 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
 386         }
 387         abort();
 388         /* NOTREACHED */
 389 }
 390
 391 /*
 392  * Return the register number that is defined by 's'.  We assume that
 393  * a single stmt cannot define more than one register.  If no register
 394  * is defined, return -1.
 395  *
 396  * The implementation should probably change to an array access.
 397  */
 398 static int
 399 atomdef(s)
 400         struct stmt *s;
 401 {
 402         if (s->code == NOP)
 403                 return -1;
 404
 405         switch (BPF_CLASS(s->code)) {
 406
 407         case BPF_LD:
 408         case BPF_ALU:
 409                 return A_ATOM;
 410
 411         case BPF_LDX:
 412                 return X_ATOM;
 413
 414         case BPF_ST:
 415         case BPF_STX:
 416                 return s->k;
 417
 418         case BPF_MISC:
 419                 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
 420         }
 421         return -1;
 422 }
 423
 424 static void
 425 compute_local_ud(b)
 426         struct block *b;
 427 {
 428         struct slist *s;
 429         atomset def = 0, use = 0, kill = 0;
 430         int atom;
 431
 432         for (s = b->stmts; s; s = s->next) {
 433                 if (s->s.code == NOP)
 434                         continue;
 435                 atom = atomuse(&s->s);
 436                 if (atom >= 0) {
 437                         if (atom == AX_ATOM) {
 438                                 if (!ATOMELEM(def, X_ATOM))
 439                                         use |= ATOMMASK(X_ATOM);
 440                                 if (!ATOMELEM(def, A_ATOM))
 441                                         use |= ATOMMASK(A_ATOM);
 442                         }
 443                         else if (atom < N_ATOMS) {
 444                                 if (!ATOMELEM(def, atom))
 445                                         use |= ATOMMASK(atom);
 446                         }
 447                         else
 448                                 abort();
 449                 }
 450                 atom = atomdef(&s->s);
 451                 if (atom >= 0) {
 452                         if (!ATOMELEM(use, atom))
 453                                 kill |= ATOMMASK(atom);
 454                         def |= ATOMMASK(atom);
 455                 }
 456         }
 457         if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
 458                 use |= ATOMMASK(A_ATOM);
 459
 460         b->def = def;
 461         b->kill = kill;
 462         b->in_use = use;
 463 }
 464
 465 /*
 466  * Assume graph is already leveled.
 467  */
 468 static void
 469 find_ud(root)
 470         struct block *root;
 471 {
 472         int i, maxlevel;
 473         struct block *p;
 474
 475         /*
 476          * root->level is the highest level no found;
 477          * count down from there.
 478          */
 479         maxlevel = root->level;
 480         for (i = maxlevel; i >= 0; --i)
 481                 for (p = levels[i]; p; p = p->link) {
 482                         compute_local_ud(p);
 483                         p->out_use = 0;
 484                 }
 485
 486         for (i = 1; i <= maxlevel; ++i) {
 487                 for (p = levels[i]; p; p = p->link) {
 488                         p->out_use |= JT(p)->in_use | JF(p)->in_use;
 489                         p->in_use |= p->out_use &~ p->kill;
 490                 }
 491         }
 492 }
 493
 494 /*
 495  * These data structures are used in a Cocke and Shwarz style
 496  * value numbering scheme.  Since the flowgraph is acyclic,
 497  * exit values can be propagated from a node's predecessors
 498  * provided it is uniquely defined.
 499  */
 500 struct valnode {
 501         int code;
 502         long v0, v1;
 503         long val;
 504         struct valnode *next;
 505 };
 506
 507 #define MODULUS 213
 508 static struct valnode *hashtbl[MODULUS];
 509 static int curval;
 510 static int maxval;
 511
 512 /* Integer constants mapped with the load immediate opcode. */
 513 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
 514
 515 struct vmapinfo {
 516         int is_const;
 517         long const_val;
 518 };
 519
 520 struct vmapinfo *vmap;
 521 struct valnode *vnode_base;
 522 struct valnode *next_vnode;
 523
 524 static void
 525 init_val()
 526 {
 527         curval = 0;
 528         next_vnode = vnode_base;
 529         memset((char *)vmap, 0, maxval * sizeof(*vmap));
 530         memset((char *)hashtbl, 0, sizeof hashtbl);
 531 }
 532
 533 /* Because we really don't have an IR, this stuff is a little messy. */
 534 static long
 535 F(code, v0, v1)
 536         int code;
 537         long v0, v1;
 538 {
 539         u_int hash;
 540         int val;
 541         struct valnode *p;
 542
 543         hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
 544         hash %= MODULUS;
 545
 546         for (p = hashtbl[hash]; p; p = p->next)
 547                 if (p->code == code && p->v0 == v0 && p->v1 == v1)
 548                         return p->val;
 549
 550         val = ++curval;
 551         if (BPF_MODE(code) == BPF_IMM &&
 552             (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
 553                 vmap[val].const_val = v0;
 554                 vmap[val].is_const = 1;
 555         }
 556         p = next_vnode++;
 557         p->val = val;
 558         p->code = code;
 559         p->v0 = v0;
 560         p->v1 = v1;
 561         p->next = hashtbl[hash];
 562         hashtbl[hash] = p;
 563
 564         return val;
 565 }
 566
 567 static inline void
 568 vstore(s, valp, newval, alter)
 569         struct stmt *s;
 570         long *valp;
 571         long newval;
 572         int alter;
 573 {
 574         if (alter && *valp == newval)
 575                 s->code = NOP;
 576         else
 577                 *valp = newval;
 578 }
 579
 580 static void
 581 fold_op(s, v0, v1)
 582         struct stmt *s;
 583         long v0, v1;
 584 {
 585         long a, b;
 586
 587         a = vmap[v0].const_val;
 588         b = vmap[v1].const_val;
 589
 590         switch (BPF_OP(s->code)) {
 591         case BPF_ADD:
 592                 a += b;
 593                 break;
 594
 595         case BPF_SUB:
 596                 a -= b;
 597                 break;
 598
 599         case BPF_MUL:
 600                 a *= b;
 601                 break;
 602
 603         case BPF_DIV:
 604                 if (b == 0)
 605                         bpf_error("division by zero");
 606                 a /= b;
 607                 break;
 608
 609         case BPF_AND:
 610                 a &= b;
 611                 break;
 612
 613         case BPF_OR:
 614                 a |= b;
 615                 break;
 616
 617         case BPF_LSH:
 618                 a <<= b;
 619                 break;
 620
 621         case BPF_RSH:
 622                 a >>= b;
 623                 break;
 624
 625         case BPF_NEG:
 626                 a = -a;
 627                 break;
 628
 629         default:
 630                 abort();
 631         }
 632         s->k = a;
 633         s->code = BPF_LD|BPF_IMM;
 634         done = 0;
 635 }
 636
 637 static inline struct slist *
 638 this_op(s)
 639         struct slist *s;
 640 {
 641         while (s != 0 && s->s.code == NOP)
 642                 s = s->next;
 643         return s;
 644 }
 645
 646 static void
 647 opt_not(b)
 648         struct block *b;
 649 {
 650         struct block *tmp = JT(b);
 651
 652         JT(b) = JF(b);
 653         JF(b) = tmp;
 654 }
 655
 656 static void
 657 opt_peep(b)
 658         struct block *b;
 659 {
 660         struct slist *s;
 661         struct slist *next, *last;
 662         int val;
 663         long v;
 664
 665         s = b->stmts;
 666         if (s == 0)
 667                 return;
 668
 669         last = s;
 670         while (1) {
 671                 s = this_op(s);
 672                 if (s == 0)
 673                         break;
 674                 next = this_op(s->next);
 675                 if (next == 0)
 676                         break;
 677                 last = next;
 678
 679                 /*
 680                  * st  M[k]     -->     st  M[k]
 681                  * ldx M[k]             tax
 682                  */
 683                 if (s->s.code == BPF_ST &&
 684                     next->s.code == (BPF_LDX|BPF_MEM) &&
 685                     s->s.k == next->s.k) {
 686                         done = 0;
 687                         next->s.code = BPF_MISC|BPF_TAX;
 688                 }
 689                 /*
 690                  * ld  #k       -->     ldx  #k
 691                  * tax                  txa
 692                  */
 693                 if (s->s.code == (BPF_LD|BPF_IMM) &&
 694                     next->s.code == (BPF_MISC|BPF_TAX)) {
 695                         s->s.code = BPF_LDX|BPF_IMM;
 696                         next->s.code = BPF_MISC|BPF_TXA;
 697                         done = 0;
 698                 }
 699                 /*
 700                  * This is an ugly special case, but it happens
 701                  * when you say tcp[k] or udp[k] where k is a constant.
 702                  */
 703                 if (s->s.code == (BPF_LD|BPF_IMM)) {
 704                         struct slist *add, *tax, *ild;
 705
 706                         /*
 707                          * Check that X isn't used on exit from this
 708                          * block (which the optimizer might cause).
 709                          * We know the code generator won't generate
 710                          * any local dependencies.
 711                          */
 712                         if (ATOMELEM(b->out_use, X_ATOM))
 713                                 break;
 714
 715                         if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
 716                                 add = next;
 717                         else
 718                                 add = this_op(next->next);
 719                         if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
 720                                 break;
 721
 722                         tax = this_op(add->next);
 723                         if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
 724                                 break;
 725
 726                         ild = this_op(tax->next);
 727                         if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
 728                             BPF_MODE(ild->s.code) != BPF_IND)
 729                                 break;
 730                         /*
 731                          * XXX We need to check that X is not
 732                          * subsequently used.  We know we can eliminate the
 733                          * accumulator modifications since it is defined
 734                          * by the last stmt of this sequence.
 735                          *
 736                          * We want to turn this sequence:
 737                          *
 738                          * (004) ldi     #0x2           {s}
 739                          * (005) ldxms   [14]           {next}  -- optional
 740                          * (006) addx                   {add}
 741                          * (007) tax                    {tax}
 742                          * (008) ild     [x+0]          {ild}
 743                          *
 744                          * into this sequence:
 745                          *
 746                          * (004) nop
 747                          * (005) ldxms   [14]
 748                          * (006) nop
 749                          * (007) nop
 750                          * (008) ild     [x+2]
 751                          *
 752                          */
 753                         ild->s.k += s->s.k;
 754                         s->s.code = NOP;
 755                         add->s.code = NOP;
 756                         tax->s.code = NOP;
 757                         done = 0;
 758                 }
 759                 s = next;
 760         }
 761         /*
 762          * If we have a subtract to do a comparison, and the X register
 763          * is a known constant, we can merge this value into the
 764          * comparison.
 765          */
 766         if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
 767             !ATOMELEM(b->out_use, A_ATOM)) {
 768                 val = b->val[X_ATOM];
 769                 if (vmap[val].is_const) {
 770                         b->s.k += vmap[val].const_val;
 771                         last->s.code = NOP;
 772                         done = 0;
 773                 } else if (b->s.k == 0) {
 774                         /*
 775                          * sub x  ->    nop
 776                          * j  #0        j  x
 777                          */
 778                         last->s.code = NOP;
 779                         b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
 780                                 BPF_X;
 781                         done = 0;
 782                 }
 783         }
 784         /*
 785          * Likewise, a constant subtract can be simplified.
 786          */
 787         else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
 788                  !ATOMELEM(b->out_use, A_ATOM)) {
 789                 b->s.k += last->s.k;
 790                 last->s.code = NOP;
 791                 done = 0;
 792         }
 793         /*
 794          * and #k       nop
 795          * jeq #0  ->   jset #k
 796          */
 797         if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
 798             !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
 799                 b->s.k = last->s.k;
 800                 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
 801                 last->s.code = NOP;
 802                 done = 0;
 803                 opt_not(b);
 804         }
 805         /*
 806          * If the accumulator is a known constant, we can compute the
 807          * comparison result.
 808          */
 809         val = b->val[A_ATOM];
 810         if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
 811                 v = vmap[val].const_val;
 812                 switch (BPF_OP(b->s.code)) {
 813
 814                 case BPF_JEQ:
 815                         v = v == b->s.k;
 816                         break;
 817
 818                 case BPF_JGT:
 819                         v = v > b->s.k;
 820                         break;
 821
 822                 case BPF_JGE:
 823                         v = v >= b->s.k;
 824                         break;
 825
 826                 case BPF_JSET:
 827                         v &= b->s.k;
 828                         break;
 829
 830                 default:
 831                         abort();
 832                 }
 833                 if (JF(b) != JT(b))
 834                         done = 0;
 835                 if (v)
 836                         JF(b) = JT(b);
 837                 else
 838                         JT(b) = JF(b);
 839         }
 840 }
 841
 842 /*
 843  * Compute the symbolic value of expression of 's', and update
 844  * anything it defines in the value table 'val'.  If 'alter' is true,
 845  * do various optimizations.  This code would be cleaner if symbolic
 846  * evaluation and code transformations weren't folded together.
 847  */
 848 static void
 849 opt_stmt(s, val, alter)
 850         struct stmt *s;
 851         long val[];
 852         int alter;
 853 {
 854         int op;
 855         long v;
 856
 857         switch (s->code) {
 858
 859         case BPF_LD|BPF_ABS|BPF_W:
 860         case BPF_LD|BPF_ABS|BPF_H:
 861         case BPF_LD|BPF_ABS|BPF_B:
 862                 v = F(s->code, s->k, 0L);
 863                 vstore(s, &val[A_ATOM], v, alter);
 864                 break;
 865
 866         case BPF_LD|BPF_IND|BPF_W:
 867         case BPF_LD|BPF_IND|BPF_H:
 868         case BPF_LD|BPF_IND|BPF_B:
 869                 v = val[X_ATOM];
 870                 if (alter && vmap[v].is_const) {
 871                         s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
 872                         s->k += vmap[v].const_val;
 873                         v = F(s->code, s->k, 0L);
 874                         done = 0;
 875                 }
 876                 else
 877                         v = F(s->code, s->k, v);
 878                 vstore(s, &val[A_ATOM], v, alter);
 879                 break;
 880
 881         case BPF_LD|BPF_LEN:
 882                 v = F(s->code, 0L, 0L);
 883                 vstore(s, &val[A_ATOM], v, alter);
 884                 break;
 885
 886         case BPF_LD|BPF_IMM:
 887                 v = K(s->k);
 888                 vstore(s, &val[A_ATOM], v, alter);
 889                 break;
 890
 891         case BPF_LDX|BPF_IMM:
 892                 v = K(s->k);
 893                 vstore(s, &val[X_ATOM], v, alter);
 894                 break;
 895
 896         case BPF_LDX|BPF_MSH|BPF_B:
 897                 v = F(s->code, s->k, 0L);
 898                 vstore(s, &val[X_ATOM], v, alter);
 899                 break;
 900
 901         case BPF_ALU|BPF_NEG:
 902                 if (alter && vmap[val[A_ATOM]].is_const) {
 903                         s->code = BPF_LD|BPF_IMM;
 904                         s->k = -vmap[val[A_ATOM]].const_val;
 905                         val[A_ATOM] = K(s->k);
 906                 }
 907                 else
 908                         val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
 909                 break;
 910
 911         case BPF_ALU|BPF_ADD|BPF_K:
 912         case BPF_ALU|BPF_SUB|BPF_K:
 913         case BPF_ALU|BPF_MUL|BPF_K:
 914         case BPF_ALU|BPF_DIV|BPF_K:
 915         case BPF_ALU|BPF_AND|BPF_K:
 916         case BPF_ALU|BPF_OR|BPF_K:
 917         case BPF_ALU|BPF_LSH|BPF_K:
 918         case BPF_ALU|BPF_RSH|BPF_K:
 919                 op = BPF_OP(s->code);
 920                 if (alter) {
 921                         if (s->k == 0) {
 922                                 if (op == BPF_ADD || op == BPF_SUB ||
 923                                     op == BPF_LSH || op == BPF_RSH ||
 924                                     op == BPF_OR) {
 925                                         s->code = NOP;
 926                                         break;
 927                                 }
 928                                 if (op == BPF_MUL || op == BPF_AND) {
 929                                         s->code = BPF_LD|BPF_IMM;
 930                                         val[A_ATOM] = K(s->k);
 931                                         break;
 932                                 }
 933                         }
 934                         if (vmap[val[A_ATOM]].is_const) {
 935                                 fold_op(s, val[A_ATOM], K(s->k));
 936                                 val[A_ATOM] = K(s->k);
 937                                 break;
 938                         }
 939                 }
 940                 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
 941                 break;
 942
 943         case BPF_ALU|BPF_ADD|BPF_X:
 944         case BPF_ALU|BPF_SUB|BPF_X:
 945         case BPF_ALU|BPF_MUL|BPF_X:
 946         case BPF_ALU|BPF_DIV|BPF_X:
 947         case BPF_ALU|BPF_AND|BPF_X:
 948         case BPF_ALU|BPF_OR|BPF_X:
 949         case BPF_ALU|BPF_LSH|BPF_X:
 950         case BPF_ALU|BPF_RSH|BPF_X:
 951                 op = BPF_OP(s->code);
 952                 if (alter && vmap[val[X_ATOM]].is_const) {
 953                         if (vmap[val[A_ATOM]].is_const) {
 954                                 fold_op(s, val[A_ATOM], val[X_ATOM]);
 955                                 val[A_ATOM] = K(s->k);
 956                         }
 957                         else {
 958                                 s->code = BPF_ALU|BPF_K|op;
 959                                 s->k = vmap[val[X_ATOM]].const_val;
 960                                 done = 0;
 961                                 val[A_ATOM] =
 962                                         F(s->code, val[A_ATOM], K(s->k));
 963                         }
 964                         break;
 965                 }
 966                 /*
 967                  * Check if we're doing something to an accumulator
 968                  * that is 0, and simplify.  This may not seem like
 969                  * much of a simplification but it could open up further
 970                  * optimizations.
 971                  * XXX We could also check for mul by 1, and -1, etc.
 972                  */
 973                 if (alter && vmap[val[A_ATOM]].is_const
 974                     && vmap[val[A_ATOM]].const_val == 0) {
 975                         if (op == BPF_ADD || op == BPF_OR ||
 976                             op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
 977                                 s->code = BPF_MISC|BPF_TXA;
 978                                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
 979                                 break;
 980                         }
 981                         else if (op == BPF_MUL || op == BPF_DIV ||
 982                                  op == BPF_AND) {
 983                                 s->code = BPF_LD|BPF_IMM;
 984                                 s->k = 0;
 985                                 vstore(s, &val[A_ATOM], K(s->k), alter);
 986                                 break;
 987                         }
 988                         else if (op == BPF_NEG) {
 989                                 s->code = NOP;
 990                                 break;
 991                         }
 992                 }
 993                 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
 994                 break;
 995
 996         case BPF_MISC|BPF_TXA:
 997                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
 998                 break;
 999
1000         case BPF_LD|BPF_MEM:
1001                 v = val[s->k];
1002                 if (alter && vmap[v].is_const) {
1003                         s->code = BPF_LD|BPF_IMM;
1004                         s->k = vmap[v].const_val;
1005                         done = 0;
1006                 }
1007                 vstore(s, &val[A_ATOM], v, alter);
1008                 break;
1009
1010         case BPF_MISC|BPF_TAX:
1011                 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1012                 break;
1013
1014         case BPF_LDX|BPF_MEM:
1015                 v = val[s->k];
1016                 if (alter && vmap[v].is_const) {
1017                         s->code = BPF_LDX|BPF_IMM;
1018                         s->k = vmap[v].const_val;
1019                         done = 0;
1020                 }
1021                 vstore(s, &val[X_ATOM], v, alter);
1022                 break;
1023
1024         case BPF_ST:
1025                 vstore(s, &val[s->k], val[A_ATOM], alter);
1026                 break;
1027
1028         case BPF_STX:
1029                 vstore(s, &val[s->k], val[X_ATOM], alter);
1030                 break;
1031         }
1032 }
1033
1034 static void
1035 deadstmt(s, last)
1036         register struct stmt *s;
1037         register struct stmt *last[];
1038 {
1039         register int atom;
1040
1041         atom = atomuse(s);
1042         if (atom >= 0) {
1043                 if (atom == AX_ATOM) {
1044                         last[X_ATOM] = 0;
1045                         last[A_ATOM] = 0;
1046                 }
1047                 else
1048                         last[atom] = 0;
1049         }
1050         atom = atomdef(s);
1051         if (atom >= 0) {
1052                 if (last[atom]) {
1053                         done = 0;
1054                         last[atom]->code = NOP;
1055                 }
1056                 last[atom] = s;
1057         }
1058 }
1059
1060 static void
1061 opt_deadstores(b)
1062         register struct block *b;
1063 {
1064         register struct slist *s;
1065         register int atom;
1066         struct stmt *last[N_ATOMS];
1067
1068         memset((char *)last, 0, sizeof last);
1069
1070         for (s = b->stmts; s != 0; s = s->next)
1071                 deadstmt(&s->s, last);
1072         deadstmt(&b->s, last);
1073
1074         for (atom = 0; atom < N_ATOMS; ++atom)
1075                 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1076                         last[atom]->code = NOP;
1077                         done = 0;
1078                 }
1079 }
1080
1081 static void
1082 opt_blk(b, do_stmts)
1083         struct block *b;
1084         int do_stmts;
1085 {
1086         struct slist *s;
1087         struct edge *p;
1088         int i;
1089         long aval;
1090
1091         /*
1092          * Initialize the atom values.
1093          * If we have no predecessors, everything is undefined.
1094          * Otherwise, we inherent our values from our predecessors.
1095          * If any register has an ambiguous value (i.e. control paths are
1096          * merging) give it the undefined value of 0.
1097          */
1098         p = b->in_edges;
1099         if (p == 0)
1100                 memset((char *)b->val, 0, sizeof(b->val));
1101         else {
1102                 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1103                 while ((p = p->next) != NULL) {
1104                         for (i = 0; i < N_ATOMS; ++i)
1105                                 if (b->val[i] != p->pred->val[i])
1106                                         b->val[i] = 0;
1107                 }
1108         }
1109         aval = b->val[A_ATOM];
1110         for (s = b->stmts; s; s = s->next)
1111                 opt_stmt(&s->s, b->val, do_stmts);
1112
1113         /*
1114          * This is a special case: if we don't use anything from this
1115          * block, and we load the accumulator with value that is
1116          * already there, eliminate all the statements.
1117          */
1118         if (do_stmts && b->out_use == 0 && aval != 0 &&
1119             b->val[A_ATOM] == aval)
1120                 b->stmts = 0;
1121         else {
1122                 opt_peep(b);
1123                 opt_deadstores(b);
1124         }
1125         /*
1126          * Set up values for branch optimizer.
1127          */
1128         if (BPF_SRC(b->s.code) == BPF_K)
1129                 b->oval = K(b->s.k);
1130         else
1131                 b->oval = b->val[X_ATOM];
1132         b->et.code = b->s.code;
1133         b->ef.code = -b->s.code;
1134 }
1135
1136 /*
1137  * Return true if any register that is used on exit from 'succ', has
1138  * an exit value that is different from the corresponding exit value
1139  * from 'b'.
1140  */
1141 static int
1142 use_conflict(b, succ)
1143         struct block *b, *succ;
1144 {
1145         int atom;
1146         atomset use = succ->out_use;
1147
1148         if (use == 0)
1149                 return 0;
1150
1151         for (atom = 0; atom < N_ATOMS; ++atom)
1152                 if (ATOMELEM(use, atom))
1153                         if (b->val[atom] != succ->val[atom])
1154                                 return 1;
1155         return 0;
1156 }
1157
1158 static struct block *
1159 fold_edge(child, ep)
1160         struct block *child;
1161         struct edge *ep;
1162 {
1163         int sense;
1164         int aval0, aval1, oval0, oval1;
1165         int code = ep->code;
1166
1167         if (code < 0) {
1168                 code = -code;
1169                 sense = 0;
1170         } else
1171                 sense = 1;
1172
1173         if (child->s.code != code)
1174                 return 0;
1175
1176         aval0 = child->val[A_ATOM];
1177         oval0 = child->oval;
1178         aval1 = ep->pred->val[A_ATOM];
1179         oval1 = ep->pred->oval;
1180
1181         if (aval0 != aval1)
1182                 return 0;
1183
1184         if (oval0 == oval1)
1185                 /*
1186                  * The operands are identical, so the
1187                  * result is true if a true branch was
1188                  * taken to get here, otherwise false.
1189                  */
1190                 return sense ? JT(child) : JF(child);
1191
1192         if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1193                 /*
1194                  * At this point, we only know the comparison if we
1195                  * came down the true branch, and it was an equality
1196                  * comparison with a constant.  We rely on the fact that
1197                  * distinct constants have distinct value numbers.
1198                  */
1199                 return JF(child);
1200
1201         return 0;
1202 }
1203
1204 static void
1205 opt_j(ep)
1206         struct edge *ep;
1207 {
1208         register int i, k;
1209         register struct block *target;
1210
1211         if (JT(ep->succ) == 0)
1212                 return;
1213
1214         if (JT(ep->succ) == JF(ep->succ)) {
1215                 /*
1216                  * Common branch targets can be eliminated, provided
1217                  * there is no data dependency.
1218                  */
1219                 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1220                         done = 0;
1221                         ep->succ = JT(ep->succ);
1222                 }
1223         }
1224         /*
1225          * For each edge dominator that matches the successor of this
1226          * edge, promote the edge successor to the its grandchild.
1227          *
1228          * XXX We violate the set abstraction here in favor a reasonably
1229          * efficient loop.
1230          */
1231  top:
1232         for (i = 0; i < edgewords; ++i) {
1233                 register u_long x = ep->edom[i];
1234
1235                 while (x != 0) {
1236                         k = ffs(x) - 1;
1237                         x &=~ (1 << k);
1238                         k += i * BITS_PER_WORD;
1239
1240                         target = fold_edge(ep->succ, edges[k]);
1241                         /*
1242                          * Check that there is no data dependency between
1243                          * nodes that will be violated if we move the edge.
1244                          */
1245                         if (target != 0 && !use_conflict(ep->pred, target)) {
1246                                 done = 0;
1247                                 ep->succ = target;
1248                                 if (JT(target) != 0)
1249                                         /*
1250                                          * Start over unless we hit a leaf.
1251                                          */
1252                                         goto top;
1253                                 return;
1254                         }
1255                 }
1256         }
1257 }
1258
1259
1260 static void
1261 or_pullup(b)
1262         struct block *b;
1263 {
1264         int val, at_top;
1265         struct block *pull;
1266         struct block **diffp, **samep;
1267         struct edge *ep;
1268
1269         ep = b->in_edges;
1270         if (ep == 0)
1271                 return;
1272
1273         /*
1274          * Make sure each predecessor loads the same value.
1275          * XXX why?
1276          */
1277         val = ep->pred->val[A_ATOM];
1278         for (ep = ep->next; ep != 0; ep = ep->next)
1279                 if (val != ep->pred->val[A_ATOM])
1280                         return;
1281
1282         if (JT(b->in_edges->pred) == b)
1283                 diffp = &JT(b->in_edges->pred);
1284         else
1285                 diffp = &JF(b->in_edges->pred);
1286
1287         at_top = 1;
1288         while (1) {
1289                 if (*diffp == 0)
1290                         return;
1291
1292                 if (JT(*diffp) != JT(b))
1293                         return;
1294
1295                 if (!SET_MEMBER((*diffp)->dom, b->id))
1296                         return;
1297
1298                 if ((*diffp)->val[A_ATOM] != val)
1299                         break;
1300
1301                 diffp = &JF(*diffp);
1302                 at_top = 0;
1303         }
1304         samep = &JF(*diffp);
1305         while (1) {
1306                 if (*samep == 0)
1307                         return;
1308
1309                 if (JT(*samep) != JT(b))
1310                         return;
1311
1312                 if (!SET_MEMBER((*samep)->dom, b->id))
1313                         return;
1314
1315                 if ((*samep)->val[A_ATOM] == val)
1316                         break;
1317
1318                 /* XXX Need to check that there are no data dependencies
1319                    between dp0 and dp1.  Currently, the code generator
1320                    will not produce such dependencies. */
1321                 samep = &JF(*samep);
1322         }
1323 #ifdef notdef
1324         /* XXX This doesn't cover everything. */
1325         for (i = 0; i < N_ATOMS; ++i)
1326                 if ((*samep)->val[i] != pred->val[i])
1327                         return;
1328 #endif
1329         /* Pull up the node. */
1330         pull = *samep;
1331         *samep = JF(pull);
1332         JF(pull) = *diffp;
1333
1334         /*
1335          * At the top of the chain, each predecessor needs to point at the
1336          * pulled up node.  Inside the chain, there is only one predecessor
1337          * to worry about.
1338          */
1339         if (at_top) {
1340                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1341                         if (JT(ep->pred) == b)
1342                                 JT(ep->pred) = pull;
1343                         else
1344                                 JF(ep->pred) = pull;
1345                 }
1346         }
1347         else
1348                 *diffp = pull;
1349
1350         done = 0;
1351 }
1352
1353 static void
1354 and_pullup(b)
1355         struct block *b;
1356 {
1357         int val, at_top;
1358         struct block *pull;
1359         struct block **diffp, **samep;
1360         struct edge *ep;
1361
1362         ep = b->in_edges;
1363         if (ep == 0)
1364                 return;
1365
1366         /*
1367          * Make sure each predecessor loads the same value.
1368          */
1369         val = ep->pred->val[A_ATOM];
1370         for (ep = ep->next; ep != 0; ep = ep->next)
1371                 if (val != ep->pred->val[A_ATOM])
1372                         return;
1373
1374         if (JT(b->in_edges->pred) == b)
1375                 diffp = &JT(b->in_edges->pred);
1376         else
1377                 diffp = &JF(b->in_edges->pred);
1378
1379         at_top = 1;
1380         while (1) {
1381                 if (*diffp == 0)
1382                         return;
1383
1384                 if (JF(*diffp) != JF(b))
1385                         return;
1386
1387                 if (!SET_MEMBER((*diffp)->dom, b->id))
1388                         return;
1389
1390                 if ((*diffp)->val[A_ATOM] != val)
1391                         break;
1392
1393                 diffp = &JT(*diffp);
1394                 at_top = 0;
1395         }
1396         samep = &JT(*diffp);
1397         while (1) {
1398                 if (*samep == 0)
1399                         return;
1400
1401                 if (JF(*samep) != JF(b))
1402                         return;
1403
1404                 if (!SET_MEMBER((*samep)->dom, b->id))
1405                         return;
1406
1407                 if ((*samep)->val[A_ATOM] == val)
1408                         break;
1409
1410                 /* XXX Need to check that there are no data dependencies
1411                    between diffp and samep.  Currently, the code generator
1412                    will not produce such dependencies. */
1413                 samep = &JT(*samep);
1414         }
1415 #ifdef notdef
1416         /* XXX This doesn't cover everything. */
1417         for (i = 0; i < N_ATOMS; ++i)
1418                 if ((*samep)->val[i] != pred->val[i])
1419                         return;
1420 #endif
1421         /* Pull up the node. */
1422         pull = *samep;
1423         *samep = JT(pull);
1424         JT(pull) = *diffp;
1425
1426         /*
1427          * At the top of the chain, each predecessor needs to point at the
1428          * pulled up node.  Inside the chain, there is only one predecessor
1429          * to worry about.
1430          */
1431         if (at_top) {
1432                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1433                         if (JT(ep->pred) == b)
1434                                 JT(ep->pred) = pull;
1435                         else
1436                                 JF(ep->pred) = pull;
1437                 }
1438         }
1439         else
1440                 *diffp = pull;
1441
1442         done = 0;
1443 }
1444
1445 static void
1446 opt_blks(root, do_stmts)
1447         struct block *root;
1448         int do_stmts;
1449 {
1450         int i, maxlevel;
1451         struct block *p;
1452
1453         init_val();
1454         maxlevel = root->level;
1455         for (i = maxlevel; i >= 0; --i)
1456                 for (p = levels[i]; p; p = p->link)
1457                         opt_blk(p, do_stmts);
1458
1459         if (do_stmts)
1460                 /*
1461                  * No point trying to move branches; it can't possibly
1462                  * make a difference at this point.
1463                  */
1464                 return;
1465
1466         for (i = 1; i <= maxlevel; ++i) {
1467                 for (p = levels[i]; p; p = p->link) {
1468                         opt_j(&p->et);
1469                         opt_j(&p->ef);
1470                 }
1471         }
1472         for (i = 1; i <= maxlevel; ++i) {
1473                 for (p = levels[i]; p; p = p->link) {
1474                         or_pullup(p);
1475                         and_pullup(p);
1476                 }
1477         }
1478 }
1479
1480 static inline void
1481 link_inedge(parent, child)
1482         struct edge *parent;
1483         struct block *child;
1484 {
1485         parent->next = child->in_edges;
1486         child->in_edges = parent;
1487 }
1488
1489 static void
1490 find_inedges(root)
1491         struct block *root;
1492 {
1493         int i;
1494         struct block *b;
1495
1496         for (i = 0; i < n_blocks; ++i)
1497                 blocks[i]->in_edges = 0;
1498
1499         /*
1500          * Traverse the graph, adding each edge to the predecessor
1501          * list of its successors.  Skip the leaves (i.e. level 0).
1502          */
1503         for (i = root->level; i > 0; --i) {
1504                 for (b = levels[i]; b != 0; b = b->link) {
1505                         link_inedge(&b->et, JT(b));
1506                         link_inedge(&b->ef, JF(b));
1507                 }
1508         }
1509 }
1510
1511 static void
1512 opt_root(b)
1513         struct block **b;
1514 {
1515         struct slist *tmp, *s;
1516
1517         s = (*b)->stmts;
1518         (*b)->stmts = 0;
1519         while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1520                 *b = JT(*b);
1521
1522         tmp = (*b)->stmts;
1523         if (tmp != 0)
1524                 sappend(s, tmp);
1525         (*b)->stmts = s;
1526 }
1527
1528 static void
1529 opt_loop(root, do_stmts)
1530         struct block *root;
1531         int do_stmts;
1532 {
1533
1534 #ifdef BDEBUG
1535         if (dflag > 1)
1536                 opt_dump(root);
1537 #endif
1538         do {
1539                 done = 1;
1540                 find_levels(root);
1541                 find_dom(root);
1542                 find_closure(root);
1543                 find_inedges(root);
1544                 find_ud(root);
1545                 find_edom(root);
1546                 opt_blks(root, do_stmts);
1547 #ifdef BDEBUG
1548                 if (dflag > 1)
1549                         opt_dump(root);
1550 #endif
1551         } while (!done);
1552 }
1553
1554 /*
1555  * Optimize the filter code in its dag representation.
1556  */
1557 void
1558 bpf_optimize(rootp)
1559         struct block **rootp;
1560 {
1561         struct block *root;
1562
1563         root = *rootp;
1564
1565         opt_init(root);
1566         opt_loop(root, 0);
1567         opt_loop(root, 1);
1568         intern_blocks(root);
1569         opt_root(rootp);
1570         opt_cleanup();
1571 }
1572
1573 static void
1574 make_marks(p)
1575         struct block *p;
1576 {
1577         if (!isMarked(p)) {
1578                 Mark(p);
1579                 if (BPF_CLASS(p->s.code) != BPF_RET) {
1580                         make_marks(JT(p));
1581                         make_marks(JF(p));
1582                 }
1583         }
1584 }
1585
1586 /*
1587  * Mark code array such that isMarked(i) is true
1588  * only for nodes that are alive.
1589  */
1590 static void
1591 mark_code(p)
1592         struct block *p;
1593 {
1594         cur_mark += 1;
1595         make_marks(p);
1596 }
1597
1598 /*
1599  * True iff the two stmt lists load the same value from the packet into
1600  * the accumulator.
1601  */
1602 static int
1603 eq_slist(x, y)
1604         struct slist *x, *y;
1605 {
1606         while (1) {
1607                 while (x && x->s.code == NOP)
1608                         x = x->next;
1609                 while (y && y->s.code == NOP)
1610                         y = y->next;
1611                 if (x == 0)
1612                         return y == 0;
1613                 if (y == 0)
1614                         return x == 0;
1615                 if (x->s.code != y->s.code || x->s.k != y->s.k)
1616                         return 0;
1617                 x = x->next;
1618                 y = y->next;
1619         }
1620 }
1621
1622 static inline int
1623 eq_blk(b0, b1)
1624         struct block *b0, *b1;
1625 {
1626         if (b0->s.code == b1->s.code &&
1627             b0->s.k == b1->s.k &&
1628             b0->et.succ == b1->et.succ &&
1629             b0->ef.succ == b1->ef.succ)
1630                 return eq_slist(b0->stmts, b1->stmts);
1631         return 0;
1632 }
1633
1634 static void
1635 intern_blocks(root)
1636         struct block *root;
1637 {
1638         struct block *p;
1639         int i, j;
1640         int done;
1641  top:
1642         done = 1;
1643         for (i = 0; i < n_blocks; ++i)
1644                 blocks[i]->link = 0;
1645
1646         mark_code(root);
1647
1648         for (i = n_blocks - 1; --i >= 0; ) {
1649                 if (!isMarked(blocks[i]))
1650                         continue;
1651                 for (j = i + 1; j < n_blocks; ++j) {
1652                         if (!isMarked(blocks[j]))
1653                                 continue;
1654                         if (eq_blk(blocks[i], blocks[j])) {
1655                                 blocks[i]->link = blocks[j]->link ?
1656                                         blocks[j]->link : blocks[j];
1657                                 break;
1658                         }
1659                 }
1660         }
1661         for (i = 0; i < n_blocks; ++i) {
1662                 p = blocks[i];
1663                 if (JT(p) == 0)
1664                         continue;
1665                 if (JT(p)->link) {
1666                         done = 0;
1667                         JT(p) = JT(p)->link;
1668                 }
1669                 if (JF(p)->link) {
1670                         done = 0;
1671                         JF(p) = JF(p)->link;
1672                 }
1673         }
1674         if (!done)
1675                 goto top;
1676 }
1677
1678 static void
1679 opt_cleanup()
1680 {
1681         free((void *)vnode_base);
1682         free((void *)vmap);
1683         free((void *)edges);
1684         free((void *)space);
1685         free((void *)levels);
1686         free((void *)blocks);
1687 }
1688
1689 /*
1690  * Return the number of stmts in 's'.
1691  */
1692 static int
1693 slength(s)
1694         struct slist *s;
1695 {
1696         int n = 0;
1697
1698         for (; s; s = s->next)
1699                 if (s->s.code != NOP)
1700                         ++n;
1701         return n;
1702 }
1703
1704 /*
1705  * Return the number of nodes reachable by 'p'.
1706  * All nodes should be initially unmarked.
1707  */
1708 static int
1709 count_blocks(p)
1710         struct block *p;
1711 {
1712         if (p == 0 || isMarked(p))
1713                 return 0;
1714         Mark(p);
1715         return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1716 }
1717
1718 /*
1719  * Do a depth first search on the flow graph, numbering the
1720  * the basic blocks, and entering them into the 'blocks' array.`
1721  */
1722 static void
1723 number_blks_r(p)
1724         struct block *p;
1725 {
1726         int n;
1727
1728         if (p == 0 || isMarked(p))
1729                 return;
1730
1731         Mark(p);
1732         n = n_blocks++;
1733         p->id = n;
1734         blocks[n] = p;
1735
1736         number_blks_r(JT(p));
1737         number_blks_r(JF(p));
1738 }
1739
1740 /*
1741  * Return the number of stmts in the flowgraph reachable by 'p'.
1742  * The nodes should be unmarked before calling.
1743  */
1744 static int
1745 count_stmts(p)
1746         struct block *p;
1747 {
1748         int n;
1749
1750         if (p == 0 || isMarked(p))
1751                 return 0;
1752         Mark(p);
1753         n = count_stmts(JT(p)) + count_stmts(JF(p));
1754         return slength(p->stmts) + n + 1;
1755 }
1756
1757 /*
1758  * Allocate memory.  All allocation is done before optimization
1759  * is begun.  A linear bound on the size of all data structures is computed
1760  * from the total number of blocks and/or statements.
1761  */
1762 static void
1763 opt_init(root)
1764         struct block *root;
1765 {
1766         u_long *p;
1767         int i, n, max_stmts;
1768
1769         /*
1770          * First, count the blocks, so we can malloc an array to map
1771          * block number to block.  Then, put the blocks into the array.
1772          */
1773         unMarkAll();
1774         n = count_blocks(root);
1775         blocks = (struct block **)malloc(n * sizeof(*blocks));
1776         unMarkAll();
1777         n_blocks = 0;
1778         number_blks_r(root);
1779
1780         n_edges = 2 * n_blocks;
1781         edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1782
1783         /*
1784          * The number of levels is bounded by the number of nodes.
1785          */
1786         levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1787
1788         edgewords = n_edges / (8 * sizeof(u_long)) + 1;
1789         nodewords = n_blocks / (8 * sizeof(u_long)) + 1;
1790
1791         /* XXX */
1792         space = (u_long *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1793                                  + n_edges * edgewords * sizeof(*space));
1794         p = space;
1795         all_dom_sets = p;
1796         for (i = 0; i < n; ++i) {
1797                 blocks[i]->dom = p;
1798                 p += nodewords;
1799         }
1800         all_closure_sets = p;
1801         for (i = 0; i < n; ++i) {
1802                 blocks[i]->closure = p;
1803                 p += nodewords;
1804         }
1805         all_edge_sets = p;
1806         for (i = 0; i < n; ++i) {
1807                 register struct block *b = blocks[i];
1808
1809                 b->et.edom = p;
1810                 p += edgewords;
1811                 b->ef.edom = p;
1812                 p += edgewords;
1813                 b->et.id = i;
1814                 edges[i] = &b->et;
1815                 b->ef.id = n_blocks + i;
1816                 edges[n_blocks + i] = &b->ef;
1817                 b->et.pred = b;
1818                 b->ef.pred = b;
1819         }
1820         max_stmts = 0;
1821         for (i = 0; i < n; ++i)
1822                 max_stmts += slength(blocks[i]->stmts) + 1;
1823         /*
1824          * We allocate at most 3 value numbers per statement,
1825          * so this is an upper bound on the number of valnodes
1826          * we'll need.
1827          */
1828         maxval = 3 * max_stmts;
1829         vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
1830         vnode_base = (struct valnode *)malloc(maxval * sizeof(*vmap));
1831 }
1832
1833 /*
1834  * Some pointers used to convert the basic block form of the code,
1835  * into the array form that BPF requires.  'fstart' will point to
1836  * the malloc'd array while 'ftail' is used during the recursive traversal.
1837  */
1838 static struct bpf_insn *fstart;
1839 static struct bpf_insn *ftail;
1840
1841 #ifdef BDEBUG
1842 int bids[1000];
1843 #endif
1844
1845 static void
1846 convert_code_r(p)
1847         struct block *p;
1848 {
1849         struct bpf_insn *dst;
1850         struct slist *src;
1851         int slen;
1852         u_int off;
1853
1854         if (p == 0 || isMarked(p))
1855                 return;
1856         Mark(p);
1857
1858         convert_code_r(JF(p));
1859         convert_code_r(JT(p));
1860
1861         slen = slength(p->stmts);
1862         dst = ftail -= slen + 1;
1863
1864         p->offset = dst - fstart;
1865
1866         for (src = p->stmts; src; src = src->next) {
1867                 if (src->s.code == NOP)
1868                         continue;
1869                 dst->code = (u_short)src->s.code;
1870                 dst->k = src->s.k;
1871                 ++dst;
1872         }
1873 #ifdef BDEBUG
1874         bids[dst - fstart] = p->id + 1;
1875 #endif
1876         dst->code = (u_short)p->s.code;
1877         dst->k = p->s.k;
1878         if (JT(p)) {
1879                 off = JT(p)->offset - (p->offset + slen) - 1;
1880                 if (off >= 256)
1881                         bpf_error("long jumps not supported");
1882                 dst->jt = off;
1883                 off = JF(p)->offset - (p->offset + slen) - 1;
1884                 if (off >= 256)
1885                         bpf_error("long jumps not supported");
1886                 dst->jf = off;
1887         }
1888 }
1889
1890
1891 /*
1892  * Convert flowgraph intermediate representation to the
1893  * BPF array representation.  Set *lenp to the number of instructions.
1894  */
1895 struct bpf_insn *
1896 icode_to_fcode(root, lenp)
1897         struct block *root;
1898         int *lenp;
1899 {
1900         int n;
1901         struct bpf_insn *fp;
1902
1903         unMarkAll();
1904         n = *lenp = count_stmts(root);
1905
1906         fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
1907         memset((char *)fp, 0, sizeof(*fp) * n);
1908         fstart = fp;
1909         ftail = fp + n;
1910
1911         unMarkAll();
1912         convert_code_r(root);
1913
1914         return fp;
1915 }
1916
1917 #ifdef BDEBUG
1918 opt_dump(root)
1919         struct block *root;
1920 {
1921         struct bpf_program f;
1922
1923         memset(bids, 0, sizeof bids);
1924         f.bf_insns = icode_to_fcode(root, &f.bf_len);
1925         bpf_dump(&f, 1);
1926         putchar('\n');
1927         free((char *)f.bf_insns);
1928 }
1929 #endif