ovn/lib/expr.c

   1 /*
   2  * Copyright (c) 2015 Nicira, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at:
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <config.h>
  18 #include "expr.h"
  19 #include "dynamic-string.h"
  20 #include "json.h"
  21 #include "lex.h"
  22 #include "match.h"
  23 #include "ofp-actions.h"
  24 #include "shash.h"
  25 #include "simap.h"
  26 #include "openvswitch/vlog.h"
  27
  28 VLOG_DEFINE_THIS_MODULE(expr);
  29 \f
  30 /* Returns the name of measurement level 'level'. */
  31 const char *
  32 expr_level_to_string(enum expr_level level)
  33 {
  34     switch (level) {
  35     case EXPR_L_NOMINAL: return "nominal";
  36     case EXPR_L_BOOLEAN: return "Boolean";
  37     case EXPR_L_ORDINAL: return "ordinal";
  38     default: OVS_NOT_REACHED();
  39     }
  40 }
  41 \f
  42 /* Relational operators. */
  43
  44 /* Returns a string form of relational operator 'relop'. */
  45 const char *
  46 expr_relop_to_string(enum expr_relop relop)
  47 {
  48     switch (relop) {
  49     case EXPR_R_EQ: return "==";
  50     case EXPR_R_NE: return "!=";
  51     case EXPR_R_LT: return "<";
  52     case EXPR_R_LE: return "<=";
  53     case EXPR_R_GT: return ">";
  54     case EXPR_R_GE: return ">=";
  55     default: OVS_NOT_REACHED();
  56     }
  57 }
  58
  59 bool
  60 expr_relop_from_token(enum lex_type type, enum expr_relop *relop)
  61 {
  62     enum expr_relop r;
  63
  64     switch ((int) type) {
  65     case LEX_T_EQ: r = EXPR_R_EQ; break;
  66     case LEX_T_NE: r = EXPR_R_NE; break;
  67     case LEX_T_LT: r = EXPR_R_LT; break;
  68     case LEX_T_LE: r = EXPR_R_LE; break;
  69     case LEX_T_GT: r = EXPR_R_GT; break;
  70     case LEX_T_GE: r = EXPR_R_GE; break;
  71     default: return false;
  72     }
  73
  74     if (relop) {
  75         *relop = r;
  76     }
  77     return true;
  78 }
  79
  80 /* Returns the relational operator that 'relop' becomes if you turn the
  81  * relation's operands around, e.g. EXPR_R_EQ does not change because "a == b"
  82  * and "b == a" are equivalent, but EXPR_R_LE becomes EXPR_R_GE because "a <=
  83  * b" is equivalent to "b >= a". */
  84 static enum expr_relop
  85 expr_relop_turn(enum expr_relop relop)
  86 {
  87     switch (relop) {
  88     case EXPR_R_EQ: return EXPR_R_EQ;
  89     case EXPR_R_NE: return EXPR_R_NE;
  90     case EXPR_R_LT: return EXPR_R_GT;
  91     case EXPR_R_LE: return EXPR_R_GE;
  92     case EXPR_R_GT: return EXPR_R_LT;
  93     case EXPR_R_GE: return EXPR_R_LE;
  94     default: OVS_NOT_REACHED();
  95     }
  96 }
  97
  98 /* Returns the relational operator that is the opposite of 'relop'. */
  99 static enum expr_relop
 100 expr_relop_invert(enum expr_relop relop)
 101 {
 102     switch (relop) {
 103     case EXPR_R_EQ: return EXPR_R_NE;
 104     case EXPR_R_NE: return EXPR_R_EQ;
 105     case EXPR_R_LT: return EXPR_R_GE;
 106     case EXPR_R_LE: return EXPR_R_GT;
 107     case EXPR_R_GT: return EXPR_R_LE;
 108     case EXPR_R_GE: return EXPR_R_LT;
 109     default: OVS_NOT_REACHED();
 110     }
 111 }
 112 \f
 113 /* Constructing and manipulating expressions. */
 114
 115 /* Creates and returns a logical AND or OR expression (according to 'type',
 116  * which must be EXPR_T_AND or EXPR_T_OR) that initially has no
 117  * sub-expressions.  (To satisfy the invariants for expressions, the caller
 118  * must add at least two sub-expressions whose types are different from
 119  * 'type'.) */
 120 struct expr *
 121 expr_create_andor(enum expr_type type)
 122 {
 123     struct expr *e = xmalloc(sizeof *e);
 124     e->type = type;
 125     list_init(&e->andor);
 126     return e;
 127 }
 128
 129 /* Returns a logical AND or OR expression (according to 'type', which must be
 130  * EXPR_T_AND or EXPR_T_OR) whose sub-expressions are 'a' and 'b', with some
 131  * flexibility:
 132  *
 133  *     - If 'a' or 'b' is NULL, just returns the other one (which means that if
 134  *       that other one is not of the given 'type', then the returned
 135  *       expression is not either).
 136  *
 137  *     - If 'a' or 'b', or both, have type 'type', then they are combined into
 138  *       a single node that satisfies the invariants for expressions. */
 139 struct expr *
 140 expr_combine(enum expr_type type, struct expr *a, struct expr *b)
 141 {
 142     if (!a) {
 143         return b;
 144     } else if (!b) {
 145         return a;
 146     } else if (a->type == type) {
 147         if (b->type == type) {
 148             list_splice(&a->andor, b->andor.next, &b->andor);
 149             free(b);
 150         } else {
 151             list_push_back(&a->andor, &b->node);
 152         }
 153         return a;
 154     } else if (b->type == type) {
 155         list_push_front(&b->andor, &a->node);
 156         return b;
 157     } else {
 158         struct expr *e = expr_create_andor(type);
 159         list_push_back(&e->andor, &a->node);
 160         list_push_back(&e->andor, &b->node);
 161         return e;
 162     }
 163 }
 164
 165 static void
 166 expr_insert_andor(struct expr *andor, struct expr *before, struct expr *new)
 167 {
 168     if (new->type == andor->type) {
 169         if (andor->type == EXPR_T_AND) {
 170             /* Conjunction junction, what's your function? */
 171         }
 172         list_splice(&before->node, new->andor.next, &new->andor);
 173         free(new);
 174     } else {
 175         list_insert(&before->node, &new->node);
 176     }
 177 }
 178
 179 /* Returns an EXPR_T_BOOLEAN expression with value 'b'. */
 180 struct expr *
 181 expr_create_boolean(bool b)
 182 {
 183     struct expr *e = xmalloc(sizeof *e);
 184     e->type = EXPR_T_BOOLEAN;
 185     e->boolean = b;
 186     return e;
 187 }
 188
 189 static void
 190 expr_not(struct expr *expr)
 191 {
 192     struct expr *sub;
 193
 194     switch (expr->type) {
 195     case EXPR_T_CMP:
 196         expr->cmp.relop = expr_relop_invert(expr->cmp.relop);
 197         break;
 198
 199     case EXPR_T_AND:
 200     case EXPR_T_OR:
 201         LIST_FOR_EACH (sub, node, &expr->andor) {
 202             expr_not(sub);
 203         }
 204         expr->type = expr->type == EXPR_T_AND ? EXPR_T_OR : EXPR_T_AND;
 205         break;
 206
 207     case EXPR_T_BOOLEAN:
 208         expr->boolean = !expr->boolean;
 209         break;
 210     default:
 211         OVS_NOT_REACHED();
 212     }
 213 }
 214
 215 static struct expr *
 216 expr_fix_andor(struct expr *expr, bool short_circuit)
 217 {
 218     struct expr *sub, *next;
 219
 220     LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
 221         if (sub->type == EXPR_T_BOOLEAN) {
 222             if (sub->boolean == short_circuit) {
 223                 expr_destroy(expr);
 224                 return expr_create_boolean(short_circuit);
 225             } else {
 226                 list_remove(&sub->node);
 227                 expr_destroy(sub);
 228             }
 229         }
 230     }
 231
 232     if (list_is_short(&expr->andor)) {
 233         if (list_is_empty(&expr->andor)) {
 234             free(expr);
 235             return expr_create_boolean(!short_circuit);
 236         } else {
 237             sub = expr_from_node(list_front(&expr->andor));
 238             free(expr);
 239             return sub;
 240         }
 241     } else {
 242         return expr;
 243     }
 244 }
 245
 246 /* Returns 'expr' modified so that top-level oddities are fixed up:
 247  *
 248  *     - Eliminates any EXPR_T_BOOLEAN operands at the top level.
 249  *
 250  *     - Replaces one-operand EXPR_T_AND or EXPR_T_OR by its subexpression. */
 251 static struct expr *
 252 expr_fix(struct expr *expr)
 253 {
 254     switch (expr->type) {
 255     case EXPR_T_CMP:
 256         return expr;
 257
 258     case EXPR_T_AND:
 259         return expr_fix_andor(expr, false);
 260
 261     case EXPR_T_OR:
 262         return expr_fix_andor(expr, true);
 263
 264     case EXPR_T_BOOLEAN:
 265         return expr;
 266
 267     default:
 268         OVS_NOT_REACHED();
 269     }
 270 }
 271 \f
 272 /* Formatting. */
 273
 274 static void
 275 find_bitwise_range(const union mf_subvalue *sv, int width,
 276                    int *startp, int *n_bitsp)
 277 {
 278     unsigned int start = bitwise_scan(sv, sizeof *sv, true, 0, width);
 279     if (start < width) {
 280         unsigned int end = bitwise_scan(sv, sizeof *sv, false, start, width);
 281         if (end >= width
 282             || bitwise_scan(sv, sizeof *sv, true, end, width) >= width) {
 283             *startp = start;
 284             *n_bitsp = end - start;
 285             return;
 286         }
 287     }
 288     *startp = *n_bitsp = 0;
 289 }
 290
 291 static void
 292 expr_format_cmp(const struct expr *e, struct ds *s)
 293 {
 294     /* The common case is numerical comparisons.
 295      * Handle string comparisons as a special case. */
 296     if (!e->cmp.symbol->width) {
 297         ds_put_format(s, "%s %s ", e->cmp.symbol->name,
 298                       expr_relop_to_string(e->cmp.relop));
 299         json_string_escape(e->cmp.string, s);
 300         return;
 301     }
 302
 303     int ofs, n;
 304     find_bitwise_range(&e->cmp.mask, e->cmp.symbol->width, &ofs, &n);
 305     if (n == 1 && (e->cmp.relop == EXPR_R_EQ || e->cmp.relop == EXPR_R_NE)) {
 306         bool positive;
 307
 308         positive = bitwise_get_bit(&e->cmp.value, sizeof e->cmp.value, ofs);
 309         positive ^= e->cmp.relop == EXPR_R_NE;
 310         if (!positive) {
 311             ds_put_char(s, '!');
 312         }
 313         ds_put_cstr(s, e->cmp.symbol->name);
 314         if (e->cmp.symbol->width > 1) {
 315             ds_put_format(s, "[%d]", ofs);
 316         }
 317         return;
 318     }
 319
 320     ds_put_cstr(s, e->cmp.symbol->name);
 321     if (n > 0 && n < e->cmp.symbol->width) {
 322         if (n > 1) {
 323             ds_put_format(s, "[%d..%d]", ofs, ofs + n - 1);
 324         } else {
 325             ds_put_format(s, "[%d]", ofs);
 326         }
 327     }
 328
 329     ds_put_format(s, " %s ", expr_relop_to_string(e->cmp.relop));
 330
 331     if (n) {
 332         union mf_subvalue value;
 333
 334         memset(&value, 0, sizeof value);
 335         bitwise_copy(&e->cmp.value, sizeof e->cmp.value, ofs,
 336                      &value, sizeof value, 0,
 337                      n);
 338         mf_format_subvalue(&value, s);
 339     } else {
 340         mf_format_subvalue(&e->cmp.value, s);
 341         ds_put_char(s, '/');
 342         mf_format_subvalue(&e->cmp.mask, s);
 343     }
 344 }
 345
 346 static void
 347 expr_format_andor(const struct expr *e, const char *op, struct ds *s)
 348 {
 349     struct expr *sub;
 350     int i = 0;
 351
 352     LIST_FOR_EACH (sub, node, &e->andor) {
 353         if (i++) {
 354             ds_put_format(s, " %s ", op);
 355         }
 356
 357         if (sub->type == EXPR_T_AND || sub->type == EXPR_T_OR) {
 358             ds_put_char(s, '(');
 359             expr_format(sub, s);
 360             ds_put_char(s, ')');
 361         } else {
 362             expr_format(sub, s);
 363         }
 364     }
 365 }
 366
 367 /* Appends a string form of 'e' to 's'.  The string form is acceptable for
 368  * parsing back into an equivalent expression. */
 369 void
 370 expr_format(const struct expr *e, struct ds *s)
 371 {
 372     switch (e->type) {
 373     case EXPR_T_CMP:
 374         expr_format_cmp(e, s);
 375         break;
 376
 377     case EXPR_T_AND:
 378         expr_format_andor(e, "&&", s);
 379         break;
 380
 381     case EXPR_T_OR:
 382         expr_format_andor(e, "||", s);
 383         break;
 384
 385     case EXPR_T_BOOLEAN:
 386         ds_put_char(s, e->boolean ? '1' : '0');
 387         break;
 388     }
 389 }
 390
 391 /* Prints a string form of 'e' on stdout, followed by a new-line. */
 392 void
 393 expr_print(const struct expr *e)
 394 {
 395     struct ds output;
 396
 397     ds_init(&output);
 398     expr_format(e, &output);
 399     puts(ds_cstr(&output));
 400     ds_destroy(&output);
 401 }
 402 \f
 403 /* Parsing. */
 404
 405 /* Type of a "union expr_constant" or "struct expr_constant_set". */
 406 enum expr_constant_type {
 407     EXPR_C_INTEGER,
 408     EXPR_C_STRING
 409 };
 410
 411 /* A string or integer constant (one must know which from context). */
 412 union expr_constant {
 413     /* Integer constant.
 414      *
 415      * The width of a constant isn't always clear, e.g. if you write "1",
 416      * there's no way to tell whether you mean for that to be a 1-bit constant
 417      * or a 128-bit constant or somewhere in between. */
 418     struct {
 419         union mf_subvalue value;
 420         union mf_subvalue mask; /* Only initialized if 'masked'. */
 421         bool masked;
 422
 423         enum lex_format format; /* From the constant's lex_token. */
 424     };
 425
 426     /* Null-terminated string constant. */
 427     char *string;
 428 };
 429
 430 /* A collection of "union expr_constant"s of the same type. */
 431 struct expr_constant_set {
 432     union expr_constant *values;  /* Constants. */
 433     size_t n_values;              /* Number of constants. */
 434     enum expr_constant_type type; /* Type of the constants. */
 435     bool in_curlies;              /* Whether the constants were in {}. */
 436 };
 437
 438 /* A reference to a symbol or a subfield of a symbol.
 439  *
 440  * For string fields, ofs and n_bits are 0. */
 441 struct expr_field {
 442     const struct expr_symbol *symbol; /* The symbol. */
 443     int ofs;                          /* Starting bit offset. */
 444     int n_bits;                       /* Number of bits. */
 445 };
 446
 447 /* Context maintained during expr_parse(). */
 448 struct expr_context {
 449     struct lexer *lexer;        /* Lexer for pulling more tokens. */
 450     const struct shash *symtab; /* Symbol table. */
 451     char *error;                /* Error, if any, otherwise NULL. */
 452     bool not;                   /* True inside odd number of NOT operators. */
 453 };
 454
 455 struct expr *expr_parse__(struct expr_context *);
 456 static void expr_not(struct expr *);
 457 static void expr_constant_set_destroy(struct expr_constant_set *);
 458 static bool parse_field(struct expr_context *, struct expr_field *);
 459
 460 static bool
 461 expr_error_handle_common(struct expr_context *ctx)
 462 {
 463     if (ctx->error) {
 464         /* Already have an error, suppress this one since the cascade seems
 465          * unlikely to be useful. */
 466         return true;
 467     } else if (ctx->lexer->token.type == LEX_T_ERROR) {
 468         /* The lexer signaled an error.  Nothing at the expression level
 469          * accepts an error token, so we'll inevitably end up here with some
 470          * meaningless parse error.  Report the lexical error instead. */
 471         ctx->error = xstrdup(ctx->lexer->token.s);
 472         return true;
 473     } else {
 474         return false;
 475     }
 476 }
 477
 478 static void OVS_PRINTF_FORMAT(2, 3)
 479 expr_error(struct expr_context *ctx, const char *message, ...)
 480 {
 481     if (expr_error_handle_common(ctx)) {
 482         return;
 483     }
 484
 485     va_list args;
 486     va_start(args, message);
 487     ctx->error = xvasprintf(message, args);
 488     va_end(args);
 489 }
 490
 491 static void OVS_PRINTF_FORMAT(2, 3)
 492 expr_syntax_error(struct expr_context *ctx, const char *message, ...)
 493 {
 494     if (expr_error_handle_common(ctx)) {
 495         return;
 496     }
 497
 498     struct ds s;
 499
 500     ds_init(&s);
 501     ds_put_cstr(&s, "Syntax error ");
 502     if (ctx->lexer->token.type == LEX_T_END) {
 503         ds_put_cstr(&s, "at end of input ");
 504     } else if (ctx->lexer->start) {
 505         ds_put_format(&s, "at `%.*s' ",
 506                       (int) (ctx->lexer->input - ctx->lexer->start),
 507                       ctx->lexer->start);
 508     }
 509
 510     va_list args;
 511     va_start(args, message);
 512     ds_put_format_valist(&s, message, args);
 513     va_end(args);
 514
 515     ctx->error = ds_steal_cstr(&s);
 516 }
 517
 518 static struct expr *
 519 make_cmp__(const struct expr_field *f, enum expr_relop r,
 520              const union expr_constant *c)
 521 {
 522     struct expr *e = xzalloc(sizeof *e);
 523     e->type = EXPR_T_CMP;
 524     e->cmp.symbol = f->symbol;
 525     e->cmp.relop = r;
 526     if (f->symbol->width) {
 527         bitwise_copy(&c->value, sizeof c->value, 0,
 528                      &e->cmp.value, sizeof e->cmp.value, f->ofs,
 529                      f->n_bits);
 530         if (c->masked) {
 531             bitwise_copy(&c->mask, sizeof c->mask, 0,
 532                          &e->cmp.mask, sizeof e->cmp.mask, f->ofs,
 533                          f->n_bits);
 534         } else {
 535             bitwise_one(&e->cmp.mask, sizeof e->cmp.mask, f->ofs,
 536                         f->n_bits);
 537         }
 538     } else {
 539         e->cmp.string = xstrdup(c->string);
 540     }
 541     return e;
 542 }
 543
 544 /* Returns the minimum reasonable width for integer constant 'c'. */
 545 static int
 546 expr_constant_width(const union expr_constant *c)
 547 {
 548     if (c->masked) {
 549         return mf_subvalue_width(&c->mask);
 550     }
 551
 552     switch (c->format) {
 553     case LEX_F_DECIMAL:
 554     case LEX_F_HEXADECIMAL:
 555         return mf_subvalue_width(&c->value);
 556
 557     case LEX_F_IPV4:
 558         return 32;
 559
 560     case LEX_F_IPV6:
 561         return 128;
 562
 563     case LEX_F_ETHERNET:
 564         return 48;
 565
 566     default:
 567         OVS_NOT_REACHED();
 568     }
 569 }
 570
 571 static bool
 572 type_check(struct expr_context *ctx, const struct expr_field *f,
 573            struct expr_constant_set *cs)
 574 {
 575     if (cs->type != (f->symbol->width ? EXPR_C_INTEGER : EXPR_C_STRING)) {
 576         expr_error(ctx, "%s field %s is not compatible with %s constant.",
 577                    f->symbol->width ? "Integer" : "String",
 578                    f->symbol->name,
 579                    cs->type == EXPR_C_INTEGER ? "integer" : "string");
 580         return false;
 581     }
 582
 583     if (f->symbol->width) {
 584         for (size_t i = 0; i < cs->n_values; i++) {
 585             int w = expr_constant_width(&cs->values[i]);
 586             if (w > f->symbol->width) {
 587                 expr_error(ctx, "%d-bit constant is not compatible with "
 588                            "%d-bit field %s.",
 589                            w, f->symbol->width, f->symbol->name);
 590                 return false;
 591             }
 592         }
 593     }
 594
 595     return true;
 596 }
 597
 598 static struct expr *
 599 make_cmp(struct expr_context *ctx,
 600          const struct expr_field *f, enum expr_relop r,
 601          struct expr_constant_set *cs)
 602 {
 603     struct expr *e = NULL;
 604
 605     if (!type_check(ctx, f, cs)) {
 606         goto exit;
 607     }
 608
 609     if (r != EXPR_R_EQ && r != EXPR_R_NE) {
 610         if (cs->in_curlies) {
 611             expr_error(ctx, "Only == and != operators may be used "
 612                        "with value sets.");
 613             goto exit;
 614         }
 615         if (f->symbol->level == EXPR_L_NOMINAL ||
 616             f->symbol->level == EXPR_L_BOOLEAN) {
 617             expr_error(ctx, "Only == and != operators may be used "
 618                        "with %s field %s.",
 619                        expr_level_to_string(f->symbol->level),
 620                        f->symbol->name);
 621             goto exit;
 622         }
 623         if (cs->values[0].masked) {
 624             expr_error(ctx, "Only == and != operators may be used with "
 625                        "masked constants.  Consider using subfields instead "
 626                        "(e.g. eth.src[0..15] > 0x1111 in place of "
 627                        "eth.src > 00:00:00:00:11:11/00:00:00:00:ff:ff).");
 628             goto exit;
 629         }
 630     }
 631
 632     if (f->symbol->level == EXPR_L_NOMINAL) {
 633         if (f->symbol->expansion) {
 634             ovs_assert(f->symbol->width > 0);
 635             for (size_t i = 0; i < cs->n_values; i++) {
 636                 const union mf_subvalue *value = &cs->values[i].value;
 637                 bool positive = (value->integer & htonll(1)) != 0;
 638                 positive ^= r == EXPR_R_NE;
 639                 positive ^= ctx->not;
 640                 if (!positive) {
 641                     const char *name = f->symbol->name;
 642                     expr_error(ctx, "Nominal predicate %s may only be tested "
 643                                "positively, e.g. `%s' or `%s == 1' but not "
 644                                "`!%s' or `%s == 0'.",
 645                                name, name, name, name, name);
 646                     goto exit;
 647                 }
 648             }
 649         } else if (r != (ctx->not ? EXPR_R_NE : EXPR_R_EQ)) {
 650             expr_error(ctx, "Nominal field %s may only be tested for "
 651                        "equality (taking enclosing `!' operators into "
 652                        "account).", f->symbol->name);
 653             goto exit;
 654         }
 655     }
 656
 657     e = make_cmp__(f, r, &cs->values[0]);
 658     for (size_t i = 1; i < cs->n_values; i++) {
 659         e = expr_combine(r == EXPR_R_EQ ? EXPR_T_OR : EXPR_T_AND,
 660                          e, make_cmp__(f, r, &cs->values[i]));
 661     }
 662 exit:
 663     expr_constant_set_destroy(cs);
 664     return e;
 665 }
 666
 667 static bool
 668 expr_get_int(struct expr_context *ctx, int *value)
 669 {
 670     if (ctx->lexer->token.type == LEX_T_INTEGER
 671         && ctx->lexer->token.format == LEX_F_DECIMAL
 672         && ntohll(ctx->lexer->token.value.integer) <= INT_MAX) {
 673         *value = ntohll(ctx->lexer->token.value.integer);
 674         lexer_get(ctx->lexer);
 675         return true;
 676     } else {
 677         expr_syntax_error(ctx, "expecting small integer.");
 678         return false;
 679     }
 680 }
 681
 682 static bool
 683 parse_field(struct expr_context *ctx, struct expr_field *f)
 684 {
 685     const struct expr_symbol *symbol;
 686
 687     if (ctx->lexer->token.type != LEX_T_ID) {
 688         expr_syntax_error(ctx, "expecting field name.");
 689         return false;
 690     }
 691
 692     symbol = shash_find_data(ctx->symtab, ctx->lexer->token.s);
 693     if (!symbol) {
 694         expr_syntax_error(ctx, "expecting field name.");
 695         return false;
 696     }
 697     lexer_get(ctx->lexer);
 698
 699     f->symbol = symbol;
 700     if (lexer_match(ctx->lexer, LEX_T_LSQUARE)) {
 701         int low, high;
 702
 703         if (!symbol->width) {
 704             expr_error(ctx, "Cannot select subfield of string field %s.",
 705                        symbol->name);
 706             return false;
 707         }
 708
 709         if (!expr_get_int(ctx, &low)) {
 710             return false;
 711         }
 712         if (lexer_match(ctx->lexer, LEX_T_ELLIPSIS)) {
 713             if (!expr_get_int(ctx, &high)) {
 714                 return false;
 715             }
 716         } else {
 717             high = low;
 718         }
 719
 720         if (!lexer_match(ctx->lexer, LEX_T_RSQUARE)) {
 721             expr_syntax_error(ctx, "expecting `]'.");
 722             return false;
 723         }
 724
 725         if (low > high) {
 726             expr_error(ctx, "Invalid bit range %d to %d.", low, high);
 727             return false;
 728         } else if (high >= symbol->width) {
 729             expr_error(ctx, "Cannot select bits %d to %d of %d-bit field %s.",
 730                        low, high, symbol->width, symbol->name);
 731             return false;
 732         } else if (symbol->level == EXPR_L_NOMINAL
 733                    && (low != 0 || high != symbol->width - 1)) {
 734             expr_error(ctx, "Cannot select subfield of nominal field %s.",
 735                        symbol->name);
 736             return false;
 737         }
 738
 739         f->ofs = low;
 740         f->n_bits = high - low + 1;
 741     } else {
 742         f->ofs = 0;
 743         f->n_bits = symbol->width;
 744     }
 745
 746     return true;
 747 }
 748
 749 static bool
 750 parse_relop(struct expr_context *ctx, enum expr_relop *relop)
 751 {
 752     if (expr_relop_from_token(ctx->lexer->token.type, relop)) {
 753         lexer_get(ctx->lexer);
 754         return true;
 755     } else {
 756         expr_syntax_error(ctx, "expecting relational operator.");
 757         return false;
 758     }
 759 }
 760
 761 static bool
 762 assign_constant_set_type(struct expr_context *ctx,
 763                          struct expr_constant_set *cs,
 764                          enum expr_constant_type type)
 765 {
 766     if (!cs->n_values || cs->type == type) {
 767         cs->type = type;
 768         return true;
 769     } else {
 770         expr_syntax_error(ctx, "expecting %s.",
 771                           cs->type == EXPR_C_INTEGER ? "integer" : "string");
 772         return false;
 773     }
 774 }
 775
 776 static bool
 777 parse_constant(struct expr_context *ctx, struct expr_constant_set *cs,
 778                size_t *allocated_values)
 779 {
 780     if (cs->n_values >= *allocated_values) {
 781         cs->values = x2nrealloc(cs->values, allocated_values,
 782                                 sizeof *cs->values);
 783     }
 784
 785     if (ctx->lexer->token.type == LEX_T_STRING) {
 786         if (!assign_constant_set_type(ctx, cs, EXPR_C_STRING)) {
 787             return false;
 788         }
 789         cs->values[cs->n_values++].string = xstrdup(ctx->lexer->token.s);
 790         lexer_get(ctx->lexer);
 791         return true;
 792     } else if (ctx->lexer->token.type == LEX_T_INTEGER ||
 793                ctx->lexer->token.type == LEX_T_MASKED_INTEGER) {
 794         if (!assign_constant_set_type(ctx, cs, EXPR_C_INTEGER)) {
 795             return false;
 796         }
 797
 798         union expr_constant *c = &cs->values[cs->n_values++];
 799         c->value = ctx->lexer->token.value;
 800         c->format = ctx->lexer->token.format;
 801         c->masked = ctx->lexer->token.type == LEX_T_MASKED_INTEGER;
 802         if (c->masked) {
 803             c->mask = ctx->lexer->token.mask;
 804         }
 805         lexer_get(ctx->lexer);
 806         return true;
 807     } else {
 808         expr_syntax_error(ctx, "expecting constant.");
 809         return false;
 810     }
 811 }
 812
 813 /* Parses a single or {}-enclosed set of integer or string constants into 'cs',
 814  * which the caller need not have initialized.  Returns true on success, in
 815  * which case the caller owns 'cs', false on failure, in which case 'cs' is
 816  * indeterminate. */
 817 static bool
 818 parse_constant_set(struct expr_context *ctx, struct expr_constant_set *cs)
 819 {
 820     size_t allocated_values = 0;
 821     bool ok;
 822
 823     memset(cs, 0, sizeof *cs);
 824     if (lexer_match(ctx->lexer, LEX_T_LCURLY)) {
 825         ok = true;
 826         cs->in_curlies = true;
 827         do {
 828             if (!parse_constant(ctx, cs, &allocated_values)) {
 829                 ok = false;
 830                 break;
 831             }
 832             lexer_match(ctx->lexer, LEX_T_COMMA);
 833         } while (!lexer_match(ctx->lexer, LEX_T_RCURLY));
 834     } else {
 835         ok = parse_constant(ctx, cs, &allocated_values);
 836     }
 837     if (!ok) {
 838         expr_constant_set_destroy(cs);
 839     }
 840     return ok;
 841 }
 842
 843 static void
 844 expr_constant_set_destroy(struct expr_constant_set *cs)
 845 {
 846     if (cs) {
 847         if (cs->type == EXPR_C_STRING) {
 848             for (size_t i = 0; i < cs->n_values; i++) {
 849                 free(cs->values[i].string);
 850             }
 851         }
 852         free(cs->values);
 853     }
 854 }
 855
 856 static struct expr *
 857 expr_parse_primary(struct expr_context *ctx, bool *atomic)
 858 {
 859     *atomic = false;
 860     if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
 861         struct expr *e = expr_parse__(ctx);
 862         if (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
 863             expr_destroy(e);
 864             expr_syntax_error(ctx, "expecting `)'.");
 865             return NULL;
 866         }
 867         *atomic = true;
 868         return e;
 869     }
 870
 871     if (ctx->lexer->token.type == LEX_T_ID) {
 872         struct expr_field f;
 873         enum expr_relop r;
 874         struct expr_constant_set c;
 875
 876         if (!parse_field(ctx, &f)) {
 877             return NULL;
 878         }
 879
 880         if (!expr_relop_from_token(ctx->lexer->token.type, &r)) {
 881             if (f.n_bits > 1 && !ctx->not) {
 882                 expr_error(ctx, "Explicit `!= 0' is required for inequality "
 883                            "test of multibit field against 0.");
 884                 return NULL;
 885             }
 886
 887             *atomic = true;
 888
 889             union expr_constant *cst = xzalloc(sizeof *cst);
 890             cst->format = LEX_F_HEXADECIMAL;
 891             cst->masked = false;
 892
 893             c.type = EXPR_C_INTEGER;
 894             c.values = cst;
 895             c.n_values = 1;
 896             c.in_curlies = false;
 897             return make_cmp(ctx, &f, EXPR_R_NE, &c);
 898         } else if (parse_relop(ctx, &r) && parse_constant_set(ctx, &c)) {
 899             return make_cmp(ctx, &f, r, &c);
 900         } else {
 901             return NULL;
 902         }
 903     } else {
 904         struct expr_constant_set c1;
 905         if (!parse_constant_set(ctx, &c1)) {
 906             return NULL;
 907         }
 908
 909         if (!expr_relop_from_token(ctx->lexer->token.type, NULL)
 910             && c1.n_values == 1
 911             && c1.type == EXPR_C_INTEGER
 912             && c1.values[0].format == LEX_F_DECIMAL
 913             && !c1.values[0].masked
 914             && !c1.in_curlies) {
 915             uint64_t x = ntohll(c1.values[0].value.integer);
 916             if (x <= 1) {
 917                 *atomic = true;
 918                 expr_constant_set_destroy(&c1);
 919                 return expr_create_boolean(x);
 920             }
 921         }
 922
 923         enum expr_relop r1;
 924         struct expr_field f;
 925         if (!parse_relop(ctx, &r1) || !parse_field(ctx, &f)) {
 926             expr_constant_set_destroy(&c1);
 927             return NULL;
 928         }
 929
 930         if (!expr_relop_from_token(ctx->lexer->token.type, NULL)) {
 931             return make_cmp(ctx, &f, expr_relop_turn(r1), &c1);
 932         }
 933
 934         enum expr_relop r2;
 935         struct expr_constant_set c2;
 936         if (!parse_relop(ctx, &r2) || !parse_constant_set(ctx, &c2)) {
 937             expr_constant_set_destroy(&c1);
 938             return NULL;
 939         } else {
 940             /* Reject "1 == field == 2", "1 < field > 2", and so on. */
 941             if (!(((r1 == EXPR_R_LT || r1 == EXPR_R_LE) &&
 942                    (r2 == EXPR_R_LT || r2 == EXPR_R_LE)) ||
 943                   ((r1 == EXPR_R_GT || r1 == EXPR_R_GE) &&
 944                    (r2 == EXPR_R_GT || r2 == EXPR_R_GE)))) {
 945                 expr_error(ctx, "Range expressions must have the form "
 946                            "`x < field < y' or `x > field > y', with each "
 947                            "`<' optionally replaced by `<=' or `>' by `>=').");
 948                 expr_constant_set_destroy(&c1);
 949                 expr_constant_set_destroy(&c2);
 950                 return NULL;
 951             }
 952
 953             struct expr *e1 = make_cmp(ctx, &f, expr_relop_turn(r1), &c1);
 954             struct expr *e2 = make_cmp(ctx, &f, r2, &c2);
 955             if (ctx->error) {
 956                 expr_destroy(e1);
 957                 expr_destroy(e2);
 958                 return NULL;
 959             }
 960             return expr_combine(EXPR_T_AND, e1, e2);
 961         }
 962     }
 963 }
 964
 965 static struct expr *
 966 expr_parse_not(struct expr_context *ctx)
 967 {
 968     bool atomic;
 969
 970     if (lexer_match(ctx->lexer, LEX_T_LOG_NOT)) {
 971         ctx->not = !ctx->not;
 972         struct expr *expr = expr_parse_primary(ctx, &atomic);
 973         ctx->not = !ctx->not;
 974
 975         if (expr) {
 976             if (!atomic) {
 977                 expr_error(ctx, "Missing parentheses around operand of !.");
 978                 expr_destroy(expr);
 979                 return NULL;
 980             }
 981             expr_not(expr);
 982         }
 983         return expr;
 984     } else {
 985         return expr_parse_primary(ctx, &atomic);
 986     }
 987 }
 988
 989 struct expr *
 990 expr_parse__(struct expr_context *ctx)
 991 {
 992     struct expr *e = expr_parse_not(ctx);
 993     if (!e) {
 994         return NULL;
 995     }
 996
 997     enum lex_type lex_type = ctx->lexer->token.type;
 998     if (lex_type == LEX_T_LOG_AND || lex_type == LEX_T_LOG_OR) {
 999         enum expr_type expr_type
1000             = lex_type == LEX_T_LOG_AND ? EXPR_T_AND : EXPR_T_OR;
1001
1002         lexer_get(ctx->lexer);
1003         do {
1004             struct expr *e2 = expr_parse_not(ctx);
1005             if (!e2) {
1006                 expr_destroy(e);
1007                 return NULL;
1008             }
1009             e = expr_combine(expr_type, e, e2);
1010         } while (lexer_match(ctx->lexer, lex_type));
1011         if (ctx->lexer->token.type == LEX_T_LOG_AND
1012             || ctx->lexer->token.type == LEX_T_LOG_OR) {
1013             expr_destroy(e);
1014             expr_error(ctx,
1015                        "&& and || must be parenthesized when used together.");
1016             return NULL;
1017         }
1018     }
1019     return e;
1020 }
1021
1022 /* Parses an expression using the symbols in 'symtab' from 'lexer'.  If
1023  * successful, returns the new expression and sets '*errorp' to NULL.  On
1024  * failure, returns NULL and sets '*errorp' to an explanatory error message.
1025  * The caller must eventually free the returned expression (with
1026  * expr_destroy()) or error (with free()). */
1027 struct expr *
1028 expr_parse(struct lexer *lexer, const struct shash *symtab, char **errorp)
1029 {
1030     struct expr_context ctx;
1031
1032     ctx.lexer = lexer;
1033     ctx.symtab = symtab;
1034     ctx.error = NULL;
1035     ctx.not = false;
1036
1037     struct expr *e = expr_parse__(&ctx);
1038     *errorp = ctx.error;
1039     ovs_assert((ctx.error != NULL) != (e != NULL));
1040     return e;
1041 }
1042
1043 /* Like expr_parse(), but the expression is taken from 's'. */
1044 struct expr *
1045 expr_parse_string(const char *s, const struct shash *symtab, char **errorp)
1046 {
1047     struct lexer lexer;
1048     struct expr *expr;
1049
1050     lexer_init(&lexer, s);
1051     lexer_get(&lexer);
1052     expr = expr_parse(&lexer, symtab, errorp);
1053     if (!*errorp && lexer.token.type != LEX_T_END) {
1054         *errorp = xstrdup("Extra tokens at end of input.");
1055         expr_destroy(expr);
1056         expr = NULL;
1057     }
1058     lexer_destroy(&lexer);
1059
1060     return expr;
1061 }
1062 \f
1063 static struct expr_symbol *
1064 add_symbol(struct shash *symtab, const char *name, int width,
1065            const char *prereqs, enum expr_level level,
1066            bool must_crossproduct)
1067 {
1068     struct expr_symbol *symbol = xzalloc(sizeof *symbol);
1069     symbol->name = xstrdup(name);
1070     symbol->prereqs = prereqs && prereqs[0] ? xstrdup(prereqs) : NULL;
1071     symbol->width = width;
1072     symbol->level = level;
1073     symbol->must_crossproduct = must_crossproduct;
1074     shash_add_assert(symtab, symbol->name, symbol);
1075     return symbol;
1076 }
1077
1078 /* Adds field 'id' to symbol table 'symtab' under the given 'name'.  Whenever
1079  * 'name' is referenced, expression annotation (see expr_annotate()) will
1080  * ensure that 'prereqs' are also true.  If 'must_crossproduct' is true, then
1081  * conversion to flows will never attempt to use the field as a conjunctive
1082  * match dimension (see "Crossproducting" in the large comment on struct
1083  * expr_symbol in expr.h for an example).
1084  *
1085  * A given field 'id' must only be used for a single symbol in a symbol table.
1086  * Use subfields to duplicate or subset a field (you can even make a subfield
1087  * include all the bits of the "parent" field if you like). */
1088 struct expr_symbol *
1089 expr_symtab_add_field(struct shash *symtab, const char *name,
1090                       enum mf_field_id id, const char *prereqs,
1091                       bool must_crossproduct)
1092 {
1093     const struct mf_field *field = mf_from_id(id);
1094     struct expr_symbol *symbol;
1095
1096     symbol = add_symbol(symtab, name, field->n_bits, prereqs,
1097                         (field->maskable == MFM_FULLY
1098                          ? EXPR_L_ORDINAL
1099                          : EXPR_L_NOMINAL),
1100                         must_crossproduct);
1101     symbol->field = field;
1102     return symbol;
1103 }
1104
1105 static bool
1106 parse_field_from_string(const char *s, const struct shash *symtab,
1107                         struct expr_field *field, char **errorp)
1108 {
1109     struct lexer lexer;
1110     lexer_init(&lexer, s);
1111     lexer_get(&lexer);
1112
1113     struct expr_context ctx;
1114     ctx.lexer = &lexer;
1115     ctx.symtab = symtab;
1116     ctx.error = NULL;
1117     ctx.not = false;
1118
1119     bool ok = parse_field(&ctx, field);
1120     if (!ok) {
1121         *errorp = ctx.error;
1122     } else if (lexer.token.type != LEX_T_END) {
1123         *errorp = xstrdup("Extra tokens at end of input.");
1124         ok = false;
1125     }
1126
1127     lexer_destroy(&lexer);
1128
1129     return ok;
1130 }
1131
1132 /* Adds 'name' as a subfield of a larger field in 'symtab'.  Whenever
1133  * 'name' is referenced, expression annotation (see expr_annotate()) will
1134  * ensure that 'prereqs' are also true.
1135  *
1136  * 'subfield' must describe the subfield as a string, e.g. "vlan.tci[0..11]"
1137  * for the low 12 bits of a larger field named "vlan.tci". */
1138 struct expr_symbol *
1139 expr_symtab_add_subfield(struct shash *symtab, const char *name,
1140                          const char *prereqs, const char *subfield)
1141 {
1142     struct expr_symbol *symbol;
1143     struct expr_field f;
1144     char *error;
1145
1146     if (!parse_field_from_string(subfield, symtab, &f, &error)) {
1147         VLOG_WARN("%s: error parsing %s subfield (%s)", subfield, name, error);
1148         free(error);
1149         return NULL;
1150     }
1151
1152     enum expr_level level = f.symbol->level;
1153     if (level != EXPR_L_ORDINAL) {
1154         VLOG_WARN("can't define %s as subfield of %s field %s",
1155                   name, expr_level_to_string(level), f.symbol->name);
1156     }
1157
1158     symbol = add_symbol(symtab, name, f.n_bits, prereqs, level, false);
1159     symbol->expansion = xstrdup(subfield);
1160     return symbol;
1161 }
1162
1163 /* Adds a string-valued symbol named 'name' to 'symtab' with the specified
1164  * 'prereqs'. */
1165 struct expr_symbol *
1166 expr_symtab_add_string(struct shash *symtab, const char *name,
1167                        enum mf_field_id id, const char *prereqs)
1168 {
1169     const struct mf_field *field = mf_from_id(id);
1170     struct expr_symbol *symbol;
1171
1172     symbol = add_symbol(symtab, name, 0, prereqs, EXPR_L_NOMINAL, false);
1173     symbol->field = field;
1174     return symbol;
1175 }
1176
1177 static enum expr_level
1178 expr_get_level(const struct expr *expr)
1179 {
1180     const struct expr *sub;
1181     enum expr_level level = EXPR_L_ORDINAL;
1182
1183     switch (expr->type) {
1184     case EXPR_T_CMP:
1185         return (expr->cmp.symbol->level == EXPR_L_NOMINAL
1186                 ? EXPR_L_NOMINAL
1187                 : EXPR_L_BOOLEAN);
1188
1189     case EXPR_T_AND:
1190     case EXPR_T_OR:
1191         LIST_FOR_EACH (sub, node, &expr->andor) {
1192             enum expr_level sub_level = expr_get_level(sub);
1193             level = MIN(level, sub_level);
1194         }
1195         return level;
1196
1197     case EXPR_T_BOOLEAN:
1198         return EXPR_L_BOOLEAN;
1199
1200     default:
1201         OVS_NOT_REACHED();
1202     }
1203 }
1204
1205 static enum expr_level
1206 expr_parse_level(const char *s, const struct shash *symtab, char **errorp)
1207 {
1208     struct expr *expr = expr_parse_string(s, symtab, errorp);
1209     enum expr_level level = expr ? expr_get_level(expr) : EXPR_L_NOMINAL;
1210     expr_destroy(expr);
1211     return level;
1212 }
1213
1214 /* Adds a predicate symbol, whose value is the given Boolean 'expression',
1215  * named 'name' to 'symtab'.  For example, "ip4 && ip4.proto == 6" might be an
1216  * appropriate predicate named "tcp4". */
1217 struct expr_symbol *
1218 expr_symtab_add_predicate(struct shash *symtab, const char *name,
1219                           const char *expansion)
1220 {
1221     struct expr_symbol *symbol;
1222     enum expr_level level;
1223     char *error;
1224
1225     level = expr_parse_level(expansion, symtab, &error);
1226     if (error) {
1227         VLOG_WARN("%s: error parsing %s expansion (%s)",
1228                   expansion, name, error);
1229         free(error);
1230         return NULL;
1231     }
1232
1233     symbol = add_symbol(symtab, name, 1, NULL, level, false);
1234     symbol->expansion = xstrdup(expansion);
1235     return symbol;
1236 }
1237
1238 /* Destroys 'symtab' and all of its symbols. */
1239 void
1240 expr_symtab_destroy(struct shash *symtab)
1241 {
1242     struct shash_node *node, *next;
1243
1244     SHASH_FOR_EACH_SAFE (node, next, symtab) {
1245         struct expr_symbol *symbol = node->data;
1246
1247         shash_delete(symtab, node);
1248         free(symbol->name);
1249         free(symbol->prereqs);
1250         free(symbol->expansion);
1251         free(symbol);
1252     }
1253 }
1254 \f
1255 /* Cloning. */
1256
1257 static struct expr *
1258 expr_clone_cmp(struct expr *expr)
1259 {
1260     struct expr *new = xmemdup(expr, sizeof *expr);
1261     if (!new->cmp.symbol->width) {
1262         new->cmp.string = xstrdup(new->cmp.string);
1263     }
1264     return new;
1265 }
1266
1267 static struct expr *
1268 expr_clone_andor(struct expr *expr)
1269 {
1270     struct expr *new = expr_create_andor(expr->type);
1271     struct expr *sub;
1272
1273     LIST_FOR_EACH (sub, node, &expr->andor) {
1274         struct expr *new_sub = expr_clone(sub);
1275         list_push_back(&new->andor, &new_sub->node);
1276     }
1277     return new;
1278 }
1279
1280 /* Returns a clone of 'expr'.  This is a "deep copy": neither the returned
1281  * expression nor any of its substructure will be shared with 'expr'. */
1282 struct expr *
1283 expr_clone(struct expr *expr)
1284 {
1285     switch (expr->type) {
1286     case EXPR_T_CMP:
1287         return expr_clone_cmp(expr);
1288
1289     case EXPR_T_AND:
1290     case EXPR_T_OR:
1291         return expr_clone_andor(expr);
1292
1293     case EXPR_T_BOOLEAN:
1294         return expr_create_boolean(expr->boolean);
1295     }
1296     OVS_NOT_REACHED();
1297 }
1298 \f
1299 /* Destroys 'expr' and all of the sub-expressions it references. */
1300 void
1301 expr_destroy(struct expr *expr)
1302 {
1303     if (!expr) {
1304         return;
1305     }
1306
1307     struct expr *sub, *next;
1308
1309     switch (expr->type) {
1310     case EXPR_T_CMP:
1311         if (!expr->cmp.symbol->width) {
1312             free(expr->cmp.string);
1313         }
1314         break;
1315
1316     case EXPR_T_AND:
1317     case EXPR_T_OR:
1318         LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
1319             list_remove(&sub->node);
1320             expr_destroy(sub);
1321         }
1322         break;
1323
1324     case EXPR_T_BOOLEAN:
1325         break;
1326     }
1327     free(expr);
1328 }
1329 \f
1330 /* Annotation. */
1331
1332 /* An element in a linked list of symbols.
1333  *
1334  * Used to detect when a symbol is being expanded recursively, to allow
1335  * flagging an error. */
1336 struct annotation_nesting {
1337     struct ovs_list node;
1338     const struct expr_symbol *symbol;
1339 };
1340
1341 struct expr *expr_annotate__(struct expr *, const struct shash *symtab,
1342                              struct ovs_list *nesting, char **errorp);
1343
1344 static struct expr *
1345 parse_and_annotate(const char *s, const struct shash *symtab,
1346                    struct ovs_list *nesting, char **errorp)
1347 {
1348     char *error;
1349     struct expr *expr;
1350
1351     expr = expr_parse_string(s, symtab, &error);
1352     if (expr) {
1353         expr = expr_annotate__(expr, symtab, nesting, &error);
1354     }
1355     if (expr) {
1356         *errorp = NULL;
1357     } else {
1358         *errorp = xasprintf("Error parsing expression `%s' encountered as "
1359                             "prerequisite or predicate of initial expression: "
1360                             "%s", s, error);
1361         free(error);
1362     }
1363     return expr;
1364 }
1365
1366 static struct expr *
1367 expr_annotate_cmp(struct expr *expr, const struct shash *symtab,
1368                   struct ovs_list *nesting, char **errorp)
1369 {
1370     const struct expr_symbol *symbol = expr->cmp.symbol;
1371     const struct annotation_nesting *iter;
1372     LIST_FOR_EACH (iter, node, nesting) {
1373         if (iter->symbol == symbol) {
1374             *errorp = xasprintf("Recursive expansion of symbol `%s'.",
1375                                 symbol->name);
1376             expr_destroy(expr);
1377             return NULL;
1378         }
1379     }
1380
1381     struct annotation_nesting an;
1382     an.symbol = symbol;
1383     list_push_back(nesting, &an.node);
1384
1385     struct expr *prereqs = NULL;
1386     if (symbol->prereqs) {
1387         prereqs = parse_and_annotate(symbol->prereqs, symtab, nesting, errorp);
1388         if (!prereqs) {
1389             goto error;
1390         }
1391     }
1392
1393     if (symbol->expansion) {
1394         if (symbol->level == EXPR_L_ORDINAL) {
1395             struct expr_field field;
1396
1397             if (!parse_field_from_string(symbol->expansion, symtab,
1398                                          &field, errorp)) {
1399                 goto error;
1400             }
1401
1402             expr->cmp.symbol = field.symbol;
1403             mf_subvalue_shift(&expr->cmp.value, field.ofs);
1404             mf_subvalue_shift(&expr->cmp.mask, field.ofs);
1405         } else {
1406             struct expr *expansion;
1407
1408             expansion = parse_and_annotate(symbol->expansion, symtab,
1409                                            nesting, errorp);
1410             if (!expansion) {
1411                 goto error;
1412             }
1413
1414             bool positive = (expr->cmp.value.integer & htonll(1)) != 0;
1415             positive ^= expr->cmp.relop == EXPR_R_NE;
1416             if (!positive) {
1417                 expr_not(expansion);
1418             }
1419
1420             expr_destroy(expr);
1421             expr = expansion;
1422         }
1423     }
1424
1425     list_remove(&an.node);
1426     return prereqs ? expr_combine(EXPR_T_AND, expr, prereqs) : expr;
1427
1428 error:
1429     expr_destroy(expr);
1430     expr_destroy(prereqs);
1431     list_remove(&an.node);
1432     return NULL;
1433 }
1434
1435 struct expr *
1436 expr_annotate__(struct expr *expr, const struct shash *symtab,
1437                 struct ovs_list *nesting, char **errorp)
1438 {
1439     switch (expr->type) {
1440     case EXPR_T_CMP:
1441         return expr_annotate_cmp(expr, symtab, nesting, errorp);
1442
1443     case EXPR_T_AND:
1444     case EXPR_T_OR: {
1445         struct expr *sub, *next;
1446
1447         LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
1448             list_remove(&sub->node);
1449             struct expr *new_sub = expr_annotate__(sub, symtab,
1450                                                    nesting, errorp);
1451             if (!new_sub) {
1452                 expr_destroy(expr);
1453                 return NULL;
1454             }
1455             expr_insert_andor(expr, next, new_sub);
1456         }
1457         *errorp = NULL;
1458         return expr;
1459     }
1460
1461     case EXPR_T_BOOLEAN:
1462         *errorp = NULL;
1463         return expr;
1464
1465     default:
1466         OVS_NOT_REACHED();
1467     }
1468 }
1469
1470 /* "Annotates" 'expr', which does the following:
1471  *
1472  *     - Applies prerequisites, by locating each comparison operator whose
1473  *       field has a prerequisite and adding a logical AND against those
1474  *       prerequisites.
1475  *
1476  *     - Expands references to subfield symbols, by replacing them by
1477  *       references to their underlying field symbols (suitably shifted).
1478  *
1479  *     - Expands references to predicate symbols, by replacing them by the
1480  *       expressions that they expand to.
1481  *
1482  * In each case, annotation occurs recursively as necessary. */
1483 struct expr *
1484 expr_annotate(struct expr *expr, const struct shash *symtab, char **errorp)
1485 {
1486     struct ovs_list nesting = OVS_LIST_INITIALIZER(&nesting);
1487     return expr_annotate__(expr, symtab, &nesting, errorp);
1488 }
1489 \f
1490 static struct expr *
1491 expr_simplify_ne(struct expr *expr)
1492 {
1493     struct expr *new = NULL;
1494     const union mf_subvalue *value = &expr->cmp.value;
1495     const union mf_subvalue *mask = &expr->cmp.mask;
1496     int w = expr->cmp.symbol->width;
1497     int i;
1498
1499     for (i = 0; (i = bitwise_scan(mask, sizeof *mask, true, i, w)) < w; i++) {
1500         struct expr *e;
1501
1502         e = xzalloc(sizeof *e);
1503         e->type = EXPR_T_CMP;
1504         e->cmp.symbol = expr->cmp.symbol;
1505         e->cmp.relop = EXPR_R_EQ;
1506         bitwise_put_bit(&e->cmp.value, sizeof e->cmp.value, i,
1507                         !bitwise_get_bit(value, sizeof *value, i));
1508         bitwise_put1(&e->cmp.mask, sizeof e->cmp.mask, i);
1509
1510         new = expr_combine(EXPR_T_OR, new, e);
1511     }
1512     ovs_assert(new);
1513
1514     expr_destroy(expr);
1515
1516     return new;
1517 }
1518
1519 static struct expr *
1520 expr_simplify_relational(struct expr *expr)
1521 {
1522     const union mf_subvalue *value = &expr->cmp.value;
1523     int start, n_bits, end;
1524
1525     find_bitwise_range(&expr->cmp.mask, expr->cmp.symbol->width,
1526                        &start, &n_bits);
1527     ovs_assert(n_bits > 0);
1528     end = start + n_bits;
1529
1530     struct expr *new;
1531     if (expr->cmp.relop == EXPR_R_LE || expr->cmp.relop == EXPR_R_GE) {
1532         new = xmemdup(expr, sizeof *expr);
1533         new->cmp.relop = EXPR_R_EQ;
1534     } else {
1535         new = NULL;
1536     }
1537
1538     bool b = expr->cmp.relop == EXPR_R_LT || expr->cmp.relop == EXPR_R_LE;
1539     for (int z = bitwise_scan(value, sizeof *value, b, start, end);
1540          z < end;
1541          z = bitwise_scan(value, sizeof *value, b, z + 1, end)) {
1542         struct expr *e;
1543
1544         e = xmemdup(expr, sizeof *expr);
1545         e->cmp.relop = EXPR_R_EQ;
1546         bitwise_toggle_bit(&e->cmp.value, sizeof e->cmp.value, z);
1547         bitwise_zero(&e->cmp.value, sizeof e->cmp.value, start, z - start);
1548         bitwise_zero(&e->cmp.mask, sizeof e->cmp.mask, start, z - start);
1549         new = expr_combine(EXPR_T_OR, new, e);
1550     }
1551     expr_destroy(expr);
1552     return new ? new : expr_create_boolean(false);
1553 }
1554
1555 /* Takes ownership of 'expr' and returns an equivalent expression whose
1556  * EXPR_T_CMP nodes use only tests for equality (EXPR_R_EQ). */
1557 struct expr *
1558 expr_simplify(struct expr *expr)
1559 {
1560     struct expr *sub, *next;
1561
1562     switch (expr->type) {
1563     case EXPR_T_CMP:
1564         return (expr->cmp.relop == EXPR_R_EQ || !expr->cmp.symbol->width ? expr
1565                 : expr->cmp.relop == EXPR_R_NE ? expr_simplify_ne(expr)
1566                 : expr_simplify_relational(expr));
1567
1568     case EXPR_T_AND:
1569     case EXPR_T_OR:
1570         LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
1571             list_remove(&sub->node);
1572             expr_insert_andor(expr, next, expr_simplify(sub));
1573         }
1574         return expr_fix(expr);
1575
1576     case EXPR_T_BOOLEAN:
1577         return expr;
1578     }
1579     OVS_NOT_REACHED();
1580 }
1581 \f
1582 static const struct expr_symbol *
1583 expr_is_cmp(const struct expr *expr)
1584 {
1585     switch (expr->type) {
1586     case EXPR_T_CMP:
1587         return expr->cmp.symbol;
1588
1589     case EXPR_T_AND:
1590     case EXPR_T_OR: {
1591         const struct expr_symbol *prev = NULL;
1592         struct expr *sub;
1593
1594         LIST_FOR_EACH (sub, node, &expr->andor) {
1595             const struct expr_symbol *symbol = expr_is_cmp(sub);
1596             if (!symbol || (prev && symbol != prev)) {
1597                 return NULL;
1598             }
1599             prev = symbol;
1600         }
1601         return prev;
1602     }
1603
1604     case EXPR_T_BOOLEAN:
1605         return NULL;
1606
1607     default:
1608         OVS_NOT_REACHED();
1609     }
1610 }
1611
1612 struct expr_sort {
1613     struct expr *expr;
1614     const struct expr_symbol *relop;
1615     enum expr_type type;
1616 };
1617
1618 static int
1619 compare_expr_sort(const void *a_, const void *b_)
1620 {
1621     const struct expr_sort *a = a_;
1622     const struct expr_sort *b = b_;
1623
1624     if (a->type != b->type) {
1625         return a->type < b->type ? -1 : 1;
1626     } else if (a->relop) {
1627         int cmp = strcmp(a->relop->name, b->relop->name);
1628         if (cmp) {
1629             return cmp;
1630         }
1631
1632         enum expr_type a_type = a->expr->type;
1633         enum expr_type b_type = a->expr->type;
1634         return a_type < b_type ? -1 : a_type > b_type;
1635     } else if (a->type == EXPR_T_AND || a->type == EXPR_T_OR) {
1636         size_t a_len = list_size(&a->expr->andor);
1637         size_t b_len = list_size(&b->expr->andor);
1638         return a_len < b_len ? -1 : a_len > b_len;
1639     } else {
1640         return 0;
1641     }
1642 }
1643
1644 static struct expr *crush_cmps(struct expr *, const struct expr_symbol *);
1645
1646 /* Implementation of crush_cmps() for expr->type == EXPR_T_AND. */
1647 static struct expr *
1648 crush_and(struct expr *expr, const struct expr_symbol *symbol)
1649 {
1650     ovs_assert(!list_is_short(&expr->andor));
1651
1652     union mf_subvalue value, mask;
1653     memset(&value, 0, sizeof value);
1654     memset(&mask, 0, sizeof mask);
1655
1656     struct expr *sub, *next = NULL;
1657     LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
1658         list_remove(&sub->node);
1659         struct expr *new = crush_cmps(sub, symbol);
1660         switch (new->type) {
1661         case EXPR_T_CMP:
1662             if (!mf_subvalue_intersect(&value, &mask,
1663                                        &new->cmp.value, &new->cmp.mask,
1664                                        &value, &mask)) {
1665                 expr_destroy(new);
1666                 expr_destroy(expr);
1667                 return expr_create_boolean(false);
1668             }
1669             expr_destroy(new);
1670             break;
1671         case EXPR_T_AND:
1672             OVS_NOT_REACHED();
1673         case EXPR_T_OR:
1674             list_insert(&next->node, &new->node);
1675             break;
1676         case EXPR_T_BOOLEAN:
1677             if (!new->boolean) {
1678                 expr_destroy(expr);
1679                 return new;
1680             }
1681             free(new);
1682             break;
1683         }
1684     }
1685     if (list_is_empty(&expr->andor)) {
1686         if (is_all_zeros(&mask, sizeof mask)) {
1687             expr_destroy(expr);
1688             return expr_create_boolean(true);
1689         } else {
1690             struct expr *cmp;
1691             cmp = xmalloc(sizeof *cmp);
1692             cmp->type = EXPR_T_CMP;
1693             cmp->cmp.symbol = symbol;
1694             cmp->cmp.relop = EXPR_R_EQ;
1695             cmp->cmp.value = value;
1696             cmp->cmp.mask = mask;
1697             expr_destroy(expr);
1698             return cmp;
1699         }
1700     } else if (list_is_short(&expr->andor)) {
1701         /* Transform "a && (b || c || d)" into "ab || ac || ad" where "ab" is
1702          * computed as "a && b", etc. */
1703         struct expr *disjuncts = expr_from_node(list_pop_front(&expr->andor));
1704         struct expr *or;
1705
1706         or = xmalloc(sizeof *or);
1707         or->type = EXPR_T_OR;
1708         list_init(&or->andor);
1709
1710         ovs_assert(disjuncts->type == EXPR_T_OR);
1711         LIST_FOR_EACH_SAFE (sub, next, node, &disjuncts->andor) {
1712             ovs_assert(sub->type == EXPR_T_CMP);
1713             list_remove(&sub->node);
1714             if (mf_subvalue_intersect(&value, &mask,
1715                                       &sub->cmp.value, &sub->cmp.mask,
1716                                       &sub->cmp.value, &sub->cmp.mask)) {
1717                 list_push_back(&or->andor, &sub->node);
1718             } else {
1719                 free(sub);
1720             }
1721         }
1722         free(disjuncts);
1723         free(expr);
1724         if (list_is_empty(&or->andor)) {
1725             free(or);
1726             return expr_create_boolean(false);
1727         } else if (list_is_short(&or->andor)) {
1728             struct expr *cmp = expr_from_node(list_pop_front(&or->andor));
1729             free(or);
1730             return cmp;
1731         } else {
1732             return or;
1733         }
1734     } else {
1735         /* Transform "x && (a0 || a1) && (b0 || b1) && ..." into
1736          *           "(xa0b0 || xa0b1 || xa1b0 || xa1b1) && ...". */
1737         struct expr *as = expr_from_node(list_pop_front(&expr->andor));
1738         struct expr *bs = expr_from_node(list_pop_front(&expr->andor));
1739         struct expr *new = NULL;
1740         struct expr *or;
1741
1742         or = xmalloc(sizeof *or);
1743         or->type = EXPR_T_OR;
1744         list_init(&or->andor);
1745
1746         struct expr *a;
1747         LIST_FOR_EACH (a, node, &as->andor) {
1748             union mf_subvalue a_value, a_mask;
1749
1750             ovs_assert(a->type == EXPR_T_CMP);
1751             if (!mf_subvalue_intersect(&value, &mask,
1752                                        &a->cmp.value, &a->cmp.mask,
1753                                        &a_value, &a_mask)) {
1754                 continue;
1755             }
1756
1757             struct expr *b;
1758             LIST_FOR_EACH (b, node, &bs->andor) {
1759                 ovs_assert(b->type == EXPR_T_CMP);
1760                 if (!new) {
1761                     new = xmalloc(sizeof *new);
1762                     new->type = EXPR_T_CMP;
1763                     new->cmp.symbol = symbol;
1764                     new->cmp.relop = EXPR_R_EQ;
1765                 }
1766                 if (mf_subvalue_intersect(&a_value, &a_mask,
1767                                           &b->cmp.value, &b->cmp.mask,
1768                                           &new->cmp.value, &new->cmp.mask)) {
1769                     list_push_back(&or->andor, &new->node);
1770                     new = NULL;
1771                 }
1772             }
1773         }
1774         expr_destroy(as);
1775         expr_destroy(bs);
1776         free(new);
1777
1778         if (list_is_empty(&or->andor)) {
1779             expr_destroy(expr);
1780             free(or);
1781             return expr_create_boolean(false);
1782         } else if (list_is_short(&or->andor)) {
1783             struct expr *cmp = expr_from_node(list_pop_front(&or->andor));
1784             free(or);
1785             if (list_is_empty(&expr->andor)) {
1786                 expr_destroy(expr);
1787                 return crush_cmps(cmp, symbol);
1788             } else {
1789                 return crush_cmps(expr_combine(EXPR_T_AND, cmp, expr), symbol);
1790             }
1791         } else if (!list_is_empty(&expr->andor)) {
1792             struct expr *e = expr_combine(EXPR_T_AND, or, expr);
1793             ovs_assert(!list_is_short(&e->andor));
1794             return crush_cmps(e, symbol);
1795         } else {
1796             expr_destroy(expr);
1797             return crush_cmps(or, symbol);
1798         }
1799     }
1800 }
1801
1802 static int
1803 compare_expr(const void *a_, const void *b_)
1804 {
1805     const struct expr *const *ap = a_;
1806     const struct expr *const *bp = b_;
1807     const struct expr *a = *ap;
1808     const struct expr *b = *bp;
1809     int d = memcmp(&a->cmp.value, &b->cmp.value, sizeof a->cmp.value);
1810     if (!d) {
1811         d = memcmp(&a->cmp.mask, &b->cmp.mask, sizeof a->cmp.mask);
1812     }
1813     return d;
1814 }
1815
1816 /* Implementation of crush_cmps() for expr->type == EXPR_T_OR. */
1817 static struct expr *
1818 crush_or(struct expr *expr, const struct expr_symbol *symbol)
1819 {
1820     struct expr *sub, *next = NULL;
1821
1822     /* First, crush all the subexpressions.  That might eliminate the
1823      * OR-expression entirely; if so, return the result.  Otherwise, 'expr'
1824      * is now a disjunction of cmps over the same symbol. */
1825     LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
1826         list_remove(&sub->node);
1827         expr_insert_andor(expr, next, crush_cmps(sub, symbol));
1828     }
1829     expr = expr_fix(expr);
1830     if (expr->type != EXPR_T_OR) {
1831         return expr;
1832     }
1833
1834     /* Sort subexpressions by value and mask, to bring together duplicates. */
1835     size_t n = list_size(&expr->andor);
1836     struct expr **subs = xmalloc(n * sizeof *subs);
1837
1838     size_t i = 0;
1839     LIST_FOR_EACH (sub, node, &expr->andor) {
1840         subs[i++] = sub;
1841     }
1842     ovs_assert(i == n);
1843
1844     qsort(subs, n, sizeof *subs, compare_expr);
1845
1846     list_init(&expr->andor);
1847     list_push_back(&expr->andor, &subs[0]->node);
1848     for (i = 1; i < n; i++) {
1849         struct expr *a = expr_from_node(list_back(&expr->andor));
1850         struct expr *b = subs[i];
1851         if (memcmp(&a->cmp.value, &b->cmp.value, sizeof a->cmp.value)
1852             || memcmp(&a->cmp.mask, &b->cmp.mask, sizeof a->cmp.mask)) {
1853             list_push_back(&expr->andor, &b->node);
1854         } else {
1855             free(b);
1856         }
1857     }
1858     free(subs);
1859     return expr_fix(expr);
1860 }
1861
1862 /* Takes ownership of 'expr', which must be a cmp in the sense determined by
1863  * 'expr_is_cmp(expr)', where 'symbol' is the symbol returned by that function.
1864  * Returns an equivalent expression owned by the caller that is a single
1865  * EXPR_T_CMP or a disjunction of them or a EXPR_T_BOOLEAN. */
1866 static struct expr *
1867 crush_cmps(struct expr *expr, const struct expr_symbol *symbol)
1868 {
1869     switch (expr->type) {
1870     case EXPR_T_OR:
1871         return crush_or(expr, symbol);
1872
1873     case EXPR_T_AND:
1874         return crush_and(expr, symbol);
1875
1876     case EXPR_T_CMP:
1877         return expr;
1878
1879     case EXPR_T_BOOLEAN:
1880         return expr;
1881
1882     default:
1883         OVS_NOT_REACHED();
1884     }
1885 }
1886
1887 static struct expr *
1888 expr_sort(struct expr *expr)
1889 {
1890     size_t n = list_size(&expr->andor);
1891     struct expr_sort *subs = xmalloc(n * sizeof *subs);
1892     struct expr *sub;
1893     size_t i;
1894
1895     i = 0;
1896     LIST_FOR_EACH (sub, node, &expr->andor) {
1897         subs[i].expr = sub;
1898         subs[i].relop = expr_is_cmp(sub);
1899         subs[i].type = subs[i].relop ? EXPR_T_CMP : sub->type;
1900         i++;
1901     }
1902     ovs_assert(i == n);
1903
1904     qsort(subs, n, sizeof *subs, compare_expr_sort);
1905
1906     list_init(&expr->andor);
1907     for (int i = 0; i < n; ) {
1908         if (subs[i].relop) {
1909             int j;
1910             for (j = i + 1; j < n; j++) {
1911                 if (subs[i].relop != subs[j].relop) {
1912                     break;
1913                 }
1914             }
1915
1916             struct expr *crushed;
1917             if (j == i + 1) {
1918                 crushed = crush_cmps(subs[i].expr, subs[i].relop);
1919             } else {
1920                 struct expr *combined = subs[i].expr;
1921                 for (int k = i + 1; k < j; k++) {
1922                     combined = expr_combine(EXPR_T_AND, combined,
1923                                             subs[k].expr);
1924                 }
1925                 ovs_assert(!list_is_short(&combined->andor));
1926                 crushed = crush_cmps(combined, subs[i].relop);
1927             }
1928             if (crushed->type == EXPR_T_BOOLEAN) {
1929                 if (!crushed->boolean) {
1930                     for (int k = j; k < n; k++) {
1931                         expr_destroy(subs[k].expr);
1932                     }
1933                     expr_destroy(expr);
1934                     expr = crushed;
1935                     break;
1936                 } else {
1937                     free(crushed);
1938                 }
1939             } else {
1940                 expr = expr_combine(EXPR_T_AND, expr, crushed);
1941             }
1942             i = j;
1943         } else {
1944             expr = expr_combine(EXPR_T_AND, expr, subs[i++].expr);
1945         }
1946     }
1947     free(subs);
1948
1949     return expr;
1950 }
1951
1952 static struct expr *expr_normalize_or(struct expr *expr);
1953
1954 /* Returns 'expr', which is an AND, reduced to OR(AND(clause)) where
1955  * a clause is a cmp or a disjunction of cmps on a single field. */
1956 static struct expr *
1957 expr_normalize_and(struct expr *expr)
1958 {
1959     ovs_assert(expr->type == EXPR_T_AND);
1960
1961     expr = expr_sort(expr);
1962     if (expr->type != EXPR_T_AND) {
1963         ovs_assert(expr->type == EXPR_T_BOOLEAN);
1964         return expr;
1965     }
1966
1967     struct expr *a, *b;
1968     LIST_FOR_EACH_SAFE (a, b, node, &expr->andor) {
1969         if (&b->node == &expr->andor
1970             || a->type != EXPR_T_CMP || b->type != EXPR_T_CMP) {
1971         } else if (a->cmp.symbol != b->cmp.symbol) {
1972             continue;
1973         } else if (mf_subvalue_intersect(&a->cmp.value, &a->cmp.mask,
1974                                          &b->cmp.value, &b->cmp.mask,
1975                                          &b->cmp.value, &b->cmp.mask)) {
1976             list_remove(&a->node);
1977             expr_destroy(a);
1978         } else {
1979             expr_destroy(expr);
1980             return expr_create_boolean(false);
1981         }
1982     }
1983     if (list_is_short(&expr->andor)) {
1984         struct expr *sub = expr_from_node(list_front(&expr->andor));
1985         free(expr);
1986         return sub;
1987     }
1988
1989     struct expr *sub;
1990     LIST_FOR_EACH (sub, node, &expr->andor) {
1991         if (sub->type == EXPR_T_CMP) {
1992             continue;
1993         }
1994
1995         ovs_assert(sub->type == EXPR_T_OR);
1996         const struct expr_symbol *symbol = expr_is_cmp(sub);
1997         if (!symbol || symbol->must_crossproduct) {
1998             struct expr *or = expr_create_andor(EXPR_T_OR);
1999             struct expr *k;
2000
2001             LIST_FOR_EACH (k, node, &sub->andor) {
2002                 struct expr *and = expr_create_andor(EXPR_T_AND);
2003                 struct expr *m;
2004
2005                 LIST_FOR_EACH (m, node, &expr->andor) {
2006                     struct expr *term = m == sub ? k : m;
2007                     if (term->type == EXPR_T_AND) {
2008                         struct expr *p;
2009
2010                         LIST_FOR_EACH (p, node, &term->andor) {
2011                             struct expr *new = expr_clone(p);
2012                             list_push_back(&and->andor, &new->node);
2013                         }
2014                     } else {
2015                         struct expr *new = expr_clone(term);
2016                         list_push_back(&and->andor, &new->node);
2017                     }
2018                 }
2019                 list_push_back(&or->andor, &and->node);
2020             }
2021             expr_destroy(expr);
2022             return expr_normalize_or(or);
2023         }
2024     }
2025     return expr;
2026 }
2027
2028 static struct expr *
2029 expr_normalize_or(struct expr *expr)
2030 {
2031     struct expr *sub, *next;
2032
2033     LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
2034         if (sub->type == EXPR_T_AND) {
2035             list_remove(&sub->node);
2036
2037             struct expr *new = expr_normalize_and(sub);
2038             if (new->type == EXPR_T_BOOLEAN) {
2039                 if (new->boolean) {
2040                     expr_destroy(expr);
2041                     return new;
2042                 }
2043                 free(new);
2044             } else {
2045                 expr_insert_andor(expr, next, new);
2046             }
2047         } else {
2048             ovs_assert(sub->type == EXPR_T_CMP);
2049         }
2050     }
2051     if (list_is_empty(&expr->andor)) {
2052         free(expr);
2053         return expr_create_boolean(false);
2054     }
2055     if (list_is_short(&expr->andor)) {
2056         struct expr *sub = expr_from_node(list_pop_front(&expr->andor));
2057         free(expr);
2058         return sub;
2059     }
2060
2061     return expr;
2062 }
2063
2064 /* Takes ownership of 'expr', which is either a constant "true" or "false" or
2065  * an expression in terms of only relationals, AND, and OR.  Returns either a
2066  * constant "true" or "false" or 'expr' reduced to OR(AND(clause)) where a
2067  * clause is a cmp or a disjunction of cmps on a single field.  This form is
2068  * significant because it is a form that can be directly converted to OpenFlow
2069  * flows with the Open vSwitch "conjunctive match" extension.
2070  *
2071  * 'expr' must already have been simplified, with expr_simplify(). */
2072 struct expr *
2073 expr_normalize(struct expr *expr)
2074 {
2075     switch (expr->type) {
2076     case EXPR_T_CMP:
2077         return expr;
2078
2079     case EXPR_T_AND:
2080         return expr_normalize_and(expr);
2081
2082     case EXPR_T_OR:
2083         return expr_normalize_or(expr);
2084
2085     case EXPR_T_BOOLEAN:
2086         return expr;
2087     }
2088     OVS_NOT_REACHED();
2089 }
2090 \f
2091 /* Creates, initializes, and returns a new 'struct expr_match'.  If 'm' is
2092  * nonnull then it is copied into the new expr_match, otherwise the new
2093  * expr_match's 'match' member is initialized to a catch-all match for the
2094  * caller to refine in-place.
2095  *
2096  * If 'conj_id' is nonzero, adds one conjunction based on 'conj_id', 'clause',
2097  * and 'n_clauses' to the returned 'struct expr_match', otherwise the
2098  * expr_match will not have any conjunctions.
2099  *
2100  * The caller should use expr_match_add() to add the expr_match to a hash table
2101  * after it is finalized. */
2102 static struct expr_match *
2103 expr_match_new(const struct match *m, uint8_t clause, uint8_t n_clauses,
2104                uint32_t conj_id)
2105 {
2106     struct expr_match *match = xmalloc(sizeof *match);
2107     if (m) {
2108         match->match = *m;
2109     } else {
2110         match_init_catchall(&match->match);
2111     }
2112     if (conj_id) {
2113         match->conjunctions = xmalloc(sizeof *match->conjunctions);
2114         match->conjunctions[0].id = conj_id;
2115         match->conjunctions[0].clause = clause;
2116         match->conjunctions[0].n_clauses = n_clauses;
2117         match->n = 1;
2118         match->allocated = 1;
2119     } else {
2120         match->conjunctions = NULL;
2121         match->n = 0;
2122         match->allocated = 0;
2123     }
2124     return match;
2125 }
2126
2127 /* Adds 'match' to hash table 'matches', which becomes the new owner of
2128  * 'match'.
2129  *
2130  * This might actually destroy 'match' because it gets merged together with
2131  * some existing conjunction.*/
2132 static void
2133 expr_match_add(struct hmap *matches, struct expr_match *match)
2134 {
2135     uint32_t hash = match_hash(&match->match, 0);
2136     struct expr_match *m;
2137
2138     HMAP_FOR_EACH_WITH_HASH (m, hmap_node, hash, matches) {
2139         if (match_equal(&m->match, &match->match)) {
2140             if (!m->n || !match->n) {
2141                 free(m->conjunctions);
2142                 m->conjunctions = NULL;
2143                 m->n = 0;
2144                 m->allocated = 0;
2145             } else {
2146                 ovs_assert(match->n == 1);
2147                 if (m->n >= m->allocated) {
2148                     m->conjunctions = x2nrealloc(m->conjunctions,
2149                                                  &m->allocated,
2150                                                  sizeof *m->conjunctions);
2151                 }
2152                 m->conjunctions[m->n++] = match->conjunctions[0];
2153             }
2154             free(match->conjunctions);
2155             free(match);
2156             return;
2157         }
2158     }
2159
2160     hmap_insert(matches, &match->hmap_node, hash);
2161 }
2162
2163 static bool
2164 constrain_match(const struct expr *expr, const struct simap *ports,
2165                 struct match *m)
2166 {
2167     ovs_assert(expr->type == EXPR_T_CMP);
2168     if (expr->cmp.symbol->width) {
2169         mf_mask_subfield(expr->cmp.symbol->field, &expr->cmp.value,
2170                          &expr->cmp.mask, m);
2171     } else {
2172         const struct simap_node *node;
2173         node = ports ? simap_find(ports, expr->cmp.string) : NULL;
2174         if (!node) {
2175             return false;
2176         }
2177
2178         struct mf_subfield sf;
2179         sf.field = expr->cmp.symbol->field;
2180         sf.ofs = 0;
2181         sf.n_bits = expr->cmp.symbol->field->n_bits;
2182
2183         union mf_subvalue x;
2184         memset(&x, 0, sizeof x);
2185         x.integer = htonll(node->data);
2186
2187         mf_write_subfield(&sf, &x, m);
2188     }
2189     return true;
2190 }
2191
2192 static bool
2193 add_disjunction(const struct expr *or, const struct simap *ports,
2194                 struct match *m, uint8_t clause, uint8_t n_clauses,
2195                 uint32_t conj_id, struct hmap *matches)
2196 {
2197     struct expr *sub;
2198     int n = 0;
2199
2200     ovs_assert(or->type == EXPR_T_OR);
2201     LIST_FOR_EACH (sub, node, &or->andor) {
2202         struct expr_match *match = expr_match_new(m, clause, n_clauses,
2203                                                   conj_id);
2204         if (constrain_match(sub, ports, &match->match)) {
2205             expr_match_add(matches, match);
2206             n++;
2207         } else {
2208             free(match->conjunctions);
2209             free(match);
2210         }
2211     }
2212
2213     /* If n == 1, then this didn't really need to be a disjunction.  Oh well,
2214      * that shouldn't happen much. */
2215     return n > 0;
2216 }
2217
2218 static void
2219 add_conjunction(const struct expr *and, const struct simap *ports,
2220                 uint32_t *n_conjsp, struct hmap *matches)
2221 {
2222     struct match match;
2223     int n_clauses = 0;
2224     struct expr *sub;
2225
2226     match_init_catchall(&match);
2227
2228     ovs_assert(and->type == EXPR_T_AND);
2229     LIST_FOR_EACH (sub, node, &and->andor) {
2230         switch (sub->type) {
2231         case EXPR_T_CMP:
2232             if (!constrain_match(sub, ports, &match)) {
2233                 return;
2234             }
2235             break;
2236         case EXPR_T_OR:
2237             n_clauses++;
2238             break;
2239         case EXPR_T_AND:
2240         case EXPR_T_BOOLEAN:
2241             OVS_NOT_REACHED();
2242         }
2243     }
2244
2245     if (!n_clauses) {
2246         expr_match_add(matches, expr_match_new(&match, 0, 0, 0));
2247     } else if (n_clauses == 1) {
2248         LIST_FOR_EACH (sub, node, &and->andor) {
2249             if (sub->type == EXPR_T_OR) {
2250                 add_disjunction(sub, ports, &match, 0, 0, 0, matches);
2251             }
2252         }
2253     } else {
2254         int clause = 0;
2255         (*n_conjsp)++;
2256         LIST_FOR_EACH (sub, node, &and->andor) {
2257             if (sub->type == EXPR_T_OR) {
2258                 if (!add_disjunction(sub, ports, &match, clause++,
2259                                      n_clauses, *n_conjsp, matches)) {
2260                     /* This clause can't ever match, so we might as well skip
2261                      * adding the other clauses--the overall disjunctive flow
2262                      * can't ever match.  Ideally we would also back out all of
2263                      * the clauses we already added, but that seems like a lot
2264                      * of trouble for a case that might never occur in
2265                      * practice. */
2266                     return;
2267                 }
2268             }
2269         }
2270
2271         /* Add the flow that matches on conj_id. */
2272         match_set_conj_id(&match, *n_conjsp);
2273         expr_match_add(matches, expr_match_new(&match, 0, 0, 0));
2274     }
2275 }
2276
2277 static void
2278 add_cmp_flow(const struct expr *cmp, const struct simap *ports,
2279              struct hmap *matches)
2280 {
2281     struct expr_match *m = expr_match_new(NULL, 0, 0, 0);
2282     if (constrain_match(cmp, ports, &m->match)) {
2283         expr_match_add(matches, m);
2284     } else {
2285         free(m);
2286     }
2287 }
2288
2289 /* Converts 'expr', which must be in the form returned by expr_normalize(), to
2290  * a collection of Open vSwitch flows in 'matches', which this function
2291  * initializes to an hmap of "struct expr_match" structures.  Returns the
2292  * number of conjunctive match IDs consumed by 'matches', which uses
2293  * conjunctive match IDs beginning with 0; the caller must offset or remap them
2294  * into the desired range as necessary.
2295  *
2296  * The matches inserted into 'matches' will be of three distinct kinds:
2297  *
2298  *     - Ordinary flows.  The caller should add these OpenFlow flows with
2299  *       its desired actions.
2300  *
2301  *     - Conjunctive flows, distinguished by 'n > 0' in the expr_match
2302  *       structure.  The caller should add these OpenFlow flows with the
2303  *       conjunction(id, k/n) actions as specified in the 'conjunctions' array,
2304  *       remapping the ids.
2305  *
2306  *     - conj_id flows, distinguished by matching on the "conj_id" field.  The
2307  *       caller should remap the conj_id and add the OpenFlow flow with its
2308  *       desired actions.
2309  *
2310  * 'ports' must be a map from strings (presumably names of ports) to integers.
2311  * Any comparisons against string fields in 'expr' are translated into integers
2312  * through this map.  A comparison against a string that is not in 'ports' acts
2313  * like a Boolean "false"; that is, it will always fail to match.  For a simple
2314  * expression, this means that the overall expression always fails to match,
2315  * but an expression with a disjunction on the string field might still match
2316  * on other port names.
2317  *
2318  * (This treatment of string fields might be too simplistic in general, but it
2319  * seems reasonable for now when string fields are used only for ports.) */
2320 uint32_t
2321 expr_to_matches(const struct expr *expr, const struct simap *ports,
2322                 struct hmap *matches)
2323 {
2324     uint32_t n_conjs = 0;
2325
2326     hmap_init(matches);
2327     switch (expr->type) {
2328     case EXPR_T_CMP:
2329         add_cmp_flow(expr, ports, matches);
2330         break;
2331
2332     case EXPR_T_AND:
2333         add_conjunction(expr, ports, &n_conjs, matches);
2334         break;
2335
2336     case EXPR_T_OR:
2337         if (expr_is_cmp(expr)) {
2338             struct expr *sub;
2339
2340             LIST_FOR_EACH (sub, node, &expr->andor) {
2341                 add_cmp_flow(sub, ports, matches);
2342             }
2343         } else {
2344             struct expr *sub;
2345
2346             LIST_FOR_EACH (sub, node, &expr->andor) {
2347                 if (sub->type == EXPR_T_AND) {
2348                     add_conjunction(sub, ports, &n_conjs, matches);
2349                 } else {
2350                     add_cmp_flow(sub, ports, matches);
2351                 }
2352             }
2353         }
2354         break;
2355
2356     case EXPR_T_BOOLEAN:
2357         if (expr->boolean) {
2358             struct expr_match *m = expr_match_new(NULL, 0, 0, 0);
2359             expr_match_add(matches, m);
2360         } else {
2361             /* No match. */
2362         }
2363         break;
2364     }
2365     return n_conjs;
2366 }
2367
2368 /* Destroys all of the 'struct expr_match'es in 'matches', as well as the
2369  * 'matches' hmap itself. */
2370 void
2371 expr_matches_destroy(struct hmap *matches)
2372 {
2373     struct expr_match *m, *n;
2374
2375     HMAP_FOR_EACH_SAFE (m, n, hmap_node, matches) {
2376         hmap_remove(matches, &m->hmap_node);
2377         free(m->conjunctions);
2378         free(m);
2379     }
2380     hmap_destroy(matches);
2381 }
2382
2383 /* Prints a representation of the 'struct expr_match'es in 'matches' to
2384  * 'stream'. */
2385 void
2386 expr_matches_print(const struct hmap *matches, FILE *stream)
2387 {
2388     if (hmap_is_empty(matches)) {
2389         fputs("(no flows)\n", stream);
2390         return;
2391     }
2392
2393     const struct expr_match *m;
2394     HMAP_FOR_EACH (m, hmap_node, matches) {
2395         char *s = match_to_string(&m->match, OFP_DEFAULT_PRIORITY);
2396         fputs(s, stream);
2397         free(s);
2398
2399         if (m->n) {
2400             for (int i = 0; i < m->n; i++) {
2401                 const struct cls_conjunction *c = &m->conjunctions[i];
2402                 fprintf(stream, "%c conjunction(%"PRIu32", %d/%d)",
2403                         i == 0 ? ':' : ',', c->id, c->clause, c->n_clauses);
2404             }
2405         }
2406         putc('\n', stream);
2407     }
2408 }
2409 \f
2410 /* Returns true if 'expr' honors the invariants for expressions (see the large
2411  * comment above "struct expr" in expr.h), false otherwise. */
2412 bool
2413 expr_honors_invariants(const struct expr *expr)
2414 {
2415     const struct expr *sub;
2416
2417     switch (expr->type) {
2418     case EXPR_T_CMP:
2419         if (expr->cmp.symbol->width) {
2420             for (int i = 0; i < ARRAY_SIZE(expr->cmp.value.be64); i++) {
2421                 if (expr->cmp.value.be64[i] & ~expr->cmp.mask.be64[i]) {
2422                     return false;
2423                 }
2424             }
2425         }
2426         return true;
2427
2428     case EXPR_T_AND:
2429     case EXPR_T_OR:
2430         if (list_is_short(&expr->andor)) {
2431             return false;
2432         }
2433         LIST_FOR_EACH (sub, node, &expr->andor) {
2434             if (sub->type == expr->type || !expr_honors_invariants(sub)) {
2435                 return false;
2436             }
2437         }
2438         return true;
2439
2440     case EXPR_T_BOOLEAN:
2441         return true;
2442
2443     default:
2444         OVS_NOT_REACHED();
2445     }
2446 }
2447
2448 static bool
2449 expr_is_normalized_and(const struct expr *expr)
2450 {
2451     /* XXX should also check that no symbol is repeated. */
2452     const struct expr *sub;
2453
2454     LIST_FOR_EACH (sub, node, &expr->andor) {
2455         if (!expr_is_cmp(sub)) {
2456             return false;
2457         }
2458     }
2459     return true;
2460 }
2461
2462 /* Returns true if 'expr' is in the form returned by expr_normalize(), false
2463  * otherwise. */
2464 bool
2465 expr_is_normalized(const struct expr *expr)
2466 {
2467     switch (expr->type) {
2468     case EXPR_T_CMP:
2469         return true;
2470
2471     case EXPR_T_AND:
2472         return expr_is_normalized_and(expr);
2473
2474     case EXPR_T_OR:
2475         if (!expr_is_cmp(expr)) {
2476             const struct expr *sub;
2477
2478             LIST_FOR_EACH (sub, node, &expr->andor) {
2479                 if (!expr_is_cmp(sub) && !expr_is_normalized_and(sub)) {
2480                     return false;
2481                 }
2482             }
2483         }
2484         return true;
2485
2486     case EXPR_T_BOOLEAN:
2487         return true;
2488
2489     default:
2490         OVS_NOT_REACHED();
2491     }
2492 }
2493 \f
2494 /* Action parsing helper. */
2495
2496 static struct expr *
2497 parse_assignment(struct expr_context *ctx, const struct simap *ports,
2498                  struct ofpbuf *ofpacts)
2499 {
2500     struct expr *prereqs = NULL;
2501
2502     struct expr_field f;
2503     if (!parse_field(ctx, &f)) {
2504         goto exit;
2505     }
2506     if (!lexer_match(ctx->lexer, LEX_T_EQUALS)) {
2507         expr_syntax_error(ctx, "expecting `='.");
2508         goto exit;
2509     }
2510
2511     if (f.symbol->expansion && f.symbol->level != EXPR_L_ORDINAL) {
2512         expr_error(ctx, "Can't assign to predicate symbol %s.",
2513                    f.symbol->name);
2514         goto exit;
2515     }
2516
2517     struct expr_constant_set cs;
2518     if (!parse_constant_set(ctx, &cs)) {
2519         goto exit;
2520     }
2521
2522     if (!type_check(ctx, &f, &cs)) {
2523         goto exit_destroy_cs;
2524     }
2525     if (cs.in_curlies) {
2526         expr_error(ctx, "Assignments require a single value.");
2527         goto exit_destroy_cs;
2528     }
2529
2530     const struct expr_symbol *orig_symbol = f.symbol;
2531     union expr_constant *c = cs.values;
2532     for (;;) {
2533         /* Accumulate prerequisites. */
2534         if (f.symbol->prereqs) {
2535             struct ovs_list nesting = OVS_LIST_INITIALIZER(&nesting);
2536             char *error;
2537             struct expr *e;
2538             e = parse_and_annotate(f.symbol->prereqs, ctx->symtab, &nesting,
2539                                    &error);
2540             if (error) {
2541                 expr_error(ctx, "%s", error);
2542                 free(error);
2543                 goto exit_destroy_cs;
2544             }
2545             prereqs = expr_combine(EXPR_T_AND, prereqs, e);
2546         }
2547
2548         /* If there's no expansion, we're done. */
2549         if (!f.symbol->expansion) {
2550             break;
2551         }
2552
2553         /* Expand. */
2554         struct expr_field expansion;
2555         char *error;
2556         if (!parse_field_from_string(f.symbol->expansion, ctx->symtab,
2557                                      &expansion, &error)) {
2558             expr_error(ctx, "%s", error);
2559             free(error);
2560             goto exit_destroy_cs;
2561         }
2562         f.symbol = expansion.symbol;
2563         f.ofs += expansion.ofs;
2564     }
2565
2566     if (!f.symbol->field->writable) {
2567         expr_error(ctx, "Field %s is not modifiable.", orig_symbol->name);
2568         goto exit_destroy_cs;
2569     }
2570
2571     struct ofpact_set_field *sf = ofpact_put_SET_FIELD(ofpacts);
2572     sf->field = f.symbol->field;
2573     if (f.symbol->width) {
2574         mf_subvalue_shift(&c->value, f.ofs);
2575         if (!c->masked) {
2576             memset(&c->mask, 0, sizeof c->mask);
2577             bitwise_one(&c->mask, sizeof c->mask, f.ofs, f.n_bits);
2578         } else {
2579             mf_subvalue_shift(&c->mask, f.ofs);
2580         }
2581
2582         memcpy(&sf->value, &c->value.u8[sizeof c->value - sf->field->n_bytes],
2583                sf->field->n_bytes);
2584         memcpy(&sf->mask, &c->mask.u8[sizeof c->mask - sf->field->n_bytes],
2585                sf->field->n_bytes);
2586     } else {
2587         uint32_t port = simap_get(ports, c->string);
2588         bitwise_put(port, &sf->value,
2589                     sf->field->n_bytes, 0, sf->field->n_bits);
2590         bitwise_put(UINT64_MAX, &sf->mask,
2591                     sf->field->n_bytes, 0, sf->field->n_bits);
2592     }
2593
2594 exit_destroy_cs:
2595     expr_constant_set_destroy(&cs);
2596 exit:
2597     return prereqs;
2598 }
2599
2600 /* A helper for actions_parse(), to parse an OVN assignment action in the form
2601  * "field = value" into 'ofpacts'.  The parameters and return value match those
2602  * for actions_parse(). */
2603 char *
2604 expr_parse_assignment(struct lexer *lexer, const struct shash *symtab,
2605                       const struct simap *ports,
2606                       struct ofpbuf *ofpacts, struct expr **prereqsp)
2607 {
2608     struct expr_context ctx;
2609     ctx.lexer = lexer;
2610     ctx.symtab = symtab;
2611     ctx.error = NULL;
2612     ctx.not = false;
2613
2614     struct expr *prereqs = parse_assignment(&ctx, ports, ofpacts);
2615     if (ctx.error) {
2616         expr_destroy(prereqs);
2617         prereqs = NULL;
2618     }
2619     *prereqsp = prereqs;
2620     return ctx.error;
2621 }