ovn/lib/expr.c

   1 /*
   2  * Copyright (c) 2015 Nicira, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at:
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <config.h>
  18 #include "expr.h"
  19 #include "dynamic-string.h"
  20 #include "json.h"
  21 #include "lex.h"
  22 #include "match.h"
  23 #include "shash.h"
  24 #include "simap.h"
  25 #include "openvswitch/vlog.h"
  26
  27 VLOG_DEFINE_THIS_MODULE(expr);
  28 \f
  29 /* Returns the name of measurement level 'level'. */
  30 const char *
  31 expr_level_to_string(enum expr_level level)
  32 {
  33     switch (level) {
  34     case EXPR_L_NOMINAL: return "nominal";
  35     case EXPR_L_BOOLEAN: return "Boolean";
  36     case EXPR_L_ORDINAL: return "ordinal";
  37     default: OVS_NOT_REACHED();
  38     }
  39 }
  40 \f
  41 /* Relational operators. */
  42
  43 /* Returns a string form of relational operator 'relop'. */
  44 const char *
  45 expr_relop_to_string(enum expr_relop relop)
  46 {
  47     switch (relop) {
  48     case EXPR_R_EQ: return "==";
  49     case EXPR_R_NE: return "!=";
  50     case EXPR_R_LT: return "<";
  51     case EXPR_R_LE: return "<=";
  52     case EXPR_R_GT: return ">";
  53     case EXPR_R_GE: return ">=";
  54     default: OVS_NOT_REACHED();
  55     }
  56 }
  57
  58 bool
  59 expr_relop_from_token(enum lex_type type, enum expr_relop *relop)
  60 {
  61     enum expr_relop r;
  62
  63     switch ((int) type) {
  64     case LEX_T_EQ: r = EXPR_R_EQ; break;
  65     case LEX_T_NE: r = EXPR_R_NE; break;
  66     case LEX_T_LT: r = EXPR_R_LT; break;
  67     case LEX_T_LE: r = EXPR_R_LE; break;
  68     case LEX_T_GT: r = EXPR_R_GT; break;
  69     case LEX_T_GE: r = EXPR_R_GE; break;
  70     default: return false;
  71     }
  72
  73     if (relop) {
  74         *relop = r;
  75     }
  76     return true;
  77 }
  78
  79 /* Returns the relational operator that 'relop' becomes if you turn the
  80  * relation's operands around, e.g. EXPR_R_EQ does not change because "a == b"
  81  * and "b == a" are equivalent, but EXPR_R_LE becomes EXPR_R_GE because "a <=
  82  * b" is equivalent to "b >= a". */
  83 static enum expr_relop
  84 expr_relop_turn(enum expr_relop relop)
  85 {
  86     switch (relop) {
  87     case EXPR_R_EQ: return EXPR_R_EQ;
  88     case EXPR_R_NE: return EXPR_R_NE;
  89     case EXPR_R_LT: return EXPR_R_GT;
  90     case EXPR_R_LE: return EXPR_R_GE;
  91     case EXPR_R_GT: return EXPR_R_LT;
  92     case EXPR_R_GE: return EXPR_R_LE;
  93     default: OVS_NOT_REACHED();
  94     }
  95 }
  96
  97 /* Returns the relational operator that is the opposite of 'relop'. */
  98 static enum expr_relop
  99 expr_relop_invert(enum expr_relop relop)
 100 {
 101     switch (relop) {
 102     case EXPR_R_EQ: return EXPR_R_NE;
 103     case EXPR_R_NE: return EXPR_R_EQ;
 104     case EXPR_R_LT: return EXPR_R_GE;
 105     case EXPR_R_LE: return EXPR_R_GT;
 106     case EXPR_R_GT: return EXPR_R_LE;
 107     case EXPR_R_GE: return EXPR_R_LT;
 108     default: OVS_NOT_REACHED();
 109     }
 110 }
 111 \f
 112 /* Constructing and manipulating expressions. */
 113
 114 /* Creates and returns a logical AND or OR expression (according to 'type',
 115  * which must be EXPR_T_AND or EXPR_T_OR) that initially has no
 116  * sub-expressions.  (To satisfy the invariants for expressions, the caller
 117  * must add at least two sub-expressions whose types are different from
 118  * 'type'.) */
 119 struct expr *
 120 expr_create_andor(enum expr_type type)
 121 {
 122     struct expr *e = xmalloc(sizeof *e);
 123     e->type = type;
 124     list_init(&e->andor);
 125     return e;
 126 }
 127
 128 /* Returns a logical AND or OR expression (according to 'type', which must be
 129  * EXPR_T_AND or EXPR_T_OR) whose sub-expressions are 'a' and 'b', with some
 130  * flexibility:
 131  *
 132  *     - If 'a' or 'b' is NULL, just returns the other one (which means that if
 133  *       that other one is not of the given 'type', then the returned
 134  *       expression is not either).
 135  *
 136  *     - If 'a' or 'b', or both, have type 'type', then they are combined into
 137  *       a single node that satisfies the invariants for expressions. */
 138 struct expr *
 139 expr_combine(enum expr_type type, struct expr *a, struct expr *b)
 140 {
 141     if (!a) {
 142         return b;
 143     } else if (!b) {
 144         return a;
 145     } else if (a->type == type) {
 146         if (b->type == type) {
 147             list_splice(&a->andor, b->andor.next, &b->andor);
 148             free(b);
 149         } else {
 150             list_push_back(&a->andor, &b->node);
 151         }
 152         return a;
 153     } else if (b->type == type) {
 154         list_push_front(&b->andor, &a->node);
 155         return b;
 156     } else {
 157         struct expr *e = expr_create_andor(type);
 158         list_push_back(&e->andor, &a->node);
 159         list_push_back(&e->andor, &b->node);
 160         return e;
 161     }
 162 }
 163
 164 static void
 165 expr_insert_andor(struct expr *andor, struct expr *before, struct expr *new)
 166 {
 167     if (new->type == andor->type) {
 168         if (andor->type == EXPR_T_AND) {
 169             /* Conjunction junction, what's your function? */
 170         }
 171         list_splice(&before->node, new->andor.next, &new->andor);
 172         free(new);
 173     } else {
 174         list_insert(&before->node, &new->node);
 175     }
 176 }
 177
 178 /* Returns an EXPR_T_BOOLEAN expression with value 'b'. */
 179 struct expr *
 180 expr_create_boolean(bool b)
 181 {
 182     struct expr *e = xmalloc(sizeof *e);
 183     e->type = EXPR_T_BOOLEAN;
 184     e->boolean = b;
 185     return e;
 186 }
 187
 188 static void
 189 expr_not(struct expr *expr)
 190 {
 191     struct expr *sub;
 192
 193     switch (expr->type) {
 194     case EXPR_T_CMP:
 195         expr->cmp.relop = expr_relop_invert(expr->cmp.relop);
 196         break;
 197
 198     case EXPR_T_AND:
 199     case EXPR_T_OR:
 200         LIST_FOR_EACH (sub, node, &expr->andor) {
 201             expr_not(sub);
 202         }
 203         expr->type = expr->type == EXPR_T_AND ? EXPR_T_OR : EXPR_T_AND;
 204         break;
 205
 206     case EXPR_T_BOOLEAN:
 207         expr->boolean = !expr->boolean;
 208         break;
 209     default:
 210         OVS_NOT_REACHED();
 211     }
 212 }
 213
 214 static struct expr *
 215 expr_fix_andor(struct expr *expr, bool short_circuit)
 216 {
 217     struct expr *sub, *next;
 218
 219     LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
 220         if (sub->type == EXPR_T_BOOLEAN) {
 221             if (sub->boolean == short_circuit) {
 222                 expr_destroy(expr);
 223                 return expr_create_boolean(short_circuit);
 224             } else {
 225                 list_remove(&sub->node);
 226                 expr_destroy(sub);
 227             }
 228         }
 229     }
 230
 231     if (list_is_short(&expr->andor)) {
 232         if (list_is_empty(&expr->andor)) {
 233             free(expr);
 234             return expr_create_boolean(!short_circuit);
 235         } else {
 236             sub = expr_from_node(list_front(&expr->andor));
 237             free(expr);
 238             return sub;
 239         }
 240     } else {
 241         return expr;
 242     }
 243 }
 244
 245 static struct expr *
 246 expr_fix(struct expr *expr)
 247 {
 248     switch (expr->type) {
 249     case EXPR_T_CMP:
 250         return expr;
 251
 252     case EXPR_T_AND:
 253         return expr_fix_andor(expr, false);
 254
 255     case EXPR_T_OR:
 256         return expr_fix_andor(expr, true);
 257
 258     case EXPR_T_BOOLEAN:
 259         return expr;
 260
 261     default:
 262         OVS_NOT_REACHED();
 263     }
 264 }
 265 \f
 266 /* Formatting. */
 267
 268 static void
 269 find_bitwise_range(const union mf_subvalue *sv, int width,
 270                    int *startp, int *n_bitsp)
 271 {
 272     unsigned int start = bitwise_scan(sv, sizeof *sv, true, 0, width);
 273     if (start < width) {
 274         unsigned int end = bitwise_scan(sv, sizeof *sv, false, start, width);
 275         if (end >= width
 276             || bitwise_scan(sv, sizeof *sv, true, end, width) >= width) {
 277             *startp = start;
 278             *n_bitsp = end - start;
 279             return;
 280         }
 281     }
 282     *startp = *n_bitsp = 0;
 283 }
 284
 285 static void
 286 expr_format_string(const char *s, struct ds *ds)
 287 {
 288     struct json json = {
 289         .type = JSON_STRING,
 290         .u.string = CONST_CAST(char *, s),
 291     };
 292     json_to_ds(&json, 0, ds);
 293 }
 294
 295 static void
 296 expr_format_cmp(const struct expr *e, struct ds *s)
 297 {
 298     /* The common case is numerical comparisons.
 299      * Handle string comparisons as a special case. */
 300     if (!e->cmp.symbol->width) {
 301         ds_put_format(s, "%s %s ", e->cmp.symbol->name,
 302                       expr_relop_to_string(e->cmp.relop));
 303         expr_format_string(e->cmp.string, s);
 304         return;
 305     }
 306
 307     int ofs, n;
 308     find_bitwise_range(&e->cmp.mask, e->cmp.symbol->width, &ofs, &n);
 309     if (n == 1 && (e->cmp.relop == EXPR_R_EQ || e->cmp.relop == EXPR_R_NE)) {
 310         bool positive;
 311
 312         positive = bitwise_get_bit(&e->cmp.value, sizeof e->cmp.value, ofs);
 313         positive ^= e->cmp.relop == EXPR_R_NE;
 314         if (!positive) {
 315             ds_put_char(s, '!');
 316         }
 317         ds_put_cstr(s, e->cmp.symbol->name);
 318         if (e->cmp.symbol->width > 1) {
 319             ds_put_format(s, "[%d]", ofs);
 320         }
 321         return;
 322     }
 323
 324     ds_put_cstr(s, e->cmp.symbol->name);
 325     if (n > 0 && n < e->cmp.symbol->width) {
 326         if (n > 1) {
 327             ds_put_format(s, "[%d..%d]", ofs, ofs + n - 1);
 328         } else {
 329             ds_put_format(s, "[%d]", ofs);
 330         }
 331     }
 332
 333     ds_put_format(s, " %s ", expr_relop_to_string(e->cmp.relop));
 334
 335     if (n) {
 336         union mf_subvalue value;
 337
 338         memset(&value, 0, sizeof value);
 339         bitwise_copy(&e->cmp.value, sizeof e->cmp.value, ofs,
 340                      &value, sizeof value, 0,
 341                      n);
 342         mf_format_subvalue(&value, s);
 343     } else {
 344         mf_format_subvalue(&e->cmp.value, s);
 345         ds_put_char(s, '/');
 346         mf_format_subvalue(&e->cmp.mask, s);
 347     }
 348 }
 349
 350 static void
 351 expr_format_andor(const struct expr *e, const char *op, struct ds *s)
 352 {
 353     struct expr *sub;
 354     int i = 0;
 355
 356     LIST_FOR_EACH (sub, node, &e->andor) {
 357         if (i++) {
 358             ds_put_format(s, " %s ", op);
 359         }
 360
 361         if (sub->type == EXPR_T_AND || sub->type == EXPR_T_OR) {
 362             ds_put_char(s, '(');
 363             expr_format(sub, s);
 364             ds_put_char(s, ')');
 365         } else {
 366             expr_format(sub, s);
 367         }
 368     }
 369 }
 370
 371 /* Appends a string form of 'e' to 's'.  The string form is acceptable for
 372  * parsing back into an equivalent expression. */
 373 void
 374 expr_format(const struct expr *e, struct ds *s)
 375 {
 376     switch (e->type) {
 377     case EXPR_T_CMP:
 378         expr_format_cmp(e, s);
 379         break;
 380
 381     case EXPR_T_AND:
 382         expr_format_andor(e, "&&", s);
 383         break;
 384
 385     case EXPR_T_OR:
 386         expr_format_andor(e, "||", s);
 387         break;
 388
 389     case EXPR_T_BOOLEAN:
 390         ds_put_char(s, e->boolean ? '1' : '0');
 391         break;
 392     }
 393 }
 394
 395 /* Prints a string form of 'e' on stdout, followed by a new-line. */
 396 void
 397 expr_print(const struct expr *e)
 398 {
 399     struct ds output;
 400
 401     ds_init(&output);
 402     expr_format(e, &output);
 403     puts(ds_cstr(&output));
 404     ds_destroy(&output);
 405 }
 406 \f
 407 /* Parsing. */
 408
 409 /* Type of a "union expr_constant" or "struct expr_constant_set". */
 410 enum expr_constant_type {
 411     EXPR_C_INTEGER,
 412     EXPR_C_STRING
 413 };
 414
 415 /* A string or integer constant (one must know which from context). */
 416 union expr_constant {
 417     /* Integer constant.
 418      *
 419      * The width of a constant isn't always clear, e.g. if you write "1",
 420      * there's no way to tell whether you mean for that to be a 1-bit constant
 421      * or a 128-bit constant or somewhere in between. */
 422     struct {
 423         union mf_subvalue value;
 424         union mf_subvalue mask; /* Only initialized if 'masked'. */
 425         bool masked;
 426
 427         enum lex_format format; /* From the constant's lex_token. */
 428     };
 429
 430     /* Null-terminated string constant. */
 431     char *string;
 432 };
 433
 434 /* A collection of "union expr_constant"s of the same type. */
 435 struct expr_constant_set {
 436     union expr_constant *values;  /* Constants. */
 437     size_t n_values;              /* Number of constants. */
 438     enum expr_constant_type type; /* Type of the constants. */
 439     bool in_curlies;              /* Whether the constants were in {}. */
 440 };
 441
 442 /* A reference to a symbol or a subfield of a symbol.
 443  *
 444  * For string fields, ofs and n_bits are 0. */
 445 struct expr_field {
 446     const struct expr_symbol *symbol; /* The symbol. */
 447     int ofs;                          /* Starting bit offset. */
 448     int n_bits;                       /* Number of bits. */
 449 };
 450
 451 /* Context maintained during expr_parse(). */
 452 struct expr_context {
 453     struct lexer *lexer;        /* Lexer for pulling more tokens. */
 454     const struct shash *symtab; /* Symbol table. */
 455     char *error;                /* Error, if any, otherwise NULL. */
 456     bool not;                   /* True inside odd number of NOT operators. */
 457 };
 458
 459 struct expr *expr_parse__(struct expr_context *);
 460 static void expr_not(struct expr *);
 461 static void expr_constant_set_destroy(struct expr_constant_set *);
 462 static bool parse_field(struct expr_context *, struct expr_field *);
 463
 464 static bool
 465 expr_error_handle_common(struct expr_context *ctx)
 466 {
 467     if (ctx->error) {
 468         /* Already have an error, suppress this one since the cascade seems
 469          * unlikely to be useful. */
 470         return true;
 471     } else if (ctx->lexer->token.type == LEX_T_ERROR) {
 472         /* The lexer signaled an error.  Nothing at the expression level
 473          * accepts an error token, so we'll inevitably end up here with some
 474          * meaningless parse error.  Report the lexical error instead. */
 475         ctx->error = xstrdup(ctx->lexer->token.s);
 476         return true;
 477     } else {
 478         return false;
 479     }
 480 }
 481
 482 static void OVS_PRINTF_FORMAT(2, 3)
 483 expr_error(struct expr_context *ctx, const char *message, ...)
 484 {
 485     if (expr_error_handle_common(ctx)) {
 486         return;
 487     }
 488
 489     va_list args;
 490     va_start(args, message);
 491     ctx->error = xvasprintf(message, args);
 492     va_end(args);
 493 }
 494
 495 static void OVS_PRINTF_FORMAT(2, 3)
 496 expr_syntax_error(struct expr_context *ctx, const char *message, ...)
 497 {
 498     if (expr_error_handle_common(ctx)) {
 499         return;
 500     }
 501
 502     struct ds s;
 503
 504     ds_init(&s);
 505     ds_put_cstr(&s, "Syntax error ");
 506     if (ctx->lexer->token.type == LEX_T_END) {
 507         ds_put_cstr(&s, "at end of input ");
 508     } else if (ctx->lexer->start) {
 509         ds_put_format(&s, "at `%.*s' ",
 510                       (int) (ctx->lexer->input - ctx->lexer->start),
 511                       ctx->lexer->start);
 512     }
 513
 514     va_list args;
 515     va_start(args, message);
 516     ds_put_format_valist(&s, message, args);
 517     va_end(args);
 518
 519     ctx->error = ds_steal_cstr(&s);
 520 }
 521
 522 static struct expr *
 523 make_cmp__(const struct expr_field *f, enum expr_relop r,
 524              const union expr_constant *c)
 525 {
 526     struct expr *e = xzalloc(sizeof *e);
 527     e->type = EXPR_T_CMP;
 528     e->cmp.symbol = f->symbol;
 529     e->cmp.relop = r;
 530     if (f->symbol->width) {
 531         bitwise_copy(&c->value, sizeof c->value, 0,
 532                      &e->cmp.value, sizeof e->cmp.value, f->ofs,
 533                      f->n_bits);
 534         if (c->masked) {
 535             bitwise_copy(&c->mask, sizeof c->mask, 0,
 536                          &e->cmp.mask, sizeof e->cmp.mask, f->ofs,
 537                          f->n_bits);
 538         } else {
 539             bitwise_one(&e->cmp.mask, sizeof e->cmp.mask, f->ofs,
 540                         f->n_bits);
 541         }
 542     } else {
 543         e->cmp.string = xstrdup(c->string);
 544     }
 545     return e;
 546 }
 547
 548 /* Returns the minimum reasonable width for integer constant 'c'. */
 549 static int
 550 expr_constant_width(const union expr_constant *c)
 551 {
 552     if (c->masked) {
 553         return mf_subvalue_width(&c->mask);
 554     }
 555
 556     switch (c->format) {
 557     case LEX_F_DECIMAL:
 558     case LEX_F_HEXADECIMAL:
 559         return mf_subvalue_width(&c->value);
 560
 561     case LEX_F_IPV4:
 562         return 32;
 563
 564     case LEX_F_IPV6:
 565         return 128;
 566
 567     case LEX_F_ETHERNET:
 568         return 48;
 569
 570     default:
 571         OVS_NOT_REACHED();
 572     }
 573 }
 574
 575 static struct expr *
 576 make_cmp(struct expr_context *ctx,
 577          const struct expr_field *f, enum expr_relop r,
 578          struct expr_constant_set *cs)
 579 {
 580     struct expr *e = NULL;
 581
 582     if (cs->type != (f->symbol->width ? EXPR_C_INTEGER : EXPR_C_STRING)) {
 583         expr_error(ctx, "Can't compare %s field %s to %s constant.",
 584                    f->symbol->width ? "integer" : "string",
 585                    f->symbol->name,
 586                    cs->type == EXPR_C_INTEGER ? "integer" : "string");
 587         goto exit;
 588     }
 589
 590     if (r != EXPR_R_EQ && r != EXPR_R_NE) {
 591         if (cs->in_curlies) {
 592             expr_error(ctx, "Only == and != operators may be used "
 593                        "with value sets.");
 594             goto exit;
 595         }
 596         if (f->symbol->level == EXPR_L_NOMINAL ||
 597             f->symbol->level == EXPR_L_BOOLEAN) {
 598             expr_error(ctx, "Only == and != operators may be used "
 599                        "with %s field %s.",
 600                        expr_level_to_string(f->symbol->level),
 601                        f->symbol->name);
 602             goto exit;
 603         }
 604         if (cs->values[0].masked) {
 605             expr_error(ctx, "Only == and != operators may be used with "
 606                        "masked constants.  Consider using subfields instead "
 607                        "(e.g. eth.src[0..15] > 0x1111 in place of "
 608                        "eth.src > 00:00:00:00:11:11/00:00:00:00:ff:ff).");
 609             goto exit;
 610         }
 611     }
 612
 613     if (f->symbol->level == EXPR_L_NOMINAL) {
 614         if (f->symbol->expansion) {
 615             for (size_t i = 0; i < cs->n_values; i++) {
 616                 const union mf_subvalue *value = &cs->values[i].value;
 617                 bool positive = (value->integer & htonll(1)) != 0;
 618                 positive ^= r == EXPR_R_NE;
 619                 positive ^= ctx->not;
 620                 if (!positive) {
 621                     const char *name = f->symbol->name;
 622                     expr_error(ctx, "Nominal predicate %s may only be tested "
 623                                "positively, e.g. `%s' or `%s == 1' but not "
 624                                "`!%s' or `%s == 0'.",
 625                                name, name, name, name, name);
 626                     goto exit;
 627                 }
 628             }
 629         } else if (r != (ctx->not ? EXPR_R_NE : EXPR_R_EQ)) {
 630             expr_error(ctx, "Nominal field %s may only be tested for "
 631                        "equality (taking enclosing `!' operators into "
 632                        "account).", f->symbol->name);
 633             goto exit;
 634         }
 635     }
 636
 637     if (f->symbol->width) {
 638         for (size_t i = 0; i < cs->n_values; i++) {
 639             int w = expr_constant_width(&cs->values[i]);
 640             if (w > f->symbol->width) {
 641                 expr_error(ctx, "Cannot compare %d-bit constant against "
 642                            "%d-bit field %s.",
 643                            w, f->symbol->width, f->symbol->name);
 644                 goto exit;
 645             }
 646         }
 647     }
 648
 649     e = make_cmp__(f, r, &cs->values[0]);
 650     for (size_t i = 1; i < cs->n_values; i++) {
 651         e = expr_combine(r == EXPR_R_EQ ? EXPR_T_OR : EXPR_T_AND,
 652                          e, make_cmp__(f, r, &cs->values[i]));
 653     }
 654 exit:
 655     expr_constant_set_destroy(cs);
 656     return e;
 657 }
 658
 659 static bool
 660 expr_get_int(struct expr_context *ctx, int *value)
 661 {
 662     if (ctx->lexer->token.type == LEX_T_INTEGER
 663         && ctx->lexer->token.format == LEX_F_DECIMAL
 664         && ntohll(ctx->lexer->token.value.integer) <= INT_MAX) {
 665         *value = ntohll(ctx->lexer->token.value.integer);
 666         lexer_get(ctx->lexer);
 667         return true;
 668     } else {
 669         expr_syntax_error(ctx, "expecting small integer.");
 670         return false;
 671     }
 672 }
 673
 674 static bool
 675 parse_field(struct expr_context *ctx, struct expr_field *f)
 676 {
 677     const struct expr_symbol *symbol;
 678
 679     if (ctx->lexer->token.type != LEX_T_ID) {
 680         expr_syntax_error(ctx, "expecting field name.");
 681         return false;
 682     }
 683
 684     symbol = shash_find_data(ctx->symtab, ctx->lexer->token.s);
 685     if (!symbol) {
 686         expr_syntax_error(ctx, "expecting field name.");
 687         return false;
 688     }
 689     lexer_get(ctx->lexer);
 690
 691     f->symbol = symbol;
 692     if (lexer_match(ctx->lexer, LEX_T_LSQUARE)) {
 693         int low, high;
 694
 695         if (!symbol->width) {
 696             expr_error(ctx, "Cannot select subfield of string field %s.",
 697                        symbol->name);
 698             return false;
 699         }
 700
 701         if (!expr_get_int(ctx, &low)) {
 702             return false;
 703         }
 704         if (lexer_match(ctx->lexer, LEX_T_ELLIPSIS)) {
 705             if (!expr_get_int(ctx, &high)) {
 706                 return false;
 707             }
 708         } else {
 709             high = low;
 710         }
 711
 712         if (!lexer_match(ctx->lexer, LEX_T_RSQUARE)) {
 713             expr_syntax_error(ctx, "expecting `]'.");
 714             return false;
 715         }
 716
 717         if (low > high) {
 718             expr_error(ctx, "Invalid bit range %d to %d.", low, high);
 719             return false;
 720         } else if (high >= symbol->width) {
 721             expr_error(ctx, "Cannot select bits %d to %d of %d-bit field %s.",
 722                        low, high, symbol->width, symbol->name);
 723             return false;
 724         } else if (symbol->level == EXPR_L_NOMINAL
 725                    && (low != 0 || high != symbol->width - 1)) {
 726             expr_error(ctx, "Cannot select subfield of nominal field %s.",
 727                        symbol->name);
 728             return false;
 729         }
 730
 731         f->ofs = low;
 732         f->n_bits = high - low + 1;
 733     } else {
 734         f->ofs = 0;
 735         f->n_bits = symbol->width;
 736     }
 737
 738     return true;
 739 }
 740
 741 static bool
 742 parse_relop(struct expr_context *ctx, enum expr_relop *relop)
 743 {
 744     if (expr_relop_from_token(ctx->lexer->token.type, relop)) {
 745         lexer_get(ctx->lexer);
 746         return true;
 747     } else {
 748         expr_syntax_error(ctx, "expecting relational operator.");
 749         return false;
 750     }
 751 }
 752
 753 static bool
 754 assign_constant_set_type(struct expr_context *ctx,
 755                          struct expr_constant_set *cs,
 756                          enum expr_constant_type type)
 757 {
 758     if (!cs->n_values || cs->type == type) {
 759         cs->type = type;
 760         return true;
 761     } else {
 762         expr_syntax_error(ctx, "expecting %s.",
 763                           cs->type == EXPR_C_INTEGER ? "integer" : "string");
 764         return false;
 765     }
 766 }
 767
 768 static bool
 769 parse_constant(struct expr_context *ctx, struct expr_constant_set *cs,
 770                size_t *allocated_values)
 771 {
 772     if (cs->n_values >= *allocated_values) {
 773         cs->values = x2nrealloc(cs->values, allocated_values,
 774                                 sizeof *cs->values);
 775     }
 776
 777     if (ctx->lexer->token.type == LEX_T_STRING) {
 778         if (!assign_constant_set_type(ctx, cs, EXPR_C_STRING)) {
 779             return false;
 780         }
 781         cs->values[cs->n_values++].string = xstrdup(ctx->lexer->token.s);
 782         lexer_get(ctx->lexer);
 783         return true;
 784     } else if (ctx->lexer->token.type == LEX_T_INTEGER ||
 785                ctx->lexer->token.type == LEX_T_MASKED_INTEGER) {
 786         if (!assign_constant_set_type(ctx, cs, EXPR_C_INTEGER)) {
 787             return false;
 788         }
 789
 790         union expr_constant *c = &cs->values[cs->n_values++];
 791         c->value = ctx->lexer->token.value;
 792         c->format = ctx->lexer->token.format;
 793         c->masked = ctx->lexer->token.type == LEX_T_MASKED_INTEGER;
 794         if (c->masked) {
 795             c->mask = ctx->lexer->token.mask;
 796         }
 797         lexer_get(ctx->lexer);
 798         return true;
 799     } else {
 800         expr_syntax_error(ctx, "expecting constant.");
 801         return false;
 802     }
 803 }
 804
 805 /* Parses a single or {}-enclosed set of integer or string constants into 'cs',
 806  * which the caller need not have initialized.  Returns true on success, in
 807  * which case the caller owns 'cs', false on failure, in which case 'cs' is
 808  * indeterminate. */
 809 static bool
 810 parse_constant_set(struct expr_context *ctx, struct expr_constant_set *cs)
 811 {
 812     size_t allocated_values = 0;
 813     bool ok;
 814
 815     memset(cs, 0, sizeof *cs);
 816     if (lexer_match(ctx->lexer, LEX_T_LCURLY)) {
 817         ok = true;
 818         cs->in_curlies = true;
 819         do {
 820             if (!parse_constant(ctx, cs, &allocated_values)) {
 821                 ok = false;
 822                 break;
 823             }
 824             lexer_match(ctx->lexer, LEX_T_COMMA);
 825         } while (!lexer_match(ctx->lexer, LEX_T_RCURLY));
 826     } else {
 827         ok = parse_constant(ctx, cs, &allocated_values);
 828     }
 829     if (!ok) {
 830         expr_constant_set_destroy(cs);
 831     }
 832     return ok;
 833 }
 834
 835 static void
 836 expr_constant_set_destroy(struct expr_constant_set *cs)
 837 {
 838     if (cs) {
 839         if (cs->type == EXPR_C_STRING) {
 840             for (size_t i = 0; i < cs->n_values; i++) {
 841                 free(cs->values[i].string);
 842             }
 843         }
 844         free(cs->values);
 845     }
 846 }
 847
 848 static struct expr *
 849 expr_parse_primary(struct expr_context *ctx, bool *atomic)
 850 {
 851     *atomic = false;
 852     if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
 853         struct expr *e = expr_parse__(ctx);
 854         if (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
 855             expr_destroy(e);
 856             expr_syntax_error(ctx, "expecting `)'.");
 857             return NULL;
 858         }
 859         *atomic = true;
 860         return e;
 861     }
 862
 863     if (ctx->lexer->token.type == LEX_T_ID) {
 864         struct expr_field f;
 865         enum expr_relop r;
 866         struct expr_constant_set c;
 867
 868         if (!parse_field(ctx, &f)) {
 869             return NULL;
 870         }
 871
 872         if (!expr_relop_from_token(ctx->lexer->token.type, &r)) {
 873             if (f.n_bits > 1 && !ctx->not) {
 874                 expr_error(ctx, "Explicit `!= 0' is required for inequality "
 875                            "test of multibit field against 0.");
 876                 return NULL;
 877             }
 878
 879             *atomic = true;
 880
 881             union expr_constant *cst = xzalloc(sizeof *cst);
 882             cst->format = LEX_F_HEXADECIMAL;
 883             cst->masked = false;
 884
 885             c.type = EXPR_C_INTEGER;
 886             c.values = cst;
 887             c.n_values = 1;
 888             c.in_curlies = false;
 889             return make_cmp(ctx, &f, EXPR_R_NE, &c);
 890         } else if (parse_relop(ctx, &r) && parse_constant_set(ctx, &c)) {
 891             return make_cmp(ctx, &f, r, &c);
 892         } else {
 893             return NULL;
 894         }
 895     } else {
 896         struct expr_constant_set c1;
 897         if (!parse_constant_set(ctx, &c1)) {
 898             return NULL;
 899         }
 900
 901         if (!expr_relop_from_token(ctx->lexer->token.type, NULL)
 902             && c1.n_values == 1
 903             && c1.type == EXPR_C_INTEGER
 904             && c1.values[0].format == LEX_F_DECIMAL
 905             && !c1.values[0].masked
 906             && !c1.in_curlies) {
 907             uint64_t x = ntohll(c1.values[0].value.integer);
 908             if (x <= 1) {
 909                 *atomic = true;
 910                 expr_constant_set_destroy(&c1);
 911                 return expr_create_boolean(x);
 912             }
 913         }
 914
 915         enum expr_relop r1;
 916         struct expr_field f;
 917         if (!parse_relop(ctx, &r1) || !parse_field(ctx, &f)) {
 918             expr_constant_set_destroy(&c1);
 919             return NULL;
 920         }
 921
 922         if (!expr_relop_from_token(ctx->lexer->token.type, NULL)) {
 923             return make_cmp(ctx, &f, expr_relop_turn(r1), &c1);
 924         }
 925
 926         enum expr_relop r2;
 927         struct expr_constant_set c2;
 928         if (!parse_relop(ctx, &r2) || !parse_constant_set(ctx, &c2)) {
 929             expr_constant_set_destroy(&c1);
 930             return NULL;
 931         } else {
 932             /* Reject "1 == field == 2", "1 < field > 2", and so on. */
 933             if (!(((r1 == EXPR_R_LT || r1 == EXPR_R_LE) &&
 934                    (r2 == EXPR_R_LT || r2 == EXPR_R_LE)) ||
 935                   ((r1 == EXPR_R_GT || r1 == EXPR_R_GE) &&
 936                    (r2 == EXPR_R_GT || r2 == EXPR_R_GE)))) {
 937                 expr_error(ctx, "Range expressions must have the form "
 938                            "`x < field < y' or `x > field > y', with each "
 939                            "`<' optionally replaced by `<=' or `>' by `>=').");
 940                 expr_constant_set_destroy(&c1);
 941                 expr_constant_set_destroy(&c2);
 942                 return NULL;
 943             }
 944
 945             struct expr *e1 = make_cmp(ctx, &f, expr_relop_turn(r1), &c1);
 946             struct expr *e2 = make_cmp(ctx, &f, r2, &c2);
 947             if (ctx->error) {
 948                 expr_destroy(e1);
 949                 expr_destroy(e2);
 950                 return NULL;
 951             }
 952             return expr_combine(EXPR_T_AND, e1, e2);
 953         }
 954     }
 955 }
 956
 957 static struct expr *
 958 expr_parse_not(struct expr_context *ctx)
 959 {
 960     bool atomic;
 961
 962     if (lexer_match(ctx->lexer, LEX_T_LOG_NOT)) {
 963         ctx->not = !ctx->not;
 964         struct expr *expr = expr_parse_primary(ctx, &atomic);
 965         ctx->not = !ctx->not;
 966
 967         if (expr) {
 968             if (!atomic) {
 969                 expr_error(ctx, "Missing parentheses around operand of !.");
 970                 expr_destroy(expr);
 971                 return NULL;
 972             }
 973             expr_not(expr);
 974         }
 975         return expr;
 976     } else {
 977         return expr_parse_primary(ctx, &atomic);
 978     }
 979 }
 980
 981 struct expr *
 982 expr_parse__(struct expr_context *ctx)
 983 {
 984     struct expr *e = expr_parse_not(ctx);
 985     if (!e) {
 986         return NULL;
 987     }
 988
 989     enum lex_type lex_type = ctx->lexer->token.type;
 990     if (lex_type == LEX_T_LOG_AND || lex_type == LEX_T_LOG_OR) {
 991         enum expr_type expr_type
 992             = lex_type == LEX_T_LOG_AND ? EXPR_T_AND : EXPR_T_OR;
 993
 994         lexer_get(ctx->lexer);
 995         do {
 996             struct expr *e2 = expr_parse_not(ctx);
 997             if (!e2) {
 998                 expr_destroy(e);
 999                 return NULL;
1000             }
1001             e = expr_combine(expr_type, e, e2);
1002         } while (lexer_match(ctx->lexer, lex_type));
1003         if (ctx->lexer->token.type == LEX_T_LOG_AND
1004             || ctx->lexer->token.type == LEX_T_LOG_OR) {
1005             expr_destroy(e);
1006             expr_error(ctx,
1007                        "&& and || must be parenthesized when used together.");
1008             return NULL;
1009         }
1010     }
1011     return e;
1012 }
1013
1014 /* Parses an expression using the symbols in 'symtab' from 'lexer'.  If
1015  * successful, returns the new expression and sets '*errorp' to NULL.  On
1016  * failure, returns NULL and sets '*errorp' to an explanatory error message.
1017  * The caller must eventually free the returned expression (with
1018  * expr_destroy()) or error (with free()). */
1019 struct expr *
1020 expr_parse(struct lexer *lexer, const struct shash *symtab, char **errorp)
1021 {
1022     struct expr_context ctx;
1023
1024     ctx.lexer = lexer;
1025     ctx.symtab = symtab;
1026     ctx.error = NULL;
1027     ctx.not = false;
1028
1029     struct expr *e = expr_parse__(&ctx);
1030     *errorp = ctx.error;
1031     ovs_assert((ctx.error != NULL) != (e != NULL));
1032     return e;
1033 }
1034
1035 /* Like expr_parse(), but the expression is taken from 's'. */
1036 struct expr *
1037 expr_parse_string(const char *s, const struct shash *symtab, char **errorp)
1038 {
1039     struct lexer lexer;
1040     struct expr *expr;
1041
1042     lexer_init(&lexer, s);
1043     lexer_get(&lexer);
1044     expr = expr_parse(&lexer, symtab, errorp);
1045     if (!errorp && lexer.token.type != LEX_T_END) {
1046         *errorp = xstrdup("Extra tokens at end of input.");
1047         expr_destroy(expr);
1048         expr = NULL;
1049     }
1050     lexer_destroy(&lexer);
1051
1052     return expr;
1053 }
1054 \f
1055 static struct expr_symbol *
1056 add_symbol(struct shash *symtab, const char *name, int width,
1057            const char *prereqs, enum expr_level level,
1058            bool must_crossproduct)
1059 {
1060     struct expr_symbol *symbol = xzalloc(sizeof *symbol);
1061     symbol->name = xstrdup(name);
1062     symbol->prereqs = prereqs && prereqs[0] ? xstrdup(prereqs) : NULL;
1063     symbol->width = width;
1064     symbol->level = level;
1065     symbol->must_crossproduct = must_crossproduct;
1066     shash_add_assert(symtab, symbol->name, symbol);
1067     return symbol;
1068 }
1069
1070 /* Adds field 'id' to symbol table 'symtab' under the given 'name'.  Whenever
1071  * 'name' is referenced, expression annotation (see expr_annotate()) will
1072  * ensure that 'prereqs' are also true.  If 'must_crossproduct' is true, then
1073  * conversion to flows will never attempt to use the field as a conjunctive
1074  * match dimension (see "Crossproducting" in the large comment on struct
1075  * expr_symbol in expr.h for an example).
1076  *
1077  * A given field 'id' must only be used for a single symbol in a symbol table.
1078  * Use subfields to duplicate or subset a field (you can even make a subfield
1079  * include all the bits of the "parent" field if you like). */
1080 struct expr_symbol *
1081 expr_symtab_add_field(struct shash *symtab, const char *name,
1082                       enum mf_field_id id, const char *prereqs,
1083                       bool must_crossproduct)
1084 {
1085     const struct mf_field *field = mf_from_id(id);
1086     struct expr_symbol *symbol;
1087
1088     symbol = add_symbol(symtab, name, field->n_bits, prereqs,
1089                         (field->maskable == MFM_FULLY
1090                          ? EXPR_L_ORDINAL
1091                          : EXPR_L_NOMINAL),
1092                         must_crossproduct);
1093     symbol->field = field;
1094     return symbol;
1095 }
1096
1097 static bool
1098 parse_field_from_string(const char *s, const struct shash *symtab,
1099                         struct expr_field *field, char **errorp)
1100 {
1101     struct lexer lexer;
1102     lexer_init(&lexer, s);
1103     lexer_get(&lexer);
1104
1105     struct expr_context ctx;
1106     ctx.lexer = &lexer;
1107     ctx.symtab = symtab;
1108     ctx.error = NULL;
1109     ctx.not = false;
1110
1111     bool ok = parse_field(&ctx, field);
1112     if (!ok) {
1113         *errorp = ctx.error;
1114     } else if (lexer.token.type != LEX_T_END) {
1115         *errorp = xstrdup("Extra tokens at end of input.");
1116         ok = false;
1117     }
1118
1119     lexer_destroy(&lexer);
1120
1121     return ok;
1122 }
1123
1124 /* Adds 'name' as a subfield of a larger field in 'symtab'.  Whenever
1125  * 'name' is referenced, expression annotation (see expr_annotate()) will
1126  * ensure that 'prereqs' are also true.
1127  *
1128  * 'subfield' must describe the subfield as a string, e.g. "vlan.tci[0..11]"
1129  * for the low 12 bits of a larger field named "vlan.tci". */
1130 struct expr_symbol *
1131 expr_symtab_add_subfield(struct shash *symtab, const char *name,
1132                          const char *prereqs, const char *subfield)
1133 {
1134     struct expr_symbol *symbol;
1135     struct expr_field f;
1136     char *error;
1137
1138     if (!parse_field_from_string(subfield, symtab, &f, &error)) {
1139         VLOG_WARN("%s: error parsing %s subfield (%s)", subfield, name, error);
1140         free(error);
1141         return NULL;
1142     }
1143
1144     enum expr_level level = f.symbol->level;
1145     if (level != EXPR_L_ORDINAL) {
1146         VLOG_WARN("can't define %s as subfield of %s field %s",
1147                   name, expr_level_to_string(level), f.symbol->name);
1148     }
1149
1150     symbol = add_symbol(symtab, name, f.n_bits, prereqs, level, false);
1151     symbol->expansion = xstrdup(subfield);
1152     return symbol;
1153 }
1154
1155 /* Adds a string-valued symbol named 'name' to 'symtab' with the specified
1156  * 'prereqs'. */
1157 struct expr_symbol *
1158 expr_symtab_add_string(struct shash *symtab, const char *name,
1159                        enum mf_field_id id, const char *prereqs)
1160 {
1161     const struct mf_field *field = mf_from_id(id);
1162     struct expr_symbol *symbol;
1163
1164     symbol = add_symbol(symtab, name, 0, prereqs, EXPR_L_NOMINAL, false);
1165     symbol->field = field;
1166     return symbol;
1167 }
1168
1169 static enum expr_level
1170 expr_get_level(const struct expr *expr)
1171 {
1172     const struct expr *sub;
1173     enum expr_level level = EXPR_L_ORDINAL;
1174
1175     switch (expr->type) {
1176     case EXPR_T_CMP:
1177         return (expr->cmp.symbol->level == EXPR_L_NOMINAL
1178                 ? EXPR_L_NOMINAL
1179                 : EXPR_L_BOOLEAN);
1180
1181     case EXPR_T_AND:
1182     case EXPR_T_OR:
1183         LIST_FOR_EACH (sub, node, &expr->andor) {
1184             enum expr_level sub_level = expr_get_level(sub);
1185             level = MIN(level, sub_level);
1186         }
1187         return level;
1188
1189     case EXPR_T_BOOLEAN:
1190         return EXPR_L_BOOLEAN;
1191
1192     default:
1193         OVS_NOT_REACHED();
1194     }
1195 }
1196
1197 static enum expr_level
1198 expr_parse_level(const char *s, const struct shash *symtab, char **errorp)
1199 {
1200     struct expr *expr = expr_parse_string(s, symtab, errorp);
1201     enum expr_level level = expr ? expr_get_level(expr) : EXPR_L_NOMINAL;
1202     expr_destroy(expr);
1203     return level;
1204 }
1205
1206 /* Adds a predicate symbol, whose value is the given Boolean 'expression',
1207  * named 'name' to 'symtab'.  For example, "ip4 && ip4.proto == 1" might be an
1208  * appropriate predicate named "tcp4". */
1209 struct expr_symbol *
1210 expr_symtab_add_predicate(struct shash *symtab, const char *name,
1211                           const char *expansion)
1212 {
1213     struct expr_symbol *symbol;
1214     enum expr_level level;
1215     char *error;
1216
1217     level = expr_parse_level(expansion, symtab, &error);
1218     if (error) {
1219         VLOG_WARN("%s: error parsing %s expansion (%s)",
1220                   expansion, name, error);
1221         free(error);
1222         return NULL;
1223     }
1224
1225     symbol = add_symbol(symtab, name, 1, NULL, level, false);
1226     symbol->expansion = xstrdup(expansion);
1227     return symbol;
1228 }
1229
1230 /* Destroys 'symtab' and all of its symbols. */
1231 void
1232 expr_symtab_destroy(struct shash *symtab)
1233 {
1234     struct shash_node *node, *next;
1235
1236     SHASH_FOR_EACH_SAFE (node, next, symtab) {
1237         struct expr_symbol *symbol = node->data;
1238
1239         shash_delete(symtab, node);
1240         free(symbol->name);
1241         free(symbol->prereqs);
1242         free(symbol->expansion);
1243         free(symbol);
1244     }
1245 }
1246 \f
1247 /* Cloning. */
1248
1249 static struct expr *
1250 expr_clone_cmp(struct expr *expr)
1251 {
1252     struct expr *new = xmemdup(expr, sizeof *expr);
1253     if (!new->cmp.symbol->width) {
1254         new->cmp.string = xstrdup(new->cmp.string);
1255     }
1256     return new;
1257 }
1258
1259 static struct expr *
1260 expr_clone_andor(struct expr *expr)
1261 {
1262     struct expr *new = expr_create_andor(expr->type);
1263     struct expr *sub;
1264
1265     LIST_FOR_EACH (sub, node, &expr->andor) {
1266         struct expr *new_sub = expr_clone(sub);
1267         list_push_back(&new->andor, &new_sub->node);
1268     }
1269     return new;
1270 }
1271
1272 /* Returns a clone of 'expr'.  This is a "deep copy": neither the returned
1273  * expression nor any of its substructure will be shared with 'expr'. */
1274 struct expr *
1275 expr_clone(struct expr *expr)
1276 {
1277     switch (expr->type) {
1278     case EXPR_T_CMP:
1279         return expr_clone_cmp(expr);
1280
1281     case EXPR_T_AND:
1282     case EXPR_T_OR:
1283         return expr_clone_andor(expr);
1284
1285     case EXPR_T_BOOLEAN:
1286         return expr_create_boolean(expr->boolean);
1287     }
1288     OVS_NOT_REACHED();
1289 }
1290 \f
1291 /* Destroys 'expr' and all of the sub-expressions it references. */
1292 void
1293 expr_destroy(struct expr *expr)
1294 {
1295     if (!expr) {
1296         return;
1297     }
1298
1299     struct expr *sub, *next;
1300
1301     switch (expr->type) {
1302     case EXPR_T_CMP:
1303         if (!expr->cmp.symbol->width) {
1304             free(expr->cmp.string);
1305         }
1306         break;
1307
1308     case EXPR_T_AND:
1309     case EXPR_T_OR:
1310         LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
1311             list_remove(&sub->node);
1312             expr_destroy(sub);
1313         }
1314         break;
1315
1316     case EXPR_T_BOOLEAN:
1317         break;
1318     }
1319     free(expr);
1320 }
1321 \f
1322 /* Annotation. */
1323
1324 /* An element in a linked list of symbols.
1325  *
1326  * Used to detect when a symbol is being expanded recursively, to allow
1327  * flagging an error. */
1328 struct annotation_nesting {
1329     struct ovs_list node;
1330     const struct expr_symbol *symbol;
1331 };
1332
1333 struct expr *expr_annotate__(struct expr *, const struct shash *symtab,
1334                              struct ovs_list *nesting, char **errorp);
1335
1336 static struct expr *
1337 parse_and_annotate(const char *s, const struct shash *symtab,
1338                    struct ovs_list *nesting, char **errorp)
1339 {
1340     char *error;
1341     struct expr *expr;
1342
1343     expr = expr_parse_string(s, symtab, &error);
1344     if (expr) {
1345         expr = expr_annotate__(expr, symtab, nesting, &error);
1346     }
1347     if (!expr) {
1348         *errorp = xasprintf("Error parsing expression `%s' encountered as "
1349                             "prerequisite or predicate of initial expression: "
1350                             "%s", s, error);
1351         free(error);
1352     }
1353     return expr;
1354 }
1355
1356 static struct expr *
1357 expr_annotate_cmp(struct expr *expr, const struct shash *symtab,
1358                   struct ovs_list *nesting, char **errorp)
1359 {
1360     const struct expr_symbol *symbol = expr->cmp.symbol;
1361     const struct annotation_nesting *iter;
1362     LIST_FOR_EACH (iter, node, nesting) {
1363         if (iter->symbol == symbol) {
1364             *errorp = xasprintf("Recursive expansion of symbol `%s'.",
1365                                 symbol->name);
1366             expr_destroy(expr);
1367             return NULL;
1368         }
1369     }
1370
1371     struct annotation_nesting an;
1372     an.symbol = symbol;
1373     list_push_back(nesting, &an.node);
1374
1375     struct expr *prereqs = NULL;
1376     if (symbol->prereqs) {
1377         prereqs = parse_and_annotate(symbol->prereqs, symtab, nesting, errorp);
1378         if (!prereqs) {
1379             goto error;
1380         }
1381     }
1382
1383     if (symbol->expansion) {
1384         if (symbol->level == EXPR_L_ORDINAL) {
1385             struct expr_field field;
1386
1387             if (!parse_field_from_string(symbol->expansion, symtab,
1388                                          &field, errorp)) {
1389                 goto error;
1390             }
1391
1392             expr->cmp.symbol = field.symbol;
1393             mf_subvalue_shift(&expr->cmp.value, field.ofs);
1394             mf_subvalue_shift(&expr->cmp.mask, field.ofs);
1395         } else {
1396             struct expr *expansion;
1397
1398             expansion = parse_and_annotate(symbol->expansion, symtab,
1399                                            nesting, errorp);
1400             if (!expansion) {
1401                 goto error;
1402             }
1403
1404             bool positive = (expr->cmp.value.integer & htonll(1)) != 0;
1405             positive ^= expr->cmp.relop == EXPR_R_NE;
1406             if (!positive) {
1407                 expr_not(expansion);
1408             }
1409
1410             expr_destroy(expr);
1411             expr = expansion;
1412         }
1413     }
1414
1415     list_remove(&an.node);
1416     return prereqs ? expr_combine(EXPR_T_AND, expr, prereqs) : expr;
1417
1418 error:
1419     expr_destroy(expr);
1420     expr_destroy(prereqs);
1421     list_remove(&an.node);
1422     return NULL;
1423 }
1424
1425 struct expr *
1426 expr_annotate__(struct expr *expr, const struct shash *symtab,
1427                 struct ovs_list *nesting, char **errorp)
1428 {
1429     switch (expr->type) {
1430     case EXPR_T_CMP:
1431         return expr_annotate_cmp(expr, symtab, nesting, errorp);
1432
1433     case EXPR_T_AND:
1434     case EXPR_T_OR: {
1435         struct expr *sub, *next;
1436
1437         LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
1438             list_remove(&sub->node);
1439             struct expr *new_sub = expr_annotate__(sub, symtab,
1440                                                    nesting, errorp);
1441             if (!new_sub) {
1442                 expr_destroy(expr);
1443                 return NULL;
1444             }
1445             expr_insert_andor(expr, next, new_sub);
1446         }
1447         *errorp = NULL;
1448         return expr;
1449     }
1450
1451     case EXPR_T_BOOLEAN:
1452         *errorp = NULL;
1453         return expr;
1454
1455     default:
1456         OVS_NOT_REACHED();
1457     }
1458 }
1459
1460 /* "Annotates" 'expr', which does the following:
1461  *
1462  *     - Applies prerequisites, by locating each comparison operator whose
1463  *       field has a prerequisite and adding a logical AND against those
1464  *       prerequisites.
1465  *
1466  *     - Expands references to subfield symbols, by replacing them by
1467  *       references to their underlying field symbols (suitably shifted).
1468  *
1469  *     - Expands references to predicate symbols, by replacing them by the
1470  *       expressions that they expand to.
1471  *
1472  * In each case, annotation occurs recursively as necessary. */
1473 struct expr *
1474 expr_annotate(struct expr *expr, const struct shash *symtab, char **errorp)
1475 {
1476     struct ovs_list nesting = OVS_LIST_INITIALIZER(&nesting);
1477     return expr_annotate__(expr, symtab, &nesting, errorp);
1478 }
1479 \f
1480 static struct expr *
1481 expr_simplify_ne(struct expr *expr)
1482 {
1483     struct expr *new = NULL;
1484     const union mf_subvalue *value = &expr->cmp.value;
1485     const union mf_subvalue *mask = &expr->cmp.mask;
1486     int w = expr->cmp.symbol->width;
1487     int i;
1488
1489     for (i = 0; (i = bitwise_scan(mask, sizeof *mask, true, i, w)) < w; i++) {
1490         struct expr *e;
1491
1492         e = xzalloc(sizeof *e);
1493         e->type = EXPR_T_CMP;
1494         e->cmp.symbol = expr->cmp.symbol;
1495         e->cmp.relop = EXPR_R_EQ;
1496         bitwise_put_bit(&e->cmp.value, sizeof e->cmp.value, i,
1497                         !bitwise_get_bit(value, sizeof *value, i));
1498         bitwise_put1(&e->cmp.mask, sizeof e->cmp.mask, i);
1499
1500         new = expr_combine(EXPR_T_OR, new, e);
1501     }
1502     ovs_assert(new);
1503
1504     expr_destroy(expr);
1505
1506     return new;
1507 }
1508
1509 static struct expr *
1510 expr_simplify_relational(struct expr *expr)
1511 {
1512     const union mf_subvalue *value = &expr->cmp.value;
1513     int start, n_bits, end;
1514
1515     find_bitwise_range(&expr->cmp.mask, expr->cmp.symbol->width,
1516                        &start, &n_bits);
1517     ovs_assert(n_bits > 0);
1518     end = start + n_bits;
1519
1520     struct expr *new;
1521     if (expr->cmp.relop == EXPR_R_LE || expr->cmp.relop == EXPR_R_GE) {
1522         new = xmemdup(expr, sizeof *expr);
1523         new->cmp.relop = EXPR_R_EQ;
1524     } else {
1525         new = NULL;
1526     }
1527
1528     bool b = expr->cmp.relop == EXPR_R_LT || expr->cmp.relop == EXPR_R_LE;
1529     for (int z = bitwise_scan(value, sizeof *value, b, start, end);
1530          z < end;
1531          z = bitwise_scan(value, sizeof *value, b, z + 1, end)) {
1532         struct expr *e;
1533
1534         e = xmemdup(expr, sizeof *expr);
1535         e->cmp.relop = EXPR_R_EQ;
1536         bitwise_toggle_bit(&e->cmp.value, sizeof e->cmp.value, z);
1537         bitwise_zero(&e->cmp.value, sizeof e->cmp.value, start, z - start);
1538         bitwise_zero(&e->cmp.mask, sizeof e->cmp.mask, start, z - start);
1539         new = expr_combine(EXPR_T_OR, new, e);
1540     }
1541     expr_destroy(expr);
1542     return new ? new : expr_create_boolean(false);
1543 }
1544
1545 /* Takes ownership of 'expr' and returns an equivalent expression whose
1546  * EXPR_T_CMP nodes use only tests for equality (EXPR_R_EQ). */
1547 struct expr *
1548 expr_simplify(struct expr *expr)
1549 {
1550     struct expr *sub, *next;
1551
1552     switch (expr->type) {
1553     case EXPR_T_CMP:
1554         return (expr->cmp.relop == EXPR_R_EQ || !expr->cmp.symbol->width ? expr
1555                 : expr->cmp.relop == EXPR_R_NE ? expr_simplify_ne(expr)
1556                 : expr_simplify_relational(expr));
1557
1558     case EXPR_T_AND:
1559     case EXPR_T_OR:
1560         LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
1561             list_remove(&sub->node);
1562             expr_insert_andor(expr, next, expr_simplify(sub));
1563         }
1564         return expr_fix(expr);
1565
1566     case EXPR_T_BOOLEAN:
1567         return expr;
1568     }
1569     OVS_NOT_REACHED();
1570 }
1571 \f
1572 static const struct expr_symbol *
1573 expr_is_cmp(const struct expr *expr)
1574 {
1575     switch (expr->type) {
1576     case EXPR_T_CMP:
1577         return expr->cmp.symbol;
1578
1579     case EXPR_T_AND:
1580     case EXPR_T_OR: {
1581         const struct expr_symbol *prev = NULL;
1582         struct expr *sub;
1583
1584         LIST_FOR_EACH (sub, node, &expr->andor) {
1585             const struct expr_symbol *symbol = expr_is_cmp(sub);
1586             if (!symbol || (prev && symbol != prev)) {
1587                 return NULL;
1588             }
1589             prev = symbol;
1590         }
1591         return prev;
1592     }
1593
1594     case EXPR_T_BOOLEAN:
1595         return NULL;
1596
1597     default:
1598         OVS_NOT_REACHED();
1599     }
1600 }
1601
1602 struct expr_sort {
1603     struct expr *expr;
1604     const struct expr_symbol *relop;
1605     enum expr_type type;
1606 };
1607
1608 static int
1609 compare_expr_sort(const void *a_, const void *b_)
1610 {
1611     const struct expr_sort *a = a_;
1612     const struct expr_sort *b = b_;
1613
1614     if (a->type != b->type) {
1615         return a->type < b->type ? -1 : 1;
1616     } else if (a->relop) {
1617         int cmp = strcmp(a->relop->name, b->relop->name);
1618         if (cmp) {
1619             return cmp;
1620         }
1621
1622         enum expr_type a_type = a->expr->type;
1623         enum expr_type b_type = a->expr->type;
1624         return a_type < b_type ? -1 : a_type > b_type;
1625     } else if (a->type == EXPR_T_AND || a->type == EXPR_T_OR) {
1626         size_t a_len = list_size(&a->expr->andor);
1627         size_t b_len = list_size(&b->expr->andor);
1628         return a_len < b_len ? -1 : a_len > b_len;
1629     } else {
1630         return 0;
1631     }
1632 }
1633
1634 static struct expr *crush_cmps(struct expr *, const struct expr_symbol *);
1635
1636 static struct expr *
1637 crush_and(struct expr *expr, const struct expr_symbol *symbol)
1638 {
1639     ovs_assert(!list_is_short(&expr->andor));
1640
1641     union mf_subvalue value, mask;
1642     memset(&value, 0, sizeof value);
1643     memset(&mask, 0, sizeof mask);
1644
1645     struct expr *sub, *next = NULL;
1646     LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
1647         list_remove(&sub->node);
1648         struct expr *new = crush_cmps(sub, symbol);
1649         switch (new->type) {
1650         case EXPR_T_CMP:
1651             if (!mf_subvalue_intersect(&value, &mask,
1652                                        &new->cmp.value, &new->cmp.mask,
1653                                        &value, &mask)) {
1654                 expr_destroy(new);
1655                 expr_destroy(expr);
1656                 return expr_create_boolean(false);
1657             }
1658             expr_destroy(new);
1659             break;
1660         case EXPR_T_AND:
1661             OVS_NOT_REACHED();
1662         case EXPR_T_OR:
1663             list_insert(&next->node, &new->node);
1664             break;
1665         case EXPR_T_BOOLEAN:
1666             if (!new->boolean) {
1667                 expr_destroy(expr);
1668                 return new;
1669             }
1670             free(new);
1671             break;
1672         }
1673     }
1674     if (list_is_empty(&expr->andor)) {
1675         if (is_all_zeros(&mask, sizeof mask)) {
1676             expr_destroy(expr);
1677             return expr_create_boolean(true);
1678         } else {
1679             struct expr *cmp;
1680             cmp = xmalloc(sizeof *cmp);
1681             cmp->type = EXPR_T_CMP;
1682             cmp->cmp.symbol = symbol;
1683             cmp->cmp.relop = EXPR_R_EQ;
1684             cmp->cmp.value = value;
1685             cmp->cmp.mask = mask;
1686             expr_destroy(expr);
1687             return cmp;
1688         }
1689     } else if (list_is_short(&expr->andor)) {
1690         /* Transform "a && (b || c || d)" into "ab || ac || ad" where "ab" is
1691          * computed as "a && b", etc. */
1692         struct expr *disjuncts = expr_from_node(list_pop_front(&expr->andor));
1693         struct expr *or;
1694
1695         or = xmalloc(sizeof *or);
1696         or->type = EXPR_T_OR;
1697         list_init(&or->andor);
1698
1699         ovs_assert(disjuncts->type == EXPR_T_OR);
1700         LIST_FOR_EACH_SAFE (sub, next, node, &disjuncts->andor) {
1701             ovs_assert(sub->type == EXPR_T_CMP);
1702             list_remove(&sub->node);
1703             if (mf_subvalue_intersect(&value, &mask,
1704                                       &sub->cmp.value, &sub->cmp.mask,
1705                                       &sub->cmp.value, &sub->cmp.mask)) {
1706                 list_push_back(&or->andor, &sub->node);
1707             } else {
1708                 free(sub);
1709             }
1710         }
1711         free(disjuncts);
1712         free(expr);
1713         if (list_is_empty(&or->andor)) {
1714             free(or);
1715             return expr_create_boolean(false);
1716         } else if (list_is_short(&or->andor)) {
1717             struct expr *cmp = expr_from_node(list_pop_front(&or->andor));
1718             free(or);
1719             return cmp;
1720         } else {
1721             return or;
1722         }
1723     } else {
1724         /* Transform "x && (a0 || a1) && (b0 || b1) && ..." into
1725          *           "(xa0b0 || xa0b1 || xa1b0 || xa1b1) && ...". */
1726         struct expr *as = expr_from_node(list_pop_front(&expr->andor));
1727         struct expr *bs = expr_from_node(list_pop_front(&expr->andor));
1728         struct expr *new = NULL;
1729         struct expr *or;
1730
1731         or = xmalloc(sizeof *or);
1732         or->type = EXPR_T_OR;
1733         list_init(&or->andor);
1734
1735         struct expr *a;
1736         LIST_FOR_EACH (a, node, &as->andor) {
1737             union mf_subvalue a_value, a_mask;
1738
1739             ovs_assert(a->type == EXPR_T_CMP);
1740             if (!mf_subvalue_intersect(&value, &mask,
1741                                        &a->cmp.value, &a->cmp.mask,
1742                                        &a_value, &a_mask)) {
1743                 continue;
1744             }
1745
1746             struct expr *b;
1747             LIST_FOR_EACH (b, node, &bs->andor) {
1748                 ovs_assert(b->type == EXPR_T_CMP);
1749                 if (!new) {
1750                     new = xmalloc(sizeof *new);
1751                     new->type = EXPR_T_CMP;
1752                     new->cmp.symbol = symbol;
1753                     new->cmp.relop = EXPR_R_EQ;
1754                 }
1755                 if (mf_subvalue_intersect(&a_value, &a_mask,
1756                                           &b->cmp.value, &b->cmp.mask,
1757                                           &new->cmp.value, &new->cmp.mask)) {
1758                     list_push_back(&or->andor, &new->node);
1759                     new = NULL;
1760                 }
1761             }
1762         }
1763         expr_destroy(as);
1764         expr_destroy(bs);
1765         free(new);
1766
1767         if (list_is_empty(&or->andor)) {
1768             expr_destroy(expr);
1769             free(or);
1770             return expr_create_boolean(false);
1771         } else if (list_is_short(&or->andor)) {
1772             struct expr *cmp = expr_from_node(list_pop_front(&or->andor));
1773             free(or);
1774             if (list_is_empty(&expr->andor)) {
1775                 expr_destroy(expr);
1776                 return crush_cmps(cmp, symbol);
1777             } else {
1778                 return crush_cmps(expr_combine(EXPR_T_AND, cmp, expr), symbol);
1779             }
1780         } else if (!list_is_empty(&expr->andor)) {
1781             struct expr *e = expr_combine(EXPR_T_AND, or, expr);
1782             ovs_assert(!list_is_short(&e->andor));
1783             return crush_cmps(e, symbol);
1784         } else {
1785             expr_destroy(expr);
1786             return crush_cmps(or, symbol);
1787         }
1788     }
1789 }
1790
1791 static int
1792 compare_expr(const void *a_, const void *b_)
1793 {
1794     const struct expr *const *ap = a_;
1795     const struct expr *const *bp = b_;
1796     const struct expr *a = *ap;
1797     const struct expr *b = *bp;
1798     int d = memcmp(&a->cmp.value, &b->cmp.value, sizeof a->cmp.value);
1799     if (!d) {
1800         d = memcmp(&a->cmp.mask, &b->cmp.mask, sizeof a->cmp.mask);
1801     }
1802     return d;
1803 }
1804
1805 static struct expr *
1806 crush_or(struct expr *expr, const struct expr_symbol *symbol)
1807 {
1808     struct expr *sub, *next = NULL;
1809
1810     /* First, crush all the subexpressions.  That might eliminate the
1811      * OR-expression entirely; if so, return the result. */
1812     LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
1813         list_remove(&sub->node);
1814         expr_insert_andor(expr, next, crush_cmps(sub, symbol));
1815     }
1816     expr = expr_fix(expr);
1817     if (expr->type != EXPR_T_OR) {
1818         return expr;
1819     }
1820
1821     /* Eliminate duplicates by sorting the subexpressions. */
1822     size_t n = list_size(&expr->andor);
1823     struct expr **subs = xmalloc(n * sizeof *subs);
1824
1825     size_t i = 0;
1826     LIST_FOR_EACH (sub, node, &expr->andor) {
1827         subs[i++] = sub;
1828     }
1829     ovs_assert(i == n);
1830
1831     qsort(subs, n, sizeof *subs, compare_expr);
1832
1833     list_init(&expr->andor);
1834     list_push_back(&expr->andor, &subs[0]->node);
1835     for (i = 1; i < n; i++) {
1836         struct expr *a = expr_from_node(list_back(&expr->andor));
1837         struct expr *b = subs[i];
1838         if (memcmp(&a->cmp.value, &b->cmp.value, sizeof a->cmp.value)
1839             || memcmp(&a->cmp.mask, &b->cmp.mask, sizeof a->cmp.mask)) {
1840             list_push_back(&expr->andor, &b->node);
1841         } else {
1842             free(b);
1843         }
1844     }
1845     free(subs);
1846     return expr_fix(expr);
1847 }
1848
1849 /* Converts 'expr', which must be a cmp in the sense determined by
1850  * expr_is_cmp().  Returns a cmp, a disjunction of cmps, or a boolean. */
1851 static struct expr *
1852 crush_cmps(struct expr *expr, const struct expr_symbol *symbol)
1853 {
1854     switch (expr->type) {
1855     case EXPR_T_OR:
1856         return crush_or(expr, symbol);
1857
1858     case EXPR_T_AND:
1859         return crush_and(expr, symbol);
1860
1861     case EXPR_T_CMP:
1862         return expr;
1863
1864     case EXPR_T_BOOLEAN:
1865         return expr;
1866
1867     default:
1868         OVS_NOT_REACHED();
1869     }
1870 }
1871
1872 static struct expr *
1873 expr_sort(struct expr *expr)
1874 {
1875     size_t n = list_size(&expr->andor);
1876     struct expr_sort *subs = xmalloc(n * sizeof *subs);
1877     struct expr *sub;
1878     size_t i;
1879
1880     i = 0;
1881     LIST_FOR_EACH (sub, node, &expr->andor) {
1882         subs[i].expr = sub;
1883         subs[i].relop = expr_is_cmp(sub);
1884         subs[i].type = subs[i].relop ? EXPR_T_CMP : sub->type;
1885         i++;
1886     }
1887     ovs_assert(i == n);
1888
1889     qsort(subs, n, sizeof *subs, compare_expr_sort);
1890
1891     list_init(&expr->andor);
1892     for (int i = 0; i < n; ) {
1893         if (subs[i].relop) {
1894             int j;
1895             for (j = i + 1; j < n; j++) {
1896                 if (subs[i].relop != subs[j].relop) {
1897                     break;
1898                 }
1899             }
1900
1901             struct expr *crushed;
1902             if (j == i + 1) {
1903                 crushed = crush_cmps(subs[i].expr, subs[i].relop);
1904             } else {
1905                 struct expr *combined = subs[i].expr;
1906                 for (int k = i + 1; k < j; k++) {
1907                     combined = expr_combine(EXPR_T_AND, combined,
1908                                             subs[k].expr);
1909                 }
1910                 ovs_assert(!list_is_short(&combined->andor));
1911                 crushed = crush_cmps(combined, subs[i].relop);
1912             }
1913             if (crushed->type == EXPR_T_BOOLEAN) {
1914                 if (!crushed->boolean) {
1915                     for (int k = j; k < n; k++) {
1916                         expr_destroy(subs[k].expr);
1917                     }
1918                     expr_destroy(expr);
1919                     expr = crushed;
1920                     break;
1921                 } else {
1922                     free(crushed);
1923                 }
1924             } else {
1925                 expr = expr_combine(EXPR_T_AND, expr, crushed);
1926             }
1927             i = j;
1928         } else {
1929             expr = expr_combine(EXPR_T_AND, expr, subs[i++].expr);
1930         }
1931     }
1932     free(subs);
1933
1934     return expr;
1935 }
1936
1937 static struct expr *expr_normalize_or(struct expr *expr);
1938
1939 /* Returns 'expr', which is an AND, reduced to OR(AND(clause)) where
1940  * a clause is a cmp or a disjunction of cmps on a single field. */
1941 static struct expr *
1942 expr_normalize_and(struct expr *expr)
1943 {
1944     ovs_assert(expr->type == EXPR_T_AND);
1945
1946     expr = expr_sort(expr);
1947     if (expr->type != EXPR_T_AND) {
1948         ovs_assert(expr->type == EXPR_T_BOOLEAN);
1949         return expr;
1950     }
1951
1952     struct expr *a, *b;
1953     LIST_FOR_EACH_SAFE (a, b, node, &expr->andor) {
1954         if (&b->node == &expr->andor
1955             || a->type != EXPR_T_CMP || b->type != EXPR_T_CMP) {
1956         } else if (a->cmp.symbol != b->cmp.symbol) {
1957             continue;
1958         } else if (mf_subvalue_intersect(&a->cmp.value, &a->cmp.mask,
1959                                          &b->cmp.value, &b->cmp.mask,
1960                                          &b->cmp.value, &b->cmp.mask)) {
1961             list_remove(&a->node);
1962             expr_destroy(a);
1963         } else {
1964             expr_destroy(expr);
1965             return expr_create_boolean(false);
1966         }
1967     }
1968     if (list_is_short(&expr->andor)) {
1969         struct expr *sub = expr_from_node(list_front(&expr->andor));
1970         free(expr);
1971         return sub;
1972     }
1973
1974     struct expr *sub;
1975     LIST_FOR_EACH (sub, node, &expr->andor) {
1976         if (sub->type == EXPR_T_CMP) {
1977             continue;
1978         }
1979
1980         ovs_assert(sub->type == EXPR_T_OR);
1981         const struct expr_symbol *symbol = expr_is_cmp(sub);
1982         if (!symbol || symbol->must_crossproduct) {
1983             struct expr *or = expr_create_andor(EXPR_T_OR);
1984             struct expr *k;
1985
1986             LIST_FOR_EACH (k, node, &sub->andor) {
1987                 struct expr *and = expr_create_andor(EXPR_T_AND);
1988                 struct expr *m;
1989
1990                 LIST_FOR_EACH (m, node, &expr->andor) {
1991                     struct expr *term = m == sub ? k : m;
1992                     if (term->type == EXPR_T_AND) {
1993                         struct expr *p;
1994
1995                         LIST_FOR_EACH (p, node, &term->andor) {
1996                             struct expr *new = expr_clone(p);
1997                             list_push_back(&and->andor, &new->node);
1998                         }
1999                     } else {
2000                         struct expr *new = expr_clone(term);
2001                         list_push_back(&and->andor, &new->node);
2002                     }
2003                 }
2004                 list_push_back(&or->andor, &and->node);
2005             }
2006             expr_destroy(expr);
2007             return expr_normalize_or(or);
2008         }
2009     }
2010     return expr;
2011 }
2012
2013 static struct expr *
2014 expr_normalize_or(struct expr *expr)
2015 {
2016     struct expr *sub, *next;
2017
2018     LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
2019         if (sub->type == EXPR_T_AND) {
2020             list_remove(&sub->node);
2021
2022             struct expr *new = expr_normalize_and(sub);
2023             if (new->type == EXPR_T_BOOLEAN) {
2024                 if (new->boolean) {
2025                     expr_destroy(expr);
2026                     return new;
2027                 }
2028                 free(new);
2029             } else {
2030                 expr_insert_andor(expr, next, new);
2031             }
2032         } else {
2033             ovs_assert(sub->type == EXPR_T_CMP);
2034         }
2035     }
2036     if (list_is_empty(&expr->andor)) {
2037         free(expr);
2038         return expr_create_boolean(false);
2039     }
2040     if (list_is_short(&expr->andor)) {
2041         struct expr *sub = expr_from_node(list_pop_front(&expr->andor));
2042         free(expr);
2043         return sub;
2044     }
2045
2046     return expr;
2047 }
2048
2049 /* Takes ownership of 'expr', which is either a constant "true" or "false" or
2050  * an expression in terms of only relationals, AND, and OR.  Returns either a
2051  * constant "true" or "false" or 'expr' reduced to OR(AND(clause)) where a
2052  * clause is a cmp or a disjunction of cmps on a single field.  This form is
2053  * significant because it is a form that can be directly converted to OpenFlow
2054  * flows with the Open vSwitch "conjunctive match" extension.
2055  *
2056  * 'expr' must already have been simplified, with expr_simplify(). */
2057 struct expr *
2058 expr_normalize(struct expr *expr)
2059 {
2060     switch (expr->type) {
2061     case EXPR_T_CMP:
2062         return expr;
2063
2064     case EXPR_T_AND:
2065         return expr_normalize_and(expr);
2066
2067     case EXPR_T_OR:
2068         return expr_normalize_or(expr);
2069
2070     case EXPR_T_BOOLEAN:
2071         return expr;
2072     }
2073     OVS_NOT_REACHED();
2074 }
2075 \f
2076 /* Creates, initializes, and returns a new 'struct expr_match'.  If 'm' is
2077  * nonnull then it is copied into the new expr_match, otherwise the new
2078  * expr_match's 'match' member is initialized to a catch-all match for the
2079  * caller to refine in-place.
2080  *
2081  * If 'conj_id' is nonzero, adds one conjunction based on 'conj_id', 'clause',
2082  * and 'n_clauses' to the returned 'struct expr_match', otherwise the
2083  * expr_match will not have any conjunctions.
2084  *
2085  * The caller should use expr_match_add() to add the expr_match to a hash table
2086  * after it is finalized. */
2087 static struct expr_match *
2088 expr_match_new(const struct match *m, uint8_t clause, uint8_t n_clauses,
2089                uint32_t conj_id)
2090 {
2091     struct expr_match *match = xmalloc(sizeof *match);
2092     if (m) {
2093         match->match = *m;
2094     } else {
2095         match_init_catchall(&match->match);
2096     }
2097     if (conj_id) {
2098         match->conjunctions = xmalloc(sizeof *match->conjunctions);
2099         match->conjunctions[0].id = conj_id;
2100         match->conjunctions[0].clause = clause;
2101         match->conjunctions[0].n_clauses = n_clauses;
2102         match->n = 1;
2103         match->allocated = 1;
2104     } else {
2105         match->conjunctions = NULL;
2106         match->n = 0;
2107         match->allocated = 0;
2108     }
2109     return match;
2110 }
2111
2112 /* Adds 'match' to hash table 'matches', which becomes the new owner of
2113  * 'match'.
2114  *
2115  * This might actually destroy 'match' because it gets merged together with
2116  * some existing conjunction.*/
2117 static void
2118 expr_match_add(struct hmap *matches, struct expr_match *match)
2119 {
2120     uint32_t hash = match_hash(&match->match, 0);
2121     struct expr_match *m;
2122
2123     HMAP_FOR_EACH_WITH_HASH (m, hmap_node, hash, matches) {
2124         if (match_equal(&m->match, &match->match)) {
2125             if (!m->n || !match->n) {
2126                 free(m->conjunctions);
2127                 m->conjunctions = NULL;
2128                 m->n = 0;
2129                 m->allocated = 0;
2130             } else {
2131                 ovs_assert(match->n == 1);
2132                 if (m->n >= m->allocated) {
2133                     m->conjunctions = x2nrealloc(m->conjunctions,
2134                                                  &m->allocated,
2135                                                  sizeof *m->conjunctions);
2136                 }
2137                 m->conjunctions[m->n++] = match->conjunctions[0];
2138             }
2139             free(match->conjunctions);
2140             free(match);
2141             return;
2142         }
2143     }
2144
2145     hmap_insert(matches, &match->hmap_node, hash);
2146 }
2147
2148 static bool
2149 constrain_match(const struct expr *expr, const struct simap *ports,
2150                 struct match *m)
2151 {
2152     ovs_assert(expr->type == EXPR_T_CMP);
2153     if (expr->cmp.symbol->width) {
2154         mf_mask_subfield(expr->cmp.symbol->field, &expr->cmp.value,
2155                          &expr->cmp.mask, m);
2156     } else {
2157         const struct simap_node *node;
2158         node = ports ? simap_find(ports, expr->cmp.string) : NULL;
2159         if (!node) {
2160             return false;
2161         }
2162
2163         struct mf_subfield sf;
2164         sf.field = expr->cmp.symbol->field;
2165         sf.ofs = 0;
2166         sf.n_bits = expr->cmp.symbol->field->n_bits;
2167
2168         union mf_subvalue x;
2169         memset(&x, 0, sizeof x);
2170         x.integer = htonll(node->data);
2171
2172         mf_write_subfield(&sf, &x, m);
2173     }
2174     return true;
2175 }
2176
2177 static bool
2178 add_disjunction(const struct expr *or, const struct simap *ports,
2179                 struct match *m, uint8_t clause, uint8_t n_clauses,
2180                 uint32_t conj_id, struct hmap *matches)
2181 {
2182     struct expr *sub;
2183     int n = 0;
2184
2185     ovs_assert(or->type == EXPR_T_OR);
2186     LIST_FOR_EACH (sub, node, &or->andor) {
2187         struct expr_match *match = expr_match_new(m, clause, n_clauses,
2188                                                   conj_id);
2189         if (constrain_match(sub, ports, &match->match)) {
2190             expr_match_add(matches, match);
2191             n++;
2192         } else {
2193             free(match->conjunctions);
2194             free(match);
2195         }
2196     }
2197
2198     /* If n == 1, then this didn't really need to be a disjunction.  Oh well,
2199      * that shouldn't happen much. */
2200     return n > 0;
2201 }
2202
2203 static void
2204 add_conjunction(const struct expr *and, const struct simap *ports,
2205                 uint32_t *n_conjsp, struct hmap *matches)
2206 {
2207     struct match match;
2208     int n_clauses = 0;
2209     struct expr *sub;
2210
2211     match_init_catchall(&match);
2212
2213     ovs_assert(and->type == EXPR_T_AND);
2214     LIST_FOR_EACH (sub, node, &and->andor) {
2215         switch (sub->type) {
2216         case EXPR_T_CMP:
2217             if (!constrain_match(sub, ports, &match)) {
2218                 return;
2219             }
2220             break;
2221         case EXPR_T_OR:
2222             n_clauses++;
2223             break;
2224         case EXPR_T_AND:
2225         case EXPR_T_BOOLEAN:
2226             OVS_NOT_REACHED();
2227         }
2228     }
2229
2230     if (!n_clauses) {
2231         expr_match_add(matches, expr_match_new(&match, 0, 0, 0));
2232     } else if (n_clauses == 1) {
2233         LIST_FOR_EACH (sub, node, &and->andor) {
2234             if (sub->type == EXPR_T_OR) {
2235                 add_disjunction(sub, ports, &match, 0, 0, 0, matches);
2236             }
2237         }
2238     } else {
2239         int clause = 0;
2240         (*n_conjsp)++;
2241         LIST_FOR_EACH (sub, node, &and->andor) {
2242             if (sub->type == EXPR_T_OR) {
2243                 if (!add_disjunction(sub, ports, &match, clause++,
2244                                      n_clauses, *n_conjsp, matches)) {
2245                     /* This clause can't ever match, so we might as well skip
2246                      * adding the other clauses--the overall disjunctive flow
2247                      * can't ever match.  Ideally we would also back out all of
2248                      * the clauses we already added, but that seems like a lot
2249                      * of trouble for a case that might never occur in
2250                      * practice. */
2251                     return;
2252                 }
2253             }
2254         }
2255     }
2256 }
2257
2258 static void
2259 add_cmp_flow(const struct expr *cmp, const struct simap *ports,
2260              struct hmap *matches)
2261 {
2262     struct expr_match *m = expr_match_new(NULL, 0, 0, 0);
2263     if (constrain_match(cmp, ports, &m->match)) {
2264         expr_match_add(matches, m);
2265     } else {
2266         free(m);
2267     }
2268 }
2269
2270 /* Converts 'expr', which must be in the form returned by expr_normalize(), to
2271  * a collection of Open vSwitch flows in 'matches', which this function
2272  * initializes to an hmap of "struct expr_match" structures.  Returns the
2273  * number of conjunctive match IDs consumed by 'matches', which uses
2274  * conjunctive match IDs beginning with 0; the caller must offset or remap them
2275  * into the desired range as necessary.
2276  *
2277  * 'ports' must be a map from strings (presumably names of ports) to integers.
2278  * Any comparisons against string fields in 'expr' are translated into integers
2279  * through this map.  A comparison against a string that is not in 'ports' acts
2280  * like a Boolean "false"; that is, it will always fail to match.  For a simple
2281  * expression, this means that the overall expression always fails to match,
2282  * but an expression with a disjunction on the string field might still match
2283  * on other port names.
2284  *
2285  * (This treatment of string fields might be too simplistic in general, but it
2286  * seems reasonable for now when string fields are used only for ports.) */
2287 uint32_t
2288 expr_to_matches(const struct expr *expr, const struct simap *ports,
2289                 struct hmap *matches)
2290 {
2291     uint32_t n_conjs = 0;
2292
2293     hmap_init(matches);
2294     switch (expr->type) {
2295     case EXPR_T_CMP:
2296         add_cmp_flow(expr, ports, matches);
2297         break;
2298
2299     case EXPR_T_AND:
2300         add_conjunction(expr, ports, &n_conjs, matches);
2301         break;
2302
2303     case EXPR_T_OR:
2304         if (expr_is_cmp(expr)) {
2305             struct expr *sub;
2306
2307             LIST_FOR_EACH (sub, node, &expr->andor) {
2308                 add_cmp_flow(sub, ports, matches);
2309             }
2310         } else {
2311             struct expr *sub;
2312
2313             LIST_FOR_EACH (sub, node, &expr->andor) {
2314                 if (sub->type == EXPR_T_AND) {
2315                     add_conjunction(sub, ports, &n_conjs, matches);
2316                 } else {
2317                     add_cmp_flow(sub, ports, matches);
2318                 }
2319             }
2320         }
2321         break;
2322
2323     case EXPR_T_BOOLEAN:
2324         if (expr->boolean) {
2325             struct expr_match *m = expr_match_new(NULL, 0, 0, 0);
2326             expr_match_add(matches, m);
2327         } else {
2328             /* No match. */
2329         }
2330         break;
2331     }
2332     return n_conjs;
2333 }
2334
2335 /* Destroys all of the 'struct expr_match'es in 'matches', as well as the
2336  * 'matches' hmap itself. */
2337 void
2338 expr_matches_destroy(struct hmap *matches)
2339 {
2340     struct expr_match *m, *n;
2341
2342     HMAP_FOR_EACH_SAFE (m, n, hmap_node, matches) {
2343         hmap_remove(matches, &m->hmap_node);
2344         free(m->conjunctions);
2345         free(m);
2346     }
2347     hmap_destroy(matches);
2348 }
2349
2350 /* Prints a representation of the 'struct expr_match'es in 'matches' to
2351  * 'stream'. */
2352 void
2353 expr_matches_print(const struct hmap *matches, FILE *stream)
2354 {
2355     if (hmap_is_empty(matches)) {
2356         fputs("(no flows)\n", stream);
2357         return;
2358     }
2359
2360     const struct expr_match *m;
2361     HMAP_FOR_EACH (m, hmap_node, matches) {
2362         char *s = match_to_string(&m->match, OFP_DEFAULT_PRIORITY);
2363         fputs(s, stream);
2364         free(s);
2365
2366         if (m->n) {
2367             for (int i = 0; i < m->n; i++) {
2368                 const struct cls_conjunction *c = &m->conjunctions[i];
2369                 fprintf(stream, "%c conjunction(%"PRIu32", %d/%d)",
2370                         i == 0 ? ':' : ',', c->id, c->clause, c->n_clauses);
2371             }
2372         }
2373         putc('\n', stream);
2374     }
2375 }
2376 \f
2377 /* Returns true if 'expr' honors the invariants for expressions (see the large
2378  * comment above "struct expr" in expr.h), false otherwise. */
2379 bool
2380 expr_honors_invariants(const struct expr *expr)
2381 {
2382     const struct expr *sub;
2383
2384     switch (expr->type) {
2385     case EXPR_T_CMP:
2386         if (expr->cmp.symbol->width) {
2387             for (int i = 0; i < ARRAY_SIZE(expr->cmp.value.be64); i++) {
2388                 if (expr->cmp.value.be64[i] & ~expr->cmp.mask.be64[i]) {
2389                     return false;
2390                 }
2391             }
2392         }
2393         return true;
2394
2395     case EXPR_T_AND:
2396     case EXPR_T_OR:
2397         if (list_is_short(&expr->andor)) {
2398             return false;
2399         }
2400         LIST_FOR_EACH (sub, node, &expr->andor) {
2401             if (sub->type == expr->type || !expr_honors_invariants(sub)) {
2402                 return false;
2403             }
2404         }
2405         return true;
2406
2407     case EXPR_T_BOOLEAN:
2408         return true;
2409
2410     default:
2411         OVS_NOT_REACHED();
2412     }
2413 }
2414
2415 static bool
2416 expr_is_normalized_and(const struct expr *expr)
2417 {
2418     /* XXX should also check that no symbol is repeated. */
2419     const struct expr *sub;
2420
2421     LIST_FOR_EACH (sub, node, &expr->andor) {
2422         if (!expr_is_cmp(sub)) {
2423             return false;
2424         }
2425     }
2426     return true;
2427 }
2428
2429 /* Returns true if 'expr' is in the form returned by expr_normalize(), false
2430  * otherwise. */
2431 bool
2432 expr_is_normalized(const struct expr *expr)
2433 {
2434     switch (expr->type) {
2435     case EXPR_T_CMP:
2436         return true;
2437
2438     case EXPR_T_AND:
2439         return expr_is_normalized_and(expr);
2440
2441     case EXPR_T_OR:
2442         if (!expr_is_cmp(expr)) {
2443             const struct expr *sub;
2444
2445             LIST_FOR_EACH (sub, node, &expr->andor) {
2446                 if (!expr_is_cmp(sub) && !expr_is_normalized_and(sub)) {
2447                     return false;
2448                 }
2449             }
2450         }
2451         return true;
2452
2453     case EXPR_T_BOOLEAN:
2454         return true;
2455
2456     default:
2457         OVS_NOT_REACHED();
2458     }
2459 }