ovn: Add colon token to lexer, to support parsing "1.2.3.4:5".

author Ben Pfaff <blp@ovn.org>

Sun, 3 Jul 2016 10:34:53 +0000 (03:34 -0700)

committer Gurucharan Shetty <guru@ovn.org>

Sun, 3 Jul 2016 22:52:50 +0000 (15:52 -0700)
author Ben Pfaff <blp@ovn.org>
Sun, 3 Jul 2016 10:34:53 +0000 (03:34 -0700)
committer Gurucharan Shetty <guru@ovn.org>
Sun, 3 Jul 2016 22:52:50 +0000 (15:52 -0700)
diff --git a/ovn/lib/lex.c b/ovn/lib/lex.c

index 1467720..52c0946 100644 (file)
--- a/ovn/lib/lex.c
+++ b/ovn/lib/lex.c
@@ -294,6 +294,9 @@ lex_token_format(const struct lex_token *token, struct ds *s)
      case LEX_T_DECREMENT:
          ds_put_cstr(s, "--");
          break;
+    case LEX_T_COLON:
+        ds_put_char(s, ':');
+        break;
      default:
          OVS_NOT_REACHED();
      }
@@ -342,10 +345,37 @@ lex_parse_integer__(const char *p, struct lex_token *token)
      lex_token_init(token);
      token->type = LEX_T_INTEGER;
      memset(&token->value, 0, sizeof token->value);
+
+    /* Find the extent of an "integer" token, as 'start' through 'end'.  An
+     * "integer" can be a decimal or hexadecimal number, or an Ethernet, IPv4,
+     * or IPv6 address.
+     *
+     * Special cases we handle here are:
+     *
+     *     - The ellipsis token "..", used as e.g. 123..456.  A doubled dot
+     *       is never valid syntax as part of an "integer", so we stop if
+     *       we encounter two dots in a row.
+     *
+     *     - Syntax like 1.2.3.4:1234 to indicate an IPv4 address followed by a
+     *       port number should be considered three tokens: 1.2.3.4 : 1234.
+     *       The obvious approach is to allow just dots or just colons within a
+     *       given integer, but that would disallow IPv4-mapped IPv6 addresses,
+     *       e.g. ::ffff:192.0.2.128.  However, even in those addresses, a
+     *       colon never follows a dot, so we stop if we encounter a colon
+     *       after a dot.
+     *
+     *       (There is no corresponding way to parse an IPv6 address followed
+     *       by a port number: ::1:2:3:4:1234 is unavoidably ambiguous.)
+     */
      const char *start = p;
      const char *end = start;
-    while (isalnum((unsigned char) *end) || *end == ':'
+    bool saw_dot = false;
+    while (isalnum((unsigned char) *end)
+           || (*end == ':' && !saw_dot)
             || (*end == '.' && end[1] != '.')) {
+        if (*end == '.') {
+            saw_dot = true;
+        }
          end++;
      }
      size_t len = end - start;
@@ -717,9 +747,15 @@ next:
          p = lex_parse_macro(p, token);
          break;
  
+    case ':':
+        if (p[1] != ':') {
+            token->type = LEX_T_COLON;
+            p++;
+            break;
+        }
+        /* IPv6 address beginning with "::".  Fall through. */
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
-    case ':':
          p = lex_parse_integer(p, token);
          break;
  
diff --git a/ovn/lib/lex.h b/ovn/lib/lex.h

index ee71a8b..4de48c7 100644 (file)
--- a/ovn/lib/lex.h
+++ b/ovn/lib/lex.h
@@ -61,6 +61,7 @@ enum lex_type {
      LEX_T_EQUALS,               /* = */
      LEX_T_EXCHANGE,             /* <-> */
      LEX_T_DECREMENT,            /* -- */
+    LEX_T_COLON,                /* : */
  };
  
  /* Subtype for LEX_T_INTEGER and LEX_T_MASKED_INTEGER tokens.
diff --git a/tests/ovn.at b/tests/ovn.at

index 986dac9..a274dcd 100644 (file)
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -58,6 +58,7 @@ a/b => a error("`/' is only valid as part of `//' or `/*'.") b
  192.168.0.0/255.0.0.0 => error("Value contains unmasked 1-bits.")
  192.168.0.0/32
  192.168.0.0/255.255.255.255 => 192.168.0.0/32
+1.2.3.4:5 => 1.2.3.4 : 5
  
  ::
  ::1
@@ -85,7 +86,7 @@ fe:x => error("Invalid numeric constant.")
  00:01:02:03:04:x => error("Invalid numeric constant.")
  
  # Test that operators are tokenized as expected, even without white space.
-(){}[[]]==!=<<=>>=!&&||..,;=<->-- => ( ) { } [[ ]] == != < <= > >= ! && || .. , ; = <-> --
+(){}[[]]==!=<<=>>=!&&||..,;=<->--: => ( ) { } [[ ]] == != < <= > >= ! && || .. , ; = <-> -- :
  & => error("`&' is only valid as part of `&&'.")
  | => error("`|' is only valid as part of `||'.")
  - => error("`-' is only valid as part of `--'.")
author	Ben Pfaff <blp@ovn.org>
	Sun, 3 Jul 2016 10:34:53 +0000 (03:34 -0700)
committer	Gurucharan Shetty <guru@ovn.org>
	Sun, 3 Jul 2016 22:52:50 +0000 (15:52 -0700)
ovn/lib/lex.c		patch | blob | history
ovn/lib/lex.h		patch | blob | history
tests/ovn.at		patch | blob | history