2 * Copyright (c) 2015 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
22 #include "dynamic-string.h"
26 /* Returns a string that represents 'format'. */
28 lex_format_to_string(enum lex_format format)
33 case LEX_F_HEXADECIMAL:
46 /* Initializes 'token'. */
48 lex_token_init(struct lex_token *token)
50 token->type = LEX_T_END;
54 /* Frees memory owned by 'token'. */
56 lex_token_destroy(struct lex_token *token)
61 /* Exchanges 'a' and 'b'. */
63 lex_token_swap(struct lex_token *a, struct lex_token *b)
65 struct lex_token tmp = *a;
70 /* lex_token_format(). */
73 lex_token_n_zeros(enum lex_format format)
76 case LEX_F_DECIMAL: return offsetof(union mf_subvalue, integer);
77 case LEX_F_HEXADECIMAL: return 0;
78 case LEX_F_IPV4: return offsetof(union mf_subvalue, ipv4);
79 case LEX_F_IPV6: return offsetof(union mf_subvalue, ipv6);
80 case LEX_F_ETHERNET: return offsetof(union mf_subvalue, mac);
81 default: OVS_NOT_REACHED();
85 /* Returns the effective format for 'token', that is, the format in which it
86 * should actually be printed. This is ordinarily the same as 'token->format',
87 * but it's always possible that someone sets up a token with a format that
88 * won't work for a value, e.g. 'token->value' is wider than 32 bits but the
89 * format is LEX_F_IPV4. (The lexer itself won't do that; this is an attempt
90 * to avoid confusion in the future.) */
91 static enum lex_format
92 lex_token_get_format(const struct lex_token *token)
94 size_t n_zeros = lex_token_n_zeros(token->format);
95 return (is_all_zeros(&token->value, n_zeros)
96 && (token->type != LEX_T_MASKED_INTEGER
97 || is_all_zeros(&token->mask, n_zeros))
103 lex_token_format_value(const union mf_subvalue *value,
104 enum lex_format format, struct ds *s)
108 ds_put_format(s, "%"PRIu64, ntohll(value->integer));
111 case LEX_F_HEXADECIMAL:
112 mf_format_subvalue(value, s);
116 ds_put_format(s, IP_FMT, IP_ARGS(value->ipv4));
120 print_ipv6_addr(s, &value->ipv6);
124 ds_put_format(s, ETH_ADDR_FMT, ETH_ADDR_ARGS(value->mac));
134 lex_token_format_masked_integer(const struct lex_token *token, struct ds *s)
136 enum lex_format format = lex_token_get_format(token);
138 lex_token_format_value(&token->value, format, s);
141 const union mf_subvalue *mask = &token->mask;
142 if (format == LEX_F_IPV4 && ip_is_cidr(mask->ipv4)) {
143 ds_put_format(s, "%d", ip_count_cidr_bits(mask->ipv4));
144 } else if (token->format == LEX_F_IPV6 && ipv6_is_cidr(&mask->ipv6)) {
145 ds_put_format(s, "%d", ipv6_count_cidr_bits(&mask->ipv6));
147 lex_token_format_value(&token->mask, format, s);
151 /* Appends a string representation of 'token' to 's', in a format that can be
152 * losslessly parsed back by the lexer. (LEX_T_END and LEX_T_ERROR can't be
155 lex_token_format(const struct lex_token *token, struct ds *s)
157 switch (token->type) {
163 ds_put_cstr(s, token->s);
167 ds_put_cstr(s, "error(");
168 json_string_escape(token->s, s);
173 json_string_escape(token->s, s);
177 lex_token_format_value(&token->value, lex_token_get_format(token), s);
180 case LEX_T_MASKED_INTEGER:
181 lex_token_format_masked_integer(token, s);
203 ds_put_cstr(s, "==");
206 ds_put_cstr(s, "!=");
212 ds_put_cstr(s, "<=");
218 ds_put_cstr(s, ">=");
224 ds_put_cstr(s, "&&");
227 ds_put_cstr(s, "||");
230 ds_put_cstr(s, "..");
235 case LEX_T_SEMICOLON:
242 ds_put_cstr(s, "<->");
250 /* lex_token_parse(). */
252 static void OVS_PRINTF_FORMAT(2, 3)
253 lex_error(struct lex_token *token, const char *message, ...)
255 ovs_assert(!token->s);
256 token->type = LEX_T_ERROR;
259 va_start(args, message);
260 token->s = xvasprintf(message, args);
265 lex_parse_hex_integer(const char *start, size_t len, struct lex_token *token)
267 const char *in = start + (len - 1);
268 uint8_t *out = token->value.u8 + (sizeof token->value.u8 - 1);
270 for (int i = 0; i < len; i++) {
271 int hexit = hexit_value(in[-i]);
273 lex_error(token, "Invalid syntax in hexadecimal constant.");
276 if (hexit && i / 2 >= sizeof token->value.u8) {
277 lex_error(token, "Hexadecimal constant requires more than "
278 "%"PRIuSIZE" bits.", 8 * sizeof token->value.u8);
281 out[-(i / 2)] |= i % 2 ? hexit << 4 : hexit;
283 token->format = LEX_F_HEXADECIMAL;
287 lex_parse_integer__(const char *p, struct lex_token *token)
289 lex_token_init(token);
290 token->type = LEX_T_INTEGER;
291 memset(&token->value, 0, sizeof token->value);
292 const char *start = p;
293 const char *end = start;
294 while (isalnum((unsigned char) *end) || *end == ':'
295 || (*end == '.' && end[1] != '.')) {
298 size_t len = end - start;
304 lex_error(token, "Integer constant expected.");
306 && ovs_scan(start, ETH_ADDR_SCAN_FMT"%n",
307 ETH_ADDR_SCAN_ARGS(mac), &n)
309 token->value.mac = mac;
310 token->format = LEX_F_ETHERNET;
311 } else if (start + strspn(start, "0123456789") == end) {
312 if (p[0] == '0' && len > 1) {
313 lex_error(token, "Decimal constants must not have leading zeros.");
315 unsigned long long int integer;
319 integer = strtoull(p, &tail, 10);
320 if (tail != end || errno == ERANGE) {
321 lex_error(token, "Decimal constants must be less than 2**64.");
323 token->value.integer = htonll(integer);
324 token->format = LEX_F_DECIMAL;
327 } else if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
329 lex_parse_hex_integer(start + 2, len - 2, token);
331 lex_error(token, "Hex digits expected following 0%c.", p[1]);
333 } else if (len < INET6_ADDRSTRLEN) {
334 char copy[INET6_ADDRSTRLEN];
335 memcpy(copy, p, len);
339 struct in6_addr ipv6;
340 if (inet_pton(AF_INET, copy, &ipv4) == 1) {
341 token->value.ipv4 = ipv4.s_addr;
342 token->format = LEX_F_IPV4;
343 } else if (inet_pton(AF_INET6, copy, &ipv6) == 1) {
344 token->value.ipv6 = ipv6;
345 token->format = LEX_F_IPV6;
347 lex_error(token, "Invalid numeric constant.");
350 lex_error(token, "Invalid numeric constant.");
353 ovs_assert(token->type == LEX_T_INTEGER || token->type == LEX_T_ERROR);
358 lex_parse_mask(const char *p, struct lex_token *token)
360 struct lex_token mask;
362 /* Parse just past the '/' as a second integer. Handle errors. */
363 p = lex_parse_integer__(p + 1, &mask);
364 if (mask.type == LEX_T_ERROR) {
365 lex_token_swap(&mask, token);
366 lex_token_destroy(&mask);
369 ovs_assert(mask.type == LEX_T_INTEGER);
371 /* Now convert the value and mask into a masked integer token.
372 * We have a few special cases. */
373 token->type = LEX_T_MASKED_INTEGER;
374 memset(&token->mask, 0, sizeof token->mask);
375 uint32_t prefix_bits = ntohll(mask.value.integer);
376 if (token->format == mask.format) {
377 /* Same format value and mask is always OK. */
378 token->mask = mask.value;
379 } else if (token->format == LEX_F_IPV4
380 && mask.format == LEX_F_DECIMAL
381 && prefix_bits <= 32) {
382 /* IPv4 address with decimal mask is a CIDR prefix. */
383 token->mask.integer = htonll(ntohl(be32_prefix_mask(prefix_bits)));
384 } else if (token->format == LEX_F_IPV6
385 && mask.format == LEX_F_DECIMAL
386 && prefix_bits <= 128) {
387 /* IPv6 address with decimal mask is a CIDR prefix. */
388 token->mask.ipv6 = ipv6_create_mask(prefix_bits);
389 } else if (token->format == LEX_F_DECIMAL
390 && mask.format == LEX_F_HEXADECIMAL
391 && token->value.integer == 0) {
392 /* Special case for e.g. 0/0x1234. */
393 token->format = LEX_F_HEXADECIMAL;
394 token->mask = mask.value;
396 lex_error(token, "Value and mask have incompatible formats.");
400 /* Check invariant that a 1-bit in the value corresponds to a 1-bit in the
402 for (int i = 0; i < ARRAY_SIZE(token->mask.be32); i++) {
403 ovs_be32 v = token->value.be32[i];
404 ovs_be32 m = token->mask.be32[i];
407 lex_error(token, "Value contains unmasked 1-bits.");
413 lex_token_destroy(&mask);
418 lex_parse_integer(const char *p, struct lex_token *token)
420 p = lex_parse_integer__(p, token);
421 if (token->type == LEX_T_INTEGER && *p == '/') {
422 p = lex_parse_mask(p, token);
428 lex_parse_string(const char *p, struct lex_token *token)
430 const char *start = ++p;
434 lex_error(token, "Input ends inside quoted string.");
438 token->type = (json_string_unescape(start, p - start, &token->s)
439 ? LEX_T_STRING : LEX_T_ERROR);
/* Returns true if 'c' may be the first character of an identifier, that is,
 * if it is an ASCII letter, an underscore, or a period. */
static bool
lex_is_id1(unsigned char c)
{
    if (c == '_' || c == '.') {
        return true;
    }

    bool is_upper = c >= 'A' && c <= 'Z';
    bool is_lower = c >= 'a' && c <= 'z';
    return is_upper || is_lower;
}
/* Returns true if 'c' may appear in an identifier after its first character:
 * anything lex_is_id1() accepts, plus the ASCII decimal digits. */
static bool
lex_is_idn(unsigned char c)
{
    if (c >= '0' && c <= '9') {
        return true;
    }
    return lex_is_id1(c);
}
470 lex_parse_id(const char *p, struct lex_token *token)
472 const char *start = p;
476 } while (lex_is_idn(*p));
478 token->type = LEX_T_ID;
479 token->s = xmemdup0(start, p - start);
483 /* Initializes 'token' and parses the first token from the beginning of
484 * null-terminated string 'p' into 'token'. Stores a pointer to the start of
485 * the token (after skipping white space and comments, if any) into '*startp'.
486 * Returns the character position at which to begin parsing the next token. */
488 lex_token_parse(struct lex_token *token, const char *p, const char **startp)
490 lex_token_init(token);
496 token->type = LEX_T_END;
499 case ' ': case '\t': case '\n': case '\r':
508 } while (*p != '\0' && *p != '\n');
510 } else if (*p == '*') {
513 if (*p == '*' && p[1] == '/') {
516 } else if (*p == '\0' || *p == '\n') {
517 lex_error(token, "`/*' without matching `*/'.");
526 "`/' is only valid as part of `//' or `/*'.");
531 token->type = LEX_T_LPAREN;
536 token->type = LEX_T_RPAREN;
541 token->type = LEX_T_LCURLY;
546 token->type = LEX_T_RCURLY;
551 token->type = LEX_T_LSQUARE;
556 token->type = LEX_T_RSQUARE;
563 token->type = LEX_T_EQ;
566 token->type = LEX_T_EQUALS;
573 token->type = LEX_T_NE;
576 token->type = LEX_T_LOG_NOT;
583 token->type = LEX_T_LOG_AND;
586 lex_error(token, "`&' is only valid as part of `&&'.");
593 token->type = LEX_T_LOG_OR;
596 lex_error(token, "`|' is only valid as part of `||'.");
603 token->type = LEX_T_LE;
605 } else if (*p == '-' && p[1] == '>') {
606 token->type = LEX_T_EXCHANGE;
609 token->type = LEX_T_LT;
616 token->type = LEX_T_GE;
619 token->type = LEX_T_GT;
626 token->type = LEX_T_ELLIPSIS;
629 lex_error(token, "`.' is only valid as part of `..' or a number.");
635 token->type = LEX_T_COMMA;
640 token->type = LEX_T_SEMICOLON;
643 case '0': case '1': case '2': case '3': case '4':
644 case '5': case '6': case '7': case '8': case '9':
646 p = lex_parse_integer(p, token);
650 p = lex_parse_string(p, token);
653 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
654 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
655 /* We need to distinguish an Ethernet address or IPv6 address from an
656 * identifier. Fortunately, Ethernet addresses and IPv6 addresses that
657 * are ambiguous based on the first character, always start with hex
658 * digits followed by a colon, but identifiers never do. */
659 p = (p[strspn(p, "0123456789abcdefABCDEF")] == ':'
660 ? lex_parse_integer(p, token)
661 : lex_parse_id(p, token));
665 if (lex_is_id1(*p)) {
666 p = lex_parse_id(p, token);
668 if (isprint((unsigned char) *p)) {
669 lex_error(token, "Invalid character `%c' in input.", *p);
671 lex_error(token, "Invalid byte 0x%d in input.", *p);
681 /* Initializes 'lexer' for parsing 'input'.
683 * While the lexer is in use, 'input' must remain available, but the caller
684 * otherwise retains ownership of 'input'.
686 * The caller must call lexer_get() to obtain the first token. */
688 lexer_init(struct lexer *lexer, const char *input)
690 lexer->input = input;
692 lex_token_init(&lexer->token);
695 /* Frees storage associated with 'lexer'. */
697 lexer_destroy(struct lexer *lexer)
699 lex_token_destroy(&lexer->token);
702 /* Obtains the next token from 'lexer' into 'lexer->token', and returns the
703 * token's type. The caller may examine 'lexer->token' directly to obtain full
704 * information about the token. */
706 lexer_get(struct lexer *lexer)
708 lex_token_destroy(&lexer->token);
709 lexer->input = lex_token_parse(&lexer->token, lexer->input, &lexer->start);
710 return lexer->token.type;
713 /* Returns the type of the next token that will be fetched by lexer_get(),
714 * without advancing 'lexer->token' to that token. */
716 lexer_lookahead(const struct lexer *lexer)
718 struct lex_token next;
722 lex_token_parse(&next, lexer->input, &start);
724 lex_token_destroy(&next);
728 /* If 'lexer''s current token has the given 'type', advances 'lexer' to the
729 * next token and returns true. Otherwise returns false. */
731 lexer_match(struct lexer *lexer, enum lex_type type)
733 if (lexer->token.type == type) {
741 /* If 'lexer''s current token is the identifier given in 'id', advances 'lexer'
742 * to the next token and returns true. Otherwise returns false. */
744 lexer_match_id(struct lexer *lexer, const char *id)
746 if (lexer->token.type == LEX_T_ID && !strcmp(lexer->token.s, id)) {