ofp-actions: Assert variable actions have len>0.
[cascardo/ovs.git] / python / ovs / json.py
1 # Copyright (c) 2010, 2011, 2012 Nicira, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
6 #
7 #     http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 import re
16 import sys
17
18 import six
19 from six.moves import range
20
__pychecker__ = 'no-stringiter'

# Maps the code point of each character that must be escaped inside a JSON
# string to the escape sequence written in its place.  Characters with a
# short escape are listed explicitly.
escapes = {
    ord('"'): u"\\\"",
    ord("\\"): u"\\\\",
    ord("\b"): u"\\b",
    ord("\f"): u"\\f",
    ord("\n"): u"\\n",
    ord("\r"): u"\\r",
    ord("\t"): u"\\t",
}
# Every remaining control character (code points 0 through 31) falls back to
# the generic \uXXXX form; entries defined above are left alone.
for esc in range(32):
    escapes.setdefault(esc, u"\\u%04x" % esc)

# Number of spaces added per nesting level in pretty-printed output.
SPACES_PER_LEVEL = 2
35
36
class _Serializer(object):
    """Writes Python values to a stream as JSON text.

    'stream' is any object with a write() method.  When 'pretty' is true,
    containers are spread over multiple lines and indented by
    SPACES_PER_LEVEL spaces per nesting level.  When 'sort_keys' is true,
    object members are written in sorted order.
    """

    def __init__(self, stream, pretty, sort_keys):
        self.stream = stream
        self.pretty = pretty
        self.sort_keys = sort_keys
        # Current container nesting level, used only for indentation.
        self.depth = 0

    def __serialize_string(self, s):
        """Writes 's' as a double-quoted JSON string, escaping each
        character that has an entry in 'escapes'."""
        escaped = ''.join(escapes.get(ord(ch), ch) for ch in s)
        self.stream.write(u'"%s"' % escaped)

    def __indent_line(self):
        """In pretty mode, starts a new line indented to the current depth;
        otherwise does nothing."""
        if not self.pretty:
            return
        self.stream.write('\n')
        self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))

    def __serialize_object(self, obj):
        """Writes dict 'obj' as a JSON object."""
        self.stream.write(u"{")

        self.depth += 1
        self.__indent_line()

        if self.sort_keys:
            members = sorted(obj.items())
        else:
            members = six.iteritems(obj)

        first = True
        for key, value in members:
            if not first:
                self.stream.write(u",")
                self.__indent_line()
            first = False

            # Keys are always written as strings, whatever their type.
            self.__serialize_string(six.text_type(key))
            self.stream.write(u":")
            if self.pretty:
                self.stream.write(u' ')
            self.serialize(value)

        self.stream.write(u"}")
        self.depth -= 1

    def __serialize_array(self, obj):
        """Writes list or tuple 'obj' as a JSON array."""
        self.stream.write(u"[")
        self.depth += 1

        if obj:
            self.__indent_line()

            first = True
            for value in obj:
                if not first:
                    self.stream.write(u",")
                    self.__indent_line()
                first = False
                self.serialize(value)

        self.depth -= 1
        self.stream.write(u"]")

    def serialize(self, obj):
        """Writes 'obj' to the stream as JSON.

        Supports None, booleans, integers, floats, strings, dicts, lists and
        tuples.  Raises Exception for any other type.
        """
        # The singletons are tested by identity before the integer check
        # because bool is a subclass of int.
        if obj is None:
            self.stream.write(u"null")
            return
        if obj is False:
            self.stream.write(u"false")
            return
        if obj is True:
            self.stream.write(u"true")
            return

        if isinstance(obj, six.integer_types):
            self.stream.write(u"%d" % obj)
            return
        if isinstance(obj, float):
            self.stream.write("%.15g" % obj)
            return
        if isinstance(obj, six.text_type):
            # unicode() on Python 2, or str() in Python 3 (always unicode)
            self.__serialize_string(obj)
            return
        if isinstance(obj, str):
            # This is for Python 2, where this comes out to unicode(str()).
            # For Python 3, it's str(str()), but it's harmless.
            self.__serialize_string(six.text_type(obj))
            return
        if isinstance(obj, dict):
            self.__serialize_object(obj)
            return
        if isinstance(obj, (list, tuple)):
            self.__serialize_array(obj)
            return

        raise Exception("can't serialize %s as JSON" % obj)
109
110
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON and writes the text to 'stream'.

    'pretty' and 'sort_keys' are passed through to the serializer.
    """
    serializer = _Serializer(stream, pretty, sort_keys)
    serializer.serialize(obj)
113
114
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON into the file named 'name'.

    The file is opened for writing in text mode and is closed again even if
    serialization raises an exception.
    """
    with open(name, "w") as stream:
        to_stream(obj, stream, pretty, sort_keys)
121
122
def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as a JSON string."""
    buf = six.StringIO()
    to_stream(obj, buf, pretty, sort_keys)
    text = buf.getvalue()
    buf.close()
    return text
129
130
def from_stream(stream):
    """Parses JSON read from 'stream' in 4096-character chunks.

    Reading stops at end of input or as soon as the parser declines to
    consume a whole chunk.  Returns whatever Parser.finish() returns: the
    parsed value, or an error string.
    """
    parser = Parser(check_trailer=True)
    chunk = stream.read(4096)
    while chunk != "" and parser.feed(chunk) == len(chunk):
        chunk = stream.read(4096)
    return parser.finish()
138
139
def from_file(name):
    """Parses the JSON contained in the file named 'name'.

    The file is opened for reading in text mode and is closed again even if
    parsing raises an exception.  Returns the result of from_stream().
    """
    with open(name, "r") as stream:
        return from_stream(stream)
146
147
def from_string(s):
    """Parses JSON from string 's'.

    If 's' is not already a text (unicode) string it is decoded as UTF-8
    first.  Returns the parsed value on success.  On failure, returns a
    string describing the problem: either the UTF-8 decoding error or the
    error reported by the parser.
    """
    if not isinstance(s, six.text_type):
        # We assume the input is a byte string: a non-unicode str in
        # Python 2, or bytes in Python 3.  Either way it must be decoded.
        try:
            s = six.text_type(s, 'utf-8')
        except UnicodeDecodeError as e:
            # Iterating a bytearray yields integers on both Python 2 and
            # Python 3, whereas iterating the raw object yields 1-character
            # strings on Python 2 and integers on Python 3 (where calling
            # ord() on them would raise TypeError).
            bad_bytes = bytearray(e.object[e.start:e.end])
            seq = ' '.join(["0x%2x" % b for b in bad_bytes if b >= 0x80])
            return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()
162
163
class Parser(object):
    """Incremental JSON parser.

    Text is supplied in pieces through feed().  finish() then returns either
    the parsed value (a dict or a list, because only an object or an array
    is accepted at the top level) or, on failure, a string describing the
    first error encountered.

    Internally there are two state machines: a lexer ('lex_state') that
    consumes one character at a time, and a parser ('parse_state') that
    consumes the tokens the lexer produces.  Both states are stored as
    attributes looked up on the class rather than as bound methods, so they
    are called with 'self' passed explicitly.
    """

    # Maximum height of parsing stack. #
    MAX_HEIGHT = 1000

    def __init__(self, check_trailer=False):
        """Creates a parser.

        If 'check_trailer' is false, the parser reports itself done as soon
        as the top-level value is complete.  If it is true, input following
        the top-level value keeps being examined, so that anything other
        than whitespace is reported as an error.
        """
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    def __lex_start_space(self, c):
        """Whitespace between tokens is ignored."""
        pass

    def __lex_start_alpha(self, c):
        """Begins accumulating a keyword (false, true, null)."""
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        """Passes a single-character punctuation token to the parser."""
        self.__parser_input(c)

    def __lex_start_number(self, c):
        """Begins accumulating a number."""
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        """Begins a quoted string; the opening quote is not buffered."""
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        """Reports a character that cannot start any token."""
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    # Dispatch table from the first character of a token to its handler.
    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Lexer state between tokens.  Always consumes 'c'."""
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Characters that may continue a keyword.
    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        """Converts the buffered keyword to its value and passes it to the
        parser, or reports an error if it is not a JSON keyword."""
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        """Lexer state inside a keyword.

        Returns True if 'c' was consumed as part of the keyword.  Returns
        False if 'c' ended the keyword, in which case 'c' has not been
        consumed and must be lexed again in the new state.
        """
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            self.__lex_finish_keyword()
            return False

    # Groups: sign, integer part, fraction digits, exponent.
    __number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
                             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the buffered number text to a value and passes it to
        the parser.

        A number whose value is an integer in the range -2**63 through
        2**63 - 1 is passed as an integer, even if it was written with a
        fraction or an exponent.  Any other number is passed as a float.
        Malformed numbers, and numbers too large for a float, are reported
        as errors.
        """
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                (int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction consisting only of zeros does not affect the value.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            # Represent the number as significand * 10**pow10, with both
            # parts held as exact integers.
            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += int(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Try to reduce pow10 to zero without losing precision.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    # Floor division keeps 'significand' an exact integer;
                    # true division would turn it into a float on Python 3.
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                    ((not sign and significand < 2 ** 63) or
                     (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match("-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match("-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search("e[-+]?([^0-9]|$)", s):
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        """Lexer state inside a number.

        Returns True if 'c' was consumed as part of the number.  Returns
        False if 'c' ended the number, in which case 'c' has not been
        consumed and must be lexed again in the new state.
        """
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            self.__lex_finish_number()
            return False

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Parses 's', the text following "\\u" in a string escape.

        Returns the value of the four hexadecimal digits as an integer.  On
        failure, reports an error and returns None.
        """
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Maps the character following a backslash to the character it stands
    # for.  "\u" escapes are handled separately.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Decodes the escape sequences in the buffered string body and
        passes the result to the parser as a 'string' token.

        Reports an error instead if the buffer contains a malformed escape.
        """
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                out += inp
                break
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by a
                # second \u escape holding the trailing surrogate.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            out += six.unichr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        """Lexer state just after a backslash inside a string.

        Buffers 'c' unconditionally, so that an escaped quote does not end
        the string, then returns to the ordinary string state.
        """
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        """Lexer state inside a quoted string.  Always consumes 'c'."""
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Feeds 'c' to the current lexer state.

        Returns True if 'c' was consumed, False if it must be fed again.
        """
        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    def __parse_start(self, token, unused_string):
        """Parser state before any input: expects '{' or '['."""
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        """Parser state after the top-level value: any token is an error."""
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        """Parser state just after '{': expects '}' or a member name."""
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        """Parser state expecting a member name."""
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        """Parser state expecting ':' after a member name."""
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        """Parser state expecting a member value."""
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        """Parser state after a member value: expects ',' or '}'."""
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        """Parser state just after '[': expects ']' or a value."""
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        """Parser state expecting an array element."""
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        """Parser state after an array element: expects ',' or ']'."""
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Resets the lexer to its start state and passes 'token' to the
        current parser state.

        'token' is a punctuation character, a keyword value (False, True or
        None), a number, or the literal 'string', in which case 'string'
        carries the decoded text.
        """
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Adds 'value' to the container on top of the stack: as the value
        of 'member_name' for an object, or as a new element for an array."""
        top = self.stack[-1]
        if isinstance(top, dict):
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Starts the new container 'new_json', attaching it to its parent
        if there is one, and switches the parser to 'next_state'.

        Reports an error instead if the stack is already MAX_HEIGHT deep.
        """
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        """Starts a new JSON object."""
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        """Starts a new JSON array."""
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Finishes the container on top of the stack.

        The top-level container is left on the stack so that finish() can
        return it.
        """
        if len(self.stack) == 1:
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if isinstance(top, list):
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handles a token in a position where a value is expected.

        A scalar is stored in the enclosing container and the parser moves
        to 'next_state'.  '{' or '[' starts a nested container, which sets
        the parser state itself.  Any other token is an error.
        """
        number_types = list(six.integer_types)
        number_types.extend([float])
        number_types = tuple(number_types)
        if token in [False, None, True] or isinstance(token, number_types):
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current input position, as
        the parse error and marks the parser done.

        Only the first error is kept; later calls have no effect.
        """
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Feeds the characters of 's' to the parser.

        Returns the number of characters consumed.  This is less than
        len(s) if the parser became done before reaching the end of 's'.
        """
        i = 0
        while True:
            if self.done or i >= len(s):
                return i

            c = s[i]
            # A character that the lexer declines to consume has ended the
            # previous token; it is presented again on the next iteration.
            if self.__lex_input(c):
                self.byte_number += 1
                if c == '\n':
                    self.column_number = 0
                    self.line_number += 1
                else:
                    self.column_number += 1

                i += 1

    def is_done(self):
        """Returns true if the parser will not consume any more input."""
        return self.done

    def finish(self):
        """Ends parsing.

        Returns the parsed value on success.  On failure, returns a string
        describing the first error.
        """
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # Feed a space to terminate a keyword or number still being
            # accumulated.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error