python: Drop usage of long type.
[cascardo/ovs.git] / python / ovs / json.py
1 # Copyright (c) 2010, 2011, 2012 Nicira, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
6 #
7 #     http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 import re
16 import StringIO
17 import sys
18
19 import six
20 from six.moves import range
21
# Option string read by the pychecker tool (presumably silences its
# string-iteration warning for this module -- confirm against pychecker docs).
__pychecker__ = 'no-stringiter'

# Maps a character's code point to the escape sequence written in its place
# inside a JSON string.  The short, named escapes are listed explicitly...
escapes = {
    ord("\b"): u"\\b",
    ord("\t"): u"\\t",
    ord("\n"): u"\\n",
    ord("\f"): u"\\f",
    ord("\r"): u"\\r",
    ord('"'): u"\\\"",
    ord("\\"): u"\\\\",
}
# ...and every other code point below 32 falls back to a \uXXXX escape.
for esc in range(32):
    escapes.setdefault(esc, u"\\u%04x" % esc)

# Number of spaces added per nesting level when pretty-printing.
SPACES_PER_LEVEL = 2
36
37
class _Serializer(object):
    """Writes Python values to a stream as JSON text.

    'stream' is any object with a write() method.  When 'pretty' is true,
    members and elements are placed on separate lines, indented by
    SPACES_PER_LEVEL spaces per nesting level.  When 'sort_keys' is true,
    object members are emitted in sorted order.
    """

    def __init__(self, stream, pretty, sort_keys):
        self.stream = stream
        self.pretty = pretty
        self.sort_keys = sort_keys
        # Current nesting level; only used to compute indentation.
        self.depth = 0

    def __serialize_string(self, s):
        """Writes 's' as a double-quoted JSON string, replacing every
        character listed in 'escapes' by its escape sequence."""
        escaped = ''.join(escapes.get(ord(ch), ch) for ch in s)
        self.stream.write(u'"%s"' % escaped)

    def __indent_line(self):
        """Starts a new line indented to the current depth.  Does nothing
        unless pretty-printing is enabled."""
        if not self.pretty:
            return
        self.stream.write('\n')
        self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))

    def __serialize_dict(self, obj):
        """Writes dict 'obj' as a JSON object.  Keys are converted with
        unicode() before being written as strings."""
        self.stream.write(u"{")

        self.depth += 1
        self.__indent_line()

        members = sorted(obj.items()) if self.sort_keys \
            else six.iteritems(obj)
        need_separator = False
        for key, value in members:
            if need_separator:
                self.stream.write(u",")
                self.__indent_line()
            need_separator = True

            self.__serialize_string(unicode(key))
            self.stream.write(u":")
            if self.pretty:
                self.stream.write(u' ')
            self.serialize(value)

        self.stream.write(u"}")
        self.depth -= 1

    def __serialize_array(self, obj):
        """Writes list or tuple 'obj' as a JSON array."""
        self.stream.write(u"[")
        self.depth += 1

        # An empty sequence is written as "[]" with no line break inside.
        if obj:
            self.__indent_line()

            need_separator = False
            for element in obj:
                if need_separator:
                    self.stream.write(u",")
                    self.__indent_line()
                need_separator = True
                self.serialize(element)

        self.depth -= 1
        self.stream.write(u"]")

    def serialize(self, obj):
        """Writes 'obj' to the stream as JSON.

        Handles None, booleans, integers, floats, unicode and str strings,
        dicts, lists and tuples; raises Exception for anything else.
        """
        # The singletons are tested by identity before the integer check.
        if obj is None:
            self.stream.write(u"null")
            return
        if obj is False:
            self.stream.write(u"false")
            return
        if obj is True:
            self.stream.write(u"true")
            return

        if isinstance(obj, six.integer_types):
            self.stream.write(u"%d" % obj)
            return
        if isinstance(obj, float):
            self.stream.write("%.15g" % obj)
            return
        if isinstance(obj, unicode):
            self.__serialize_string(obj)
            return
        if isinstance(obj, str):
            self.__serialize_string(unicode(obj))
            return
        if isinstance(obj, dict):
            self.__serialize_dict(obj)
            return
        if isinstance(obj, (list, tuple)):
            self.__serialize_array(obj)
            return

        raise Exception("can't serialize %s as JSON" % obj)
107
108
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON and writes the text to 'stream'.

    'pretty' and 'sort_keys' are handed unchanged to the serializer.
    """
    serializer = _Serializer(stream, pretty, sort_keys)
    serializer.serialize(obj)
111
112
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON into the file called 'name', which is opened
    for writing.  The file is closed even if serialization raises."""
    with open(name, "w") as stream:
        to_stream(obj, stream, pretty, sort_keys)
119
120
def to_string(obj, pretty=False, sort_keys=True):
    """Returns the JSON text for 'obj', built in an in-memory buffer."""
    buf = StringIO.StringIO()
    to_stream(obj, buf, pretty, sort_keys)
    text = buf.getvalue()
    buf.close()
    return text
127
128
def from_stream(stream):
    """Parses the JSON text read from 'stream' in 4096-character pieces.

    Reading stops at end of input (an empty read) or as soon as the parser
    declines to consume a whole piece.  Returns whatever Parser.finish()
    returns.
    """
    parser = Parser(check_trailer=True)
    for chunk in iter(lambda: stream.read(4096), ""):
        if parser.feed(chunk) != len(chunk):
            break
    return parser.finish()
136
137
def from_file(name):
    """Opens the file called 'name' for reading, parses its contents with
    from_stream() and returns the result.  The file is always closed."""
    with open(name, "r") as stream:
        return from_stream(stream)
144
145
def from_string(s):
    """Parses the JSON text in 's' and returns the result.

    's' may be a unicode object or a UTF-8 encoded byte string.  A byte
    string that is not valid UTF-8 is not parsed; a string describing the
    offending bytes is returned instead.  Otherwise the return value is
    whatever Parser.finish() returns.
    """
    # Only byte strings need decoding: unicode(x, 'utf-8') raises TypeError
    # when 'x' is already a unicode object.
    if not isinstance(s, unicode):
        try:
            s = unicode(s, 'utf-8')
        except UnicodeDecodeError as e:
            seq = ' '.join(["0x%2x" % ord(c)
                            for c in e.object[e.start:e.end]
                            if ord(c) >= 0x80])
            return ("not a valid UTF-8 string: invalid UTF-8 sequence %s"
                    % seq)
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()
156
157
class Parser(object):
    """Incremental JSON parser.

    Text is supplied piece by piece through feed().  finish() then returns
    the parsed value, which is a dict or a list because only '{' or '[' is
    accepted at the start of input, or a string describing the first error.

    Internally there are two state machines driven one character at a time:
    'lex_state' groups characters into tokens and 'parse_state' consumes the
    tokens.  Both attributes hold functions that are called with the parser
    passed explicitly as first argument.
    """
    # Maximum height of parsing stack. #
    MAX_HEIGHT = 1000

    def __init__(self, check_trailer=False):
        """If 'check_trailer' is false the parser marks itself done as soon
        as the top-level value is closed; if true it keeps accepting input
        so that anything other than white space after the value is reported
        as an error."""
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # Actions for the first character of a token.

    def __lex_start_space(self, c):
        pass

    def __lex_start_alpha(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        # Punctuation is a complete token by itself.
        self.__parser_input(c)

    def __lex_start_number(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        # The opening quote is not stored in the buffer.
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    # Dispatch table from a token's first character to its action; any
    # character not listed is handled by __lex_start_error.
    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Lexer state between tokens.  Always consumes 'c'."""
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Characters that may continue a keyword.
    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        """Turns the buffered keyword into a False, True or None token, or
        reports an error for any other word."""
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        """Lexer state inside a keyword.  Returns False, leaving 'c' to be
        lexed again, when 'c' ends the keyword."""
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            self.__lex_finish_keyword()
            return False

    # Groups: sign, integer part, fraction digits, exponent.
    __number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
                             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the buffered number into a token.

        A value that is integral and lies in the range -2**63...2**63-1 is
        delivered as an integer, even if it was written with a fraction or
        exponent; any other value is delivered as a float.  Malformed or
        out-of-range numbers are reported as errors.
        """
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            # sys.maxsize rather than the Python 2-only sys.maxint.
            if (exp is not None and
                (int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction consisting only of zeros does not affect the value.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            # Represent the number as significand * 10**pow10.
            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += int(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Try to bring pow10 to zero without leaving integer range.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    # Floor division keeps the significand an integer no
                    # matter which division semantics are in effect.
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                    ((not sign and significand < 2 ** 63) or
                     (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match(r"-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match(r"-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search(r"e[-+]?([^0-9]|$)", s):
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        """Lexer state inside a number.  Returns False, leaving 'c' to be
        lexed again, when 'c' ends the number."""
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            self.__lex_finish_number()
            return False

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Returns the value of the four hex digits in 's'.  Reports an
        error and returns None if 's' is too short, is not hexadecimal, or
        is "0000"."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes: the character after the backslash mapped to
    # the character it stands for.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Decodes the escapes in the buffered string body and delivers the
        result as a 'string' token.  Reports an error for a bad escape."""
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                out += inp
                break
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            # A leading surrogate must be followed immediately by a second
            # \u escape holding the trailing surrogate.
            if Parser.__is_leading_surrogate(c0):
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            out += unichr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        """Lexer state just after a backslash in a string: buffers 'c'
        whatever it is, so that an escaped quote does not end the string."""
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        """Lexer state inside a quoted string.  Always consumes 'c'."""
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Passes 'c' to the current lexer state.  Returns True if 'c' was
        consumed, False if it has to be passed in again."""
        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # Parser states.  Each takes the token and, for a 'string' token, the
    # string's value.

    def __parse_start(self, token, unused_string):
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Resets the lexer for the next token and hands 'token' to the
        current parser state."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Stores 'value' in the container on top of the stack: under
        'member_name' for an object, appended for an array."""
        top = self.stack[-1]
        if isinstance(top, dict):
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Opens the new container 'new_json', attaching it to its parent if
        there is one, unless that would exceed MAX_HEIGHT."""
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Closes the container on top of the stack.  The outermost
        container stays on the stack so that finish() can return it."""
        if len(self.stack) == 1:
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if isinstance(top, list):
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handles a token in a position where a value is expected.

        A scalar is stored and the parser moves on to 'next_state'; an
        opening bracket or brace starts a nested container, which selects
        its own next state; anything else is a syntax error.
        """
        if token == 'string':
            self.__put_value(string)
        elif token == '{':
            self.__push_object()
            return
        elif token == '[':
            self.__push_array()
            return
        elif (token is None or token is True or token is False or
              isinstance(token, six.integer_types + (float,))):
            self.__put_value(token)
        else:
            self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current position, and stops
        the parser.  Only the first error is kept."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Feeds the characters of 's' to the parser.

        Returns the number of characters consumed, which is less than
        len(s) if the parser became done (finished or failed) part way.
        """
        i = 0
        while True:
            if self.done or i >= len(s):
                return i

            c = s[i]
            # A character that the lexer did not consume ended the previous
            # token; it is presented again on the next iteration.
            if self.__lex_input(c):
                self.byte_number += 1
                if c == '\n':
                    self.column_number = 0
                    self.line_number += 1
                else:
                    self.column_number += 1

                i += 1

    def is_done(self):
        """Returns true once the parser will accept no more input."""
        return self.done

    def finish(self):
        """Ends the input and returns the result: the parsed dict or list,
        or a string describing the error if the input was invalid or
        incomplete."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # A space terminates a keyword or number that is still pending.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error