python: Stop using xrange().
[cascardo/ovs.git] / python / ovs / json.py
1 # Copyright (c) 2010, 2011, 2012 Nicira, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
6 #
7 #     http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 import re
16 import StringIO
17 import sys
18
19 from six.moves import range
20
21 __pychecker__ = 'no-stringiter'
22
23 escapes = {ord('"'): u"\\\"",
24            ord("\\"): u"\\\\",
25            ord("\b"): u"\\b",
26            ord("\f"): u"\\f",
27            ord("\n"): u"\\n",
28            ord("\r"): u"\\r",
29            ord("\t"): u"\\t"}
30 for esc in range(32):
31     if esc not in escapes:
32         escapes[esc] = u"\\u%04x" % esc
33
34 SPACES_PER_LEVEL = 2
35
36
37 class _Serializer(object):
38     def __init__(self, stream, pretty, sort_keys):
39         self.stream = stream
40         self.pretty = pretty
41         self.sort_keys = sort_keys
42         self.depth = 0
43
44     def __serialize_string(self, s):
45         self.stream.write(u'"%s"' % ''.join(escapes.get(ord(c), c) for c in s))
46
47     def __indent_line(self):
48         if self.pretty:
49             self.stream.write('\n')
50             self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))
51
52     def serialize(self, obj):
53         if obj is None:
54             self.stream.write(u"null")
55         elif obj is False:
56             self.stream.write(u"false")
57         elif obj is True:
58             self.stream.write(u"true")
59         elif type(obj) in (int, long):
60             self.stream.write(u"%d" % obj)
61         elif type(obj) == float:
62             self.stream.write("%.15g" % obj)
63         elif type(obj) == unicode:
64             self.__serialize_string(obj)
65         elif type(obj) == str:
66             self.__serialize_string(unicode(obj))
67         elif type(obj) == dict:
68             self.stream.write(u"{")
69
70             self.depth += 1
71             self.__indent_line()
72
73             if self.sort_keys:
74                 items = sorted(obj.items())
75             else:
76                 items = obj.iteritems()
77             for i, (key, value) in enumerate(items):
78                 if i > 0:
79                     self.stream.write(u",")
80                     self.__indent_line()
81                 self.__serialize_string(unicode(key))
82                 self.stream.write(u":")
83                 if self.pretty:
84                     self.stream.write(u' ')
85                 self.serialize(value)
86
87             self.stream.write(u"}")
88             self.depth -= 1
89         elif type(obj) in (list, tuple):
90             self.stream.write(u"[")
91             self.depth += 1
92
93             if obj:
94                 self.__indent_line()
95
96                 for i, value in enumerate(obj):
97                     if i > 0:
98                         self.stream.write(u",")
99                         self.__indent_line()
100                     self.serialize(value)
101
102             self.depth -= 1
103             self.stream.write(u"]")
104         else:
105             raise Exception("can't serialize %s as JSON" % obj)
106
107
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Writes 'obj' as JSON to 'stream', an object with a write() method.

    'pretty' selects indented multi-line output; 'sort_keys' selects sorted
    member order for objects.
    """
    serializer = _Serializer(stream, pretty, sort_keys)
    serializer.serialize(obj)
110
111
def to_file(obj, name, pretty=False, sort_keys=True):
    """Writes 'obj' as JSON to the file named 'name', opened in mode "w".

    The file is closed whether or not serialization succeeds.
    """
    with open(name, "w") as stream:
        to_stream(obj, stream, pretty, sort_keys)
118
119
def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as a JSON string.

    The text is accumulated in an in-memory StringIO buffer, which is closed
    before returning.
    """
    buf = StringIO.StringIO()
    to_stream(obj, buf, pretty, sort_keys)
    text = buf.getvalue()
    buf.close()
    return text
126
127
def from_stream(stream):
    """Parses JSON read from 'stream' in chunks of up to 4096 characters.

    Reading stops at end of input (an empty read) or as soon as the parser
    declines to consume a whole chunk.  Returns whatever Parser.finish()
    returns: the parsed value, or an error message string on failure.
    """
    parser = Parser(check_trailer=True)
    chunk = stream.read(4096)
    while chunk != "" and parser.feed(chunk) == len(chunk):
        chunk = stream.read(4096)
    return parser.finish()
135
136
def from_file(name):
    """Parses the JSON contained in the file named 'name'.

    The file is opened in mode "r" and is closed before returning.  Returns
    the result of from_stream() on the open file.
    """
    with open(name, "r") as stream:
        return from_stream(stream)
143
144
def from_string(s):
    """Parses the JSON text in 's'.

    's' may be either a UTF-8 encoded byte string or an already-decoded
    unicode string.

    Returns the parsed value on success.  On failure returns an error
    message string instead: either a description of the invalid UTF-8
    sequence found in 's', or the message produced by the parser.
    """
    # Only byte strings need decoding.  unicode(u"...", 'utf-8') raises
    # TypeError ("decoding Unicode is not supported"), so text that is
    # already unicode must be passed to the parser as is.
    if not isinstance(s, unicode):
        try:
            s = unicode(s, 'utf-8')
        except UnicodeDecodeError as e:
            # Report the offending non-ASCII bytes in hexadecimal.
            seq = ' '.join(["0x%2x" % ord(c)
                            for c in e.object[e.start:e.end]
                            if ord(c) >= 0x80])
            return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()
155
156
class Parser(object):
    """Incremental JSON parser.

    Text is supplied through one or more calls to feed(); finish() then
    returns the parsed value or, if anything went wrong, an error message
    string.  The top-level value must be an object or an array.

    If 'check_trailer' is false, parsing is considered done as soon as the
    top-level value is complete and feed() stops consuming input there.  If
    it is true, input after the top-level value keeps being consumed and
    anything other than whitespace is reported as an error.

    Positions in error messages count lines, columns and characters from
    zero.  This code relies on the Python 2 builtins 'long' and 'unichr'
    and on sys.maxint.
    """
    # Maximum height of parsing stack. #
    MAX_HEIGHT = 1000

    def __init__(self, check_trailer=False):
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # Lexer state functions take the next input character and return True
    # if they consumed it, or False if the same character must be presented
    # again to the (by then changed) lexer state.

    def __lex_start_space(self, c):
        # Whitespace between tokens is ignored.
        pass

    def __lex_start_alpha(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        # Punctuation is a complete token by itself.
        self.__parser_input(c)

    def __lex_start_number(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        # The opening quote is not part of the string's contents.
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    # Dispatch table from the first character of a token to its handler.
    # Characters without an entry are handled by __lex_start_error.
    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Lexer state between tokens: dispatches on 'c' and always consumes
        it."""
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Characters that may continue a keyword once it has started.
    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        """Passes the buffered keyword to the parser as False, True or None,
        or reports an error if it is not a JSON keyword."""
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            # 'c' ends the keyword and still has to be lexed on its own.
            self.__lex_finish_keyword()
            return False

    # Groups: sign, integer part, fraction digits, exponent (with sign).
    __number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
                             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the buffered number text into a value and passes it to
        the parser.

        A number whose value is an integer in the range -2**63...2**63-1 is
        passed as an integer; any other number is passed as a float.
        Malformed numbers, and numbers too large for a float, are reported
        as errors.
        """
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction consisting only of zeros does not affect the value.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            # Represent the number as significand * 10**pow10, where
            # 'significand' is a nonnegative integer.
            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += long(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Try to drive pow10 to zero while keeping an exact integer.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    # Floor division: the remainder is known to be zero and
                    # the result must stay an integer even where "/" means
                    # true division.
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                    ((not sign and significand < 2 ** 63) or
                     (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            # Not an integer that fits in 64 bits: fall back to a float.
            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match("-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match("-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search("[eE][-+]?([^0-9]|$)", s):
            # Both 'e' and 'E' introduce an exponent (see __number_re and
            # __lex_number), so both get the specific diagnostic.
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            # 'c' ends the number and still has to be lexed on its own.
            self.__lex_finish_number()
            return False

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Returns the value of the 4 hex digits in 's' (the text following
        a \\u escape), or reports an error and returns None if 's' is too
        short, is not 4 hex digits, or is "0000"."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes: the character after the backslash mapped to
    # the character it stands for.  \u escapes are handled separately.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Decodes the escape sequences in the buffered string contents and
        passes the result to the parser as a 'string' token, or reports an
        error for an invalid escape."""
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                # No more escapes: the remainder is literal text.
                out += inp
                break
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by an
                # escaped trailing surrogate; the two form one code point.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            out += unichr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        # The character after a backslash is buffered unexamined, so an
        # escaped quote does not end the string.
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Passes 'c' to the current lexer state and returns whether it was
        consumed."""
        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # Parser state functions take a token and, for 'string' tokens, the
    # string's contents.  A token is a punctuation character, one of False,
    # True or None, a number, or the literal 'string'.

    def __parse_start(self, token, unused_string):
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Resets the lexer for the next token and passes 'token' to the
        current parser state."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Adds 'value' to the container on top of the stack: under
        'member_name' for an object, appended for an array."""
        top = self.stack[-1]
        if type(top) == dict:
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Starts the new container 'new_json', adding it to its parent (if
        any), unless that would exceed MAX_HEIGHT levels of nesting."""
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Finishes the container on top of the stack.  The top-level
        container stays on the stack so that finish() can return it."""
        if len(self.stack) == 1:
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if type(top) == list:
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handles a token where a value is expected.  A scalar is added to
        the current container and parsing continues in 'next_state'; '{' or
        '[' starts a nested container instead."""
        if token in [False, None, True] or type(token) in [int, long, float]:
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current input position, as
        the parse error and marks parsing as done.  Only the first error is
        kept."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Feeds the characters of 's' to the parser.

        Returns the number of characters consumed, which is less than
        len(s) if parsing finished (successfully or with an error) before
        the end of 's'.
        """
        i = 0
        while True:
            if self.done or i >= len(s):
                return i

            c = s[i]
            # A character that the lexer did not consume is presented again
            # on the next iteration, without advancing the position.
            if self.__lex_input(c):
                self.byte_number += 1
                if c == '\n':
                    self.column_number = 0
                    self.line_number += 1
                else:
                    self.column_number += 1

                i += 1

    def is_done(self):
        """Returns True once parsing has finished, either because the
        top-level value is complete (only when 'check_trailer' is false) or
        because an error occurred."""
        return self.done

    def finish(self):
        """Signals end of input and returns the result of the parse: the
        parsed top-level object or array on success, otherwise an error
        message string."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # Feed a space to terminate a keyword or number in progress.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error