1 # Copyright (c) 2010, 2011, 2012 Nicira, Inc.
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
19 from six.moves import range
21 __pychecker__ = 'no-stringiter'
23 escapes = {ord('"'): u"\\\"",
31 if esc not in escapes:
32 escapes[esc] = u"\\u%04x" % esc
37 class _Serializer(object):
38 def __init__(self, stream, pretty, sort_keys):
41 self.sort_keys = sort_keys
def __serialize_string(self, s):
    """Writes 's' to the output stream as a double-quoted JSON string.

    Every character whose code point has an entry in the module-level
    'escapes' table is replaced by that entry; all other characters are
    copied through unchanged."""
    pieces = [escapes.get(ord(ch), ch) for ch in s]
    self.stream.write(u'"%s"' % ''.join(pieces))
47 def __indent_line(self):
49 self.stream.write('\n')
50 self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))
52 def serialize(self, obj):
54 self.stream.write(u"null")
56 self.stream.write(u"false")
58 self.stream.write(u"true")
59 elif type(obj) in (int, long):
60 self.stream.write(u"%d" % obj)
61 elif type(obj) == float:
62 self.stream.write("%.15g" % obj)
63 elif type(obj) == unicode:
64 self.__serialize_string(obj)
65 elif type(obj) == str:
66 self.__serialize_string(unicode(obj))
67 elif type(obj) == dict:
68 self.stream.write(u"{")
74 items = sorted(obj.items())
76 items = obj.iteritems()
77 for i, (key, value) in enumerate(items):
79 self.stream.write(u",")
81 self.__serialize_string(unicode(key))
82 self.stream.write(u":")
84 self.stream.write(u' ')
87 self.stream.write(u"}")
89 elif type(obj) in (list, tuple):
90 self.stream.write(u"[")
96 for i, value in enumerate(obj):
98 self.stream.write(u",")
100 self.serialize(value)
103 self.stream.write(u"]")
105 raise Exception("can't serialize %s as JSON" % obj)
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON and writes the result to 'stream'.

    'pretty' and 'sort_keys' are passed to the serializer unchanged."""
    serializer = _Serializer(stream, pretty, sort_keys)
    serializer.serialize(obj)
112 def to_file(obj, name, pretty=False, sort_keys=True):
113 stream = open(name, "w")
115 to_stream(obj, stream, pretty, sort_keys)
120 def to_string(obj, pretty=False, sort_keys=True):
121 output = StringIO.StringIO()
122 to_stream(obj, output, pretty, sort_keys)
123 s = output.getvalue()
128 def from_stream(stream):
129 p = Parser(check_trailer=True)
131 buf = stream.read(4096)
132 if buf == "" or p.feed(buf) != len(buf):
138 stream = open(name, "r")
140 return from_stream(stream)
147 s = unicode(s, 'utf-8')
148 except UnicodeDecodeError as e:
149 seq = ' '.join(["0x%2x" % ord(c)
150 for c in e.object[e.start:e.end] if ord(c) >= 0x80])
151 return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
152 p = Parser(check_trailer=True)
157 class Parser(object):
158 # Maximum height of parsing stack. #
161 def __init__(self, check_trailer=False):
162 self.check_trailer = check_trailer
165 self.lex_state = Parser.__lex_start
168 self.column_number = 0
172 self.parse_state = Parser.__parse_start
174 self.member_name = None
180 def __lex_start_space(self, c):
183 def __lex_start_alpha(self, c):
185 self.lex_state = Parser.__lex_keyword
def __lex_start_token(self, c):
    """Lexer start action (registered in '__lex_start_actions'): passes the
    character 'c' straight to the parser as a token."""
    self.__parser_input(token=c)
190 def __lex_start_number(self, c):
192 self.lex_state = Parser.__lex_number
def __lex_start_string(self, _):
    """Lexer start action for a double quote: switches the lexer into the
    quoted-string state.  The triggering character itself is ignored."""
    string_state = Parser.__lex_string
    self.lex_state = string_state
197 def __lex_start_error(self, c):
198 if ord(c) >= 32 and ord(c) < 128:
199 self.__error("invalid character '%s'" % c)
201 self.__error("invalid character U+%04x" % ord(c))
203 __lex_start_actions = {}
205 __lex_start_actions[c] = __lex_start_space
206 for c in "abcdefghijklmnopqrstuvwxyz":
207 __lex_start_actions[c] = __lex_start_alpha
209 __lex_start_actions[c] = __lex_start_token
210 for c in "-0123456789":
211 __lex_start_actions[c] = __lex_start_number
212 __lex_start_actions['"'] = __lex_start_string
214 def __lex_start(self, c):
215 Parser.__lex_start_actions.get(
216 c, Parser.__lex_start_error)(self, c)
220 for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
221 __lex_alpha[c] = True
223 def __lex_finish_keyword(self):
224 if self.buffer == "false":
225 self.__parser_input(False)
226 elif self.buffer == "true":
227 self.__parser_input(True)
228 elif self.buffer == "null":
229 self.__parser_input(None)
231 self.__error("invalid keyword '%s'" % self.buffer)
233 def __lex_keyword(self, c):
234 if c in Parser.__lex_alpha:
238 self.__lex_finish_keyword()
# Matches one complete JSON number.  The four groups are, in order: the
# optional '-' sign, the integer part (no leading zeros), the digits after
# the decimal point, and the signed exponent digits.  Raw string literals
# keep the regex escape '\.' from being read as a (invalid) Python string
# escape; the compiled pattern text is unchanged.
__number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
                         r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
244 def __lex_finish_number(self):
246 m = Parser.__number_re.match(s)
248 sign, integer, fraction, exp = m.groups()
249 if (exp is not None and
250 (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
251 self.__error("exponent outside valid range")
254 if fraction is not None and len(fraction.lstrip('0')) == 0:
258 if fraction is not None:
259 sig_string += fraction
260 significand = int(sig_string)
263 if fraction is not None:
264 pow10 -= len(fraction)
269 self.__parser_input(0)
271 elif significand <= 2 ** 63:
272 while pow10 > 0 and significand <= 2 ** 63:
275 while pow10 < 0 and significand % 10 == 0:
279 ((not sign and significand < 2 ** 63) or
280 (sign and significand <= 2 ** 63))):
282 self.__parser_input(-significand)
284 self.__parser_input(significand)
288 if value == float("inf") or value == float("-inf"):
289 self.__error("number outside valid range")
292 # Suppress negative zero.
294 self.__parser_input(value)
295 elif re.match("-?0[0-9]", s):
296 self.__error("leading zeros not allowed")
297 elif re.match("-([^0-9]|$)", s):
298 self.__error("'-' must be followed by digit")
299 elif re.match("-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
300 self.__error("decimal point must be followed by digit")
301 elif re.search("e[-+]?([^0-9]|$)", s):
302 self.__error("exponent must contain at least one digit")
304 self.__error("syntax error in number")
306 def __lex_number(self, c):
307 if c in ".0123456789eE-+":
311 self.__lex_finish_number()
314 __4hex_re = re.compile("[0-9a-fA-F]{4}")
316 def __lex_4hex(self, s):
318 self.__error("quoted string ends within \\u escape")
319 elif not Parser.__4hex_re.match(s):
320 self.__error("malformed \\u escape")
322 self.__error("null bytes not supported in quoted strings")
327 def __is_leading_surrogate(c):
328 """Returns true if 'c' is a Unicode code point for a leading
330 return c >= 0xd800 and c <= 0xdbff
333 def __is_trailing_surrogate(c):
334 """Returns true if 'c' is a Unicode code point for a trailing
336 return c >= 0xdc00 and c <= 0xdfff
339 def __utf16_decode_surrogate_pair(leading, trailing):
340 """Returns the unicode code point corresponding to leading surrogate
341 'leading' and trailing surrogate 'trailing'. The return value will not
342 make any sense if 'leading' or 'trailing' are not in the correct ranges
343 for leading or trailing surrogates."""
344 # Leading surrogate: 110110wwwwxxxxxx
345 # Trailing surrogate: 110111xxxxxxxxxx
346 # Code point: 000uuuuuxxxxxxxxxxxxxxxx
347 w = (leading >> 6) & 0xf
350 x1 = trailing & 0x3ff
351 return (u << 16) | (x0 << 10) | x1
352 __unescape = {'"': u'"',
361 def __lex_finish_string(self):
365 backslash = inp.find('\\')
369 out += inp[:backslash]
370 inp = inp[backslash + 1:]
372 self.__error("quoted string may not end with backslash")
375 replacement = Parser.__unescape.get(inp[0])
376 if replacement is not None:
381 self.__error("bad escape \\%s" % inp[0])
384 c0 = self.__lex_4hex(inp[1:5])
389 if Parser.__is_leading_surrogate(c0):
390 if inp[:2] != u'\\u':
391 self.__error("malformed escaped surrogate pair")
393 c1 = self.__lex_4hex(inp[2:6])
396 if not Parser.__is_trailing_surrogate(c1):
397 self.__error("second half of escaped surrogate pair is "
398 "not trailing surrogate")
400 code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
404 out += unichr(code_point)
405 self.__parser_input('string', out)
407 def __lex_string_escape(self, c):
409 self.lex_state = Parser.__lex_string
412 def __lex_string(self, c):
415 self.lex_state = Parser.__lex_string_escape
417 self.__lex_finish_string()
421 self.__error("U+%04X must be escaped in quoted string" % ord(c))
424 def __lex_input(self, c):
425 eat = self.lex_state(self, c)
426 assert eat is True or eat is False
429 def __parse_start(self, token, unused_string):
435 self.__error("syntax error at beginning of input")
def __parse_end(self, unused_token, unused_string):
    """Parser state that '__parser_pop' selects when only one element
    remains on the stack: any token arriving in this state is reported as
    an error.  Both arguments are ignored."""
    message = "trailing garbage at end of input"
    self.__error(message)
440 def __parse_object_init(self, token, string):
444 self.__parse_object_name(token, string)
446 def __parse_object_name(self, token, string):
447 if token == 'string':
448 self.member_name = string
449 self.parse_state = Parser.__parse_object_colon
451 self.__error("syntax error parsing object expecting string")
453 def __parse_object_colon(self, token, unused_string):
455 self.parse_state = Parser.__parse_object_value
457 self.__error("syntax error parsing object expecting ':'")
def __parse_object_value(self, token, string):
    """Parser state selected by '__parse_object_colon': handles 'token' (and
    its 'string' payload) as a value via '__parse_value', naming
    '__parse_object_next' as the state to follow."""
    following_state = Parser.__parse_object_next
    self.__parse_value(token, string, following_state)
462 def __parse_object_next(self, token, unused_string):
464 self.parse_state = Parser.__parse_object_name
468 self.__error("syntax error expecting '}' or ','")
470 def __parse_array_init(self, token, string):
474 self.__parse_array_value(token, string)
def __parse_array_value(self, token, string):
    """Parser state for an array element: handles 'token' (and its 'string'
    payload) as a value via '__parse_value', naming '__parse_array_next' as
    the state to follow."""
    following_state = Parser.__parse_array_next
    self.__parse_value(token, string, following_state)
479 def __parse_array_next(self, token, unused_string):
481 self.parse_state = Parser.__parse_array_value
485 self.__error("syntax error expecting ']' or ','")
487 def __parser_input(self, token, string=None):
488 self.lex_state = Parser.__lex_start
490 self.parse_state(self, token, string)
492 def __put_value(self, value):
494 if type(top) == dict:
495 top[self.member_name] = value
499 def __parser_push(self, new_json, next_state):
500 if len(self.stack) < Parser.MAX_HEIGHT:
501 if len(self.stack) > 0:
502 self.__put_value(new_json)
503 self.stack.append(new_json)
504 self.parse_state = next_state
506 self.__error("input exceeds maximum nesting depth %d" %
def __push_object(self):
    """Starts a new JSON object: hands a fresh, empty dict to
    '__parser_push' with '__parse_object_init' as the next parser state."""
    new_object = {}
    self.__parser_push(new_object, Parser.__parse_object_init)
def __push_array(self):
    """Starts a new JSON array: hands a fresh, empty list to
    '__parser_push' with '__parse_array_init' as the next parser state."""
    new_array = []
    self.__parser_push(new_array, Parser.__parse_array_init)
515 def __parser_pop(self):
516 if len(self.stack) == 1:
517 self.parse_state = Parser.__parse_end
518 if not self.check_trailer:
523 if type(top) == list:
524 self.parse_state = Parser.__parse_array_next
526 self.parse_state = Parser.__parse_object_next
528 def __parse_value(self, token, string, next_state):
529 if token in [False, None, True] or type(token) in [int, long, float]:
530 self.__put_value(token)
531 elif token == 'string':
532 self.__put_value(string)
539 self.__error("syntax error expecting value")
541 self.parse_state = next_state
543 def __error(self, message):
544 if self.error is None:
545 self.error = ("line %d, column %d, byte %d: %s"
546 % (self.line_number, self.column_number,
547 self.byte_number, message))
553 if self.done or i >= len(s):
557 if self.__lex_input(c):
558 self.byte_number += 1
560 self.column_number = 0
561 self.line_number += 1
563 self.column_number += 1
571 if self.lex_state == Parser.__lex_start:
573 elif self.lex_state in (Parser.__lex_string,
574 Parser.__lex_string_escape):
575 self.__error("unexpected end of input in quoted string")
577 self.__lex_input(" ")
579 if self.parse_state == Parser.__parse_start:
580 self.__error("empty input stream")
581 elif self.parse_state != Parser.__parse_end:
582 self.__error("unexpected end of input")
584 if self.error is None:
585 assert len(self.stack) == 1
586 return self.stack.pop()