1 # Copyright (c) 2010, 2011, 2012 Nicira, Inc.
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
19 from six.moves import range
# NOTE(review): presumably a directive for the pychecker lint tool; confirm.
21 __pychecker__ = 'no-stringiter'
# Table mapping a character's code point (its ord()) to the text written in
# its place inside a serialized JSON string; consulted by
# _Serializer.__serialize_string.
# NOTE(review): the embedded numbering jumps from 23 to 31, so the remaining
# entries of this dict literal and the header of the loop that binds 'esc'
# are not visible in this listing.
23 escapes = {ord('"'): u"\\\"",
# A code point without an explicit entry gets a backslash-u escape made of
# four lowercase hex digits.
31 if esc not in escapes:
32 escapes[esc] = u"\\u%04x" % esc
# Helper that writes Python values as JSON text to a stream; the module-level
# to_stream() constructs one and calls serialize() on it.
# NOTE(review): embedded lines 39-40 and 42-43 are missing from this listing.
# Other methods read self.stream and self.depth, which are presumably
# initialized on those lines -- confirm against the full file.
37 class _Serializer(object):
38 def __init__(self, stream, pretty, sort_keys):
41 self.sort_keys = sort_keys
def __serialize_string(self, s):
    """Writes 's' to the stream as a double-quoted JSON string.

    Each character whose code point has an entry in the module-level
    'escapes' table is replaced by that entry; every other character is
    written unchanged."""
    pieces = [escapes.get(ord(ch), ch) for ch in s]
    quoted = u'"%s"' % ''.join(pieces)
    self.stream.write(quoted)
# Starts a new output line and indents it by SPACES_PER_LEVEL spaces for each
# level of self.depth.
# NOTE(review): embedded line 48 is missing from this listing, so the two
# writes below may be guarded by a condition that is not shown.
47 def __indent_line(self):
49 self.stream.write('\n')
50 self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))
# Writes 'obj' to self.stream as JSON text, choosing the form by Python type:
#   - the literals "null", "false" and "true" (the tests selecting them are on
#     lines not visible here),
#   - integers through "%d" and floats through "%.15g",
#   - text and str values as quoted strings via __serialize_string,
#   - dicts as "{...}" with every key converted to text and ":" after it,
#   - lists and tuples as "[...]" with each element serialized recursively.
# The final statement raises Exception("can't serialize ... as JSON");
# presumably it sits in the fall-through branch for unsupported types.
# NOTE(review): many lines of this method are missing from the listing
# (embedded 53, 55, 57, 72-76, 78, 81, 83, 86, 88-89, 91, 94-98, 100, 102,
# 104-105, 107), including the conditions that choose between the two
# 'items = ...' assignments and that guard the "," and ' ' separators.
52 def serialize(self, obj):
54 self.stream.write(u"null")
56 self.stream.write(u"false")
58 self.stream.write(u"true")
59 elif isinstance(obj, six.integer_types):
60 self.stream.write(u"%d" % obj)
61 elif isinstance(obj, float):
62 self.stream.write("%.15g" % obj)
63 elif isinstance(obj, six.text_type):
64 # unicode() on Python 2, or str() in Python 3 (always unicode)
65 self.__serialize_string(obj)
66 elif isinstance(obj, str):
67 # This is for Python 2, where this comes out to unicode(str()).
68 # For Python 3, it's str(str()), but it's harmless.
69 self.__serialize_string(six.text_type(obj))
70 elif isinstance(obj, dict):
71 self.stream.write(u"{")
# Two alternative sources for the members: sorted (key, value) pairs, or the
# dict's own iteration order.
77 items = sorted(obj.items())
79 items = six.iteritems(obj)
80 for i, (key, value) in enumerate(items):
82 self.stream.write(u",")
84 self.__serialize_string(six.text_type(key))
85 self.stream.write(u":")
87 self.stream.write(u' ')
90 self.stream.write(u"}")
92 elif isinstance(obj, (list, tuple)):
93 self.stream.write(u"[")
99 for i, value in enumerate(obj):
101 self.stream.write(u",")
103 self.serialize(value)
106 self.stream.write(u"]")
108 raise Exception("can't serialize %s as JSON" % obj)
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON through a _Serializer built on 'stream'.

    'pretty' and 'sort_keys' are handed to the _Serializer unchanged."""
    serializer = _Serializer(stream, pretty, sort_keys)
    serializer.serialize(obj)
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON into the file called 'name'.

    The file is opened in text write mode ("w"), which truncates an existing
    file.  'pretty' and 'sort_keys' are passed through to to_stream().

    Any exception raised by open() or by serialization propagates to the
    caller."""
    # The 'with' block guarantees the file is closed on every exit path,
    # including when serialization raises part-way through.
    with open(name, "w") as stream:
        to_stream(obj, stream, pretty, sort_keys)
# Serializes 'obj' into an in-memory six.StringIO buffer via to_stream() and
# reads the accumulated text back into 's'.
# NOTE(review): embedded lines 127-130 are missing from this listing; the
# statement that hands 's' back to the caller is presumably among them.
123 def to_string(obj, pretty=False, sort_keys=True):
124 output = six.StringIO()
125 to_stream(obj, output, pretty, sort_keys)
126 s = output.getvalue()
# Parses JSON read from 'stream' with a Parser that has trailer checking
# enabled, pulling input through stream.read(4096).
# The last visible condition is true when the read returned "" or when
# p.feed() reported a count other than len(buf).
# NOTE(review): embedded lines 133 and 136-140 are missing from this listing,
# so the loop header and the action taken on that condition are not shown.
131 def from_stream(stream):
132 p = Parser(check_trailer=True)
134 buf = stream.read(4096)
135 if buf == "" or p.feed(buf) != len(buf):
141 stream = open(name, "r")
143 return from_stream(stream)
149 if not isinstance(s, six.text_type):
150 # We assume the input is a string. We will only hit this case for a
151 # str in Python 2 which is not unicode, so we need to go ahead and
154 s = six.text_type(s, 'utf-8')
155 except UnicodeDecodeError as e:
156 seq = ' '.join(["0x%2x" % ord(c)
157 for c in e.object[e.start:e.end] if ord(c) >= 0x80])
158 return "not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq
159 p = Parser(check_trailer=True)
# Incremental JSON parser built from two state machines: self.lex_state holds
# the current lexer state function and self.parse_state the current parser
# state function.  A new instance starts in __lex_start / __parse_start with
# no pending member name.
# NOTE(review): embedded lines 166-167, 170-171, 173-174, 176-178, 180 and
# 182-186 are missing from this listing.  Other methods read Parser.MAX_HEIGHT,
# self.stack, self.buffer, self.error, self.done, self.line_number and
# self.byte_number, which are presumably set up there -- confirm.
164 class Parser(object):
165 # Maximum height of parsing stack. #
168 def __init__(self, check_trailer=False):
169 self.check_trailer = check_trailer
172 self.lex_state = Parser.__lex_start
175 self.column_number = 0
179 self.parse_state = Parser.__parse_start
181 self.member_name = None
# Start-state action stored in __lex_start_actions.
# NOTE(review): the body (embedded line 188) and the loop that registers this
# action (embedded line 211) are not visible in this listing.
187 def __lex_start_space(self, c):
# Start-state action registered for the lowercase letters a-z: switches the
# lexer to the keyword state.
# NOTE(review): embedded line 191 is missing from this listing.
190 def __lex_start_alpha(self, c):
192 self.lex_state = Parser.__lex_keyword
194 def __lex_start_token(self, c):
195 self.__parser_input(c)
# Start-state action registered for '-' and the digits 0-9: switches the
# lexer to the number state.
# NOTE(review): embedded line 198 is missing from this listing.
197 def __lex_start_number(self, c):
199 self.lex_state = Parser.__lex_number
201 def __lex_start_string(self, _):
202 self.lex_state = Parser.__lex_string
# Fallback start-state action, used by __lex_start when 'c' has no entry in
# __lex_start_actions: records an "invalid character" error.  Characters with
# code points 32..127 are quoted literally; the second message shows the code
# point as U+XXXX instead.
# NOTE(review): embedded line 207 is missing; presumably it is the 'else:'
# that separates the two messages.
204 def __lex_start_error(self, c):
205 if ord(c) >= 32 and ord(c) < 128:
206 self.__error("invalid character '%s'" % c)
208 self.__error("invalid character U+%04x" % ord(c))
# Dispatch table for the lexer start state: maps the first character of a
# token to the action that handles it (lowercase letters -> keyword,
# '-' and digits -> number, '"' -> string).
# NOTE(review): embedded lines 211 and 215 are missing, so the character sets
# that map to __lex_start_space and __lex_start_token are not visible.
210 __lex_start_actions = {}
212 __lex_start_actions[c] = __lex_start_space
213 for c in "abcdefghijklmnopqrstuvwxyz":
214 __lex_start_actions[c] = __lex_start_alpha
216 __lex_start_actions[c] = __lex_start_token
217 for c in "-0123456789":
218 __lex_start_actions[c] = __lex_start_number
219 __lex_start_actions['"'] = __lex_start_string
# Lexer start state: looks up the action for 'c' in __lex_start_actions,
# falling back to __lex_start_error, and invokes it with (self, c).
# NOTE(review): embedded lines 224-226 are missing from this listing.
# __lex_input asserts that a lexer state returns True or False, so a return
# statement presumably follows the call.
221 def __lex_start(self, c):
222 Parser.__lex_start_actions.get(
223 c, Parser.__lex_start_error)(self, c)
# Marks every ASCII letter, lower and upper case, in the __lex_alpha table;
# __lex_keyword tests membership in it.
# NOTE(review): the statement that creates __lex_alpha is not visible here.
227 for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
228 __lex_alpha[c] = True
# Turns the keyword accumulated in self.buffer into a parser token:
# "false" -> False, "true" -> True, "null" -> None.  The last statement
# reports "invalid keyword" for the buffered text.
# NOTE(review): embedded line 237 is missing; presumably it is the 'else:'
# introducing that error.
230 def __lex_finish_keyword(self):
231 if self.buffer == "false":
232 self.__parser_input(False)
233 elif self.buffer == "true":
234 self.__parser_input(True)
235 elif self.buffer == "null":
236 self.__parser_input(None)
238 self.__error("invalid keyword '%s'" % self.buffer)
# Lexer state while reading a keyword: tests whether 'c' is a letter listed
# in __lex_alpha, and calls __lex_finish_keyword() to complete the keyword.
# NOTE(review): embedded lines 242-244 and 246-247 are missing, so what is
# done with a letter and the values returned are not visible.
240 def __lex_keyword(self, c):
241 if c in Parser.__lex_alpha:
245 self.__lex_finish_keyword()
# Matches a complete JSON number and captures, in order: the optional '-'
# sign, the integer part (a lone 0 or a digit string without leading zero),
# the optional fraction digits, and the optional exponent with its sign.
# Raw string literals keep the backslash in "\." from being read as a string
# escape; the compiled pattern is unchanged.
__number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
                         r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
# Converts the text of a number into a parser token or an error.
#
# When the text 's' matches __number_re it is split into sign, integer,
# fraction and exponent.  An exponent outside [-sys.maxsize - 1, sys.maxsize]
# is rejected.  The fraction digits are appended to build an integer
# significand, and pow10 is reduced by the number of fraction digits.  The
# visible comparisons against 2 ** 63 select the cases passed to the parser
# as Python integers (negated when a sign is present); the remaining case
# passes 'value', after rejecting infinities.
#
# When the text does not match, the trailing elif chain picks a specific
# message: leading zeros, '-' without a digit, '.' without a digit, or an
# exponent without digits, ending with a generic "syntax error in number".
#
# NOTE(review): many lines are missing from this listing (embedded 252, 254,
# 259-260, 262-264, 268-269, 272-275, 277, 280-281, 283-285, 288, 290,
# 292-294, 297-298, 300, 310), including where 's', 'sig_string', 'pow10' and
# 'value' are bound.
# NOTE(review): the pattern on embedded line 306 contains "\." in a non-raw
# string literal, which newer Python versions warn about.
251 def __lex_finish_number(self):
253 m = Parser.__number_re.match(s)
255 sign, integer, fraction, exp = m.groups()
256 if (exp is not None and
257 (int(exp) > sys.maxsize or int(exp) < -sys.maxsize - 1)):
258 self.__error("exponent outside valid range")
261 if fraction is not None and len(fraction.lstrip('0')) == 0:
265 if fraction is not None:
266 sig_string += fraction
267 significand = int(sig_string)
270 if fraction is not None:
271 pow10 -= len(fraction)
276 self.__parser_input(0)
278 elif significand <= 2 ** 63:
279 while pow10 > 0 and significand <= 2 ** 63:
282 while pow10 < 0 and significand % 10 == 0:
286 ((not sign and significand < 2 ** 63) or
287 (sign and significand <= 2 ** 63))):
289 self.__parser_input(-significand)
291 self.__parser_input(significand)
295 if value == float("inf") or value == float("-inf"):
296 self.__error("number outside valid range")
299 # Suppress negative zero.
301 self.__parser_input(value)
302 elif re.match("-?0[0-9]", s):
303 self.__error("leading zeros not allowed")
304 elif re.match("-([^0-9]|$)", s):
305 self.__error("'-' must be followed by digit")
306 elif re.match("-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
307 self.__error("decimal point must be followed by digit")
308 elif re.search("e[-+]?([^0-9]|$)", s):
309 self.__error("exponent must contain at least one digit")
311 self.__error("syntax error in number")
# Lexer state while reading a number: tests whether 'c' is one of the
# characters ".0123456789eE-+", and calls __lex_finish_number() to complete
# the number.
# NOTE(review): embedded lines 315-317 and 319-320 are missing, so what is
# done with a matching character and the values returned are not visible.
313 def __lex_number(self, c):
314 if c in ".0123456789eE-+":
318 self.__lex_finish_number()
# Matches four hexadecimal digits at the start of a string; used by
# __lex_4hex to validate the digits of a \u escape.
321 __4hex_re = re.compile("[0-9a-fA-F]{4}")
# Validates the text 's' taken from a \u escape.  Three errors can be
# recorded: the string ended inside the escape, 's' does not start with four
# hex digits, or the escape denotes a null byte.
# NOTE(review): embedded lines 324, 328 and 330-333 are missing, so the
# conditions guarding the first and third errors and the value returned are
# not visible.  __lex_finish_string uses the result as a code point.
323 def __lex_4hex(self, s):
325 self.__error("quoted string ends within \\u escape")
326 elif not Parser.__4hex_re.match(s):
327 self.__error("malformed \\u escape")
329 self.__error("null bytes not supported in quoted strings")
# True when 'c' lies in the range 0xd800..0xdbff inclusive.
# NOTE(review): embedded lines 333 and 336 are missing.  The function takes no
# 'self' and is called as Parser.__is_leading_surrogate(c0), so line 333 is
# presumably a @staticmethod decorator; line 336 presumably closes the
# docstring.
334 def __is_leading_surrogate(c):
335 """Returns true if 'c' is a Unicode code point for a leading
337 return c >= 0xd800 and c <= 0xdbff
# True when 'c' lies in the range 0xdc00..0xdfff inclusive.
# NOTE(review): embedded lines 339 and 342 are missing.  The function takes no
# 'self' and is called as Parser.__is_trailing_surrogate(c1), so line 339 is
# presumably a @staticmethod decorator; line 342 presumably closes the
# docstring.
340 def __is_trailing_surrogate(c):
341 """Returns true if 'c' is a Unicode code point for a trailing
343 return c >= 0xdc00 and c <= 0xdfff
# Combines a UTF-16 surrogate pair into one code point by bit manipulation:
# 'w' is the four bits of 'leading' above its low six bits, 'x1' is the low
# ten bits of 'trailing', and the result packs u, x0 and x1 together.
# NOTE(review): embedded lines 355-356 are missing, so the definitions of 'u'
# and 'x0' used in the return expression are not visible.  Line 345, probably
# a @staticmethod decorator since there is no 'self', is also missing.
346 def __utf16_decode_surrogate_pair(leading, trailing):
347 """Returns the unicode code point corresponding to leading surrogate
348 'leading' and trailing surrogate 'trailing'. The return value will not
349 make any sense if 'leading' or 'trailing' are not in the correct ranges
350 for leading or trailing surrogates."""
351 # Leading surrogate: 110110wwwwxxxxxx
352 # Trailing surrogate: 110111xxxxxxxxxx
353 # Code point: 000uuuuuxxxxxxxxxxxxxxxx
354 w = (leading >> 6) & 0xf
357 x1 = trailing & 0x3ff
358 return (u << 16) | (x0 << 10) | x1
# Table mapping the character that follows a backslash in a quoted string to
# the character it stands for; consulted by __lex_finish_string.
# NOTE(review): only the first entry is visible; the rest of the dict literal
# (embedded lines 360-367) is missing from this listing.
359 __unescape = {'"': u'"',
# Decodes the backslash escapes in a completed quoted string and passes the
# result to the parser as a 'string' token.
#
# Text before each backslash is copied to 'out' unchanged.  The character
# after the backslash is looked up in __unescape; another branch reports
# "bad escape".  A four-hex-digit escape is read with __lex_4hex; when it is a
# leading surrogate, a second backslash-u escape must follow immediately and
# be a trailing surrogate, and the pair is combined into one code point.
#
# NOTE(review): many lines are missing from this listing (embedded 369-371,
# 373-375, 378, 380-381, 384-387, 389-390, 392-395, 399, 401-402, 406,
# 408-410), including the loop structure, where 'inp' and 'out' are
# initialized, and how 'inp' advances after each escape.
368 def __lex_finish_string(self):
372 backslash = inp.find('\\')
376 out += inp[:backslash]
377 inp = inp[backslash + 1:]
379 self.__error("quoted string may not end with backslash")
382 replacement = Parser.__unescape.get(inp[0])
383 if replacement is not None:
388 self.__error("bad escape \\%s" % inp[0])
391 c0 = self.__lex_4hex(inp[1:5])
396 if Parser.__is_leading_surrogate(c0):
397 if inp[:2] != u'\\u':
398 self.__error("malformed escaped surrogate pair")
400 c1 = self.__lex_4hex(inp[2:6])
403 if not Parser.__is_trailing_surrogate(c1):
404 self.__error("second half of escaped surrogate pair is "
405 "not trailing surrogate")
407 code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
411 out += six.unichr(code_point)
412 self.__parser_input('string', out)
# Lexer state for the character right after a backslash inside a quoted
# string: returns the lexer to the ordinary string state.
# NOTE(review): embedded lines 415 and 417 are missing from this listing.
414 def __lex_string_escape(self, c):
416 self.lex_state = Parser.__lex_string
# Lexer state inside a quoted string.  The visible statements switch to the
# escape state, finish the string via __lex_finish_string(), or report that a
# character must be escaped.
# NOTE(review): embedded lines 420-421, 423, 425-427 and 429 are missing, so
# the conditions selecting each of these actions are not visible.
419 def __lex_string(self, c):
422 self.lex_state = Parser.__lex_string_escape
424 self.__lex_finish_string()
428 self.__error("U+%04X must be escaped in quoted string" % ord(c))
# Runs the current lexer state function on 'c'.  The state must return
# exactly True or False; anything else trips the assertion.
# NOTE(review): embedded line 434 is missing; presumably it returns 'eat',
# since the feed loop tests the result of this call.
431 def __lex_input(self, c):
432 eat = self.lex_state(self, c)
433 assert eat is True or eat is False
# Initial parser state (installed by __init__).  The visible statement
# reports a syntax error at the beginning of the input.
# NOTE(review): embedded lines 437-441 are missing, so the tokens accepted in
# this state are not visible.
436 def __parse_start(self, token, unused_string):
442 self.__error("syntax error at beginning of input")
444 def __parse_end(self, unused_token, unused_string):
445 self.__error("trailing garbage at end of input")
# Parser state installed by __push_object right after an object is opened.
# The visible statement defers to __parse_object_name.
# NOTE(review): embedded lines 448-450 are missing, so any case handled
# before that delegation is not visible.
447 def __parse_object_init(self, token, string):
451 self.__parse_object_name(token, string)
# Parser state expecting a member name: a 'string' token is stored in
# self.member_name and the parser moves on to expect a colon.  The last
# statement reports a syntax error.
# NOTE(review): embedded line 457 is missing; presumably it is the 'else:'
# introducing that error.
453 def __parse_object_name(self, token, string):
454 if token == 'string':
455 self.member_name = string
456 self.parse_state = Parser.__parse_object_colon
458 self.__error("syntax error parsing object expecting string")
# Parser state expecting the ':' between a member name and its value; on
# success the parser moves to __parse_object_value, otherwise an error is
# recorded.
# NOTE(review): embedded lines 461 and 463 are missing, so the test on
# 'token' and the branch structure are not visible.
460 def __parse_object_colon(self, token, unused_string):
462 self.parse_state = Parser.__parse_object_value
464 self.__error("syntax error parsing object expecting ':'")
def __parse_object_value(self, token, string):
    """Parser state expecting the value of an object member: delegates
    to __parse_value, naming __parse_object_next as the state that
    follows."""
    follow_state = Parser.__parse_object_next
    self.__parse_value(token, string, follow_state)
# Parser state after an object member's value.  The visible statements return
# to __parse_object_name for another member, or report that '}' or ',' was
# expected.
# NOTE(review): embedded lines 470 and 472-474 are missing, so the tests on
# 'token' and the handling of '}' are not visible.
469 def __parse_object_next(self, token, unused_string):
471 self.parse_state = Parser.__parse_object_name
475 self.__error("syntax error expecting '}' or ','")
# Parser state installed by __push_array right after an array is opened.
# The visible statement defers to __parse_array_value.
# NOTE(review): embedded lines 478-480 are missing, so any case handled
# before that delegation is not visible.
477 def __parse_array_init(self, token, string):
481 self.__parse_array_value(token, string)
def __parse_array_value(self, token, string):
    """Parser state expecting an array element: delegates to
    __parse_value, naming __parse_array_next as the state that
    follows."""
    follow_state = Parser.__parse_array_next
    self.__parse_value(token, string, follow_state)
# Parser state after an array element.  The visible statements return to
# __parse_array_value for another element, or report that ']' or ',' was
# expected.
# NOTE(review): embedded lines 487 and 489-491 are missing, so the tests on
# 'token' and the handling of ']' are not visible.
486 def __parse_array_next(self, token, unused_string):
488 self.parse_state = Parser.__parse_array_value
492 self.__error("syntax error expecting ']' or ','")
# Delivers one token from the lexer to the parser: resets the lexer to its
# start state, then calls the current parser state function with
# (self, token, string).  'string' carries the text of a 'string' token and
# is None for other tokens.
# NOTE(review): embedded line 496 is missing from this listing.
494 def __parser_input(self, token, string=None):
495 self.lex_state = Parser.__lex_start
497 self.parse_state(self, token, string)
# Stores 'value' into the container 'top': when it is a dict, under the
# member name most recently recorded in self.member_name.
# NOTE(review): embedded lines 500 and 503-504 are missing, so the binding of
# 'top' and the handling of non-dict containers are not visible.
499 def __put_value(self, value):
501 if isinstance(top, dict):
502 top[self.member_name] = value
# Opens a nested container.  While the stack is shallower than
# Parser.MAX_HEIGHT, 'new_json' is stored into the enclosing container (when
# there is one), pushed on the stack, and the parser moves to 'next_state'.
# The last statement reports that the maximum nesting depth was exceeded.
# NOTE(review): embedded lines 512 and 514 are missing: presumably the
# 'else:' and the rest of the error-message expression.
506 def __parser_push(self, new_json, next_state):
507 if len(self.stack) < Parser.MAX_HEIGHT:
508 if len(self.stack) > 0:
509 self.__put_value(new_json)
510 self.stack.append(new_json)
511 self.parse_state = next_state
513 self.__error("input exceeds maximum nesting depth %d" %
516 def __push_object(self):
517 self.__parser_push({}, Parser.__parse_object_init)
519 def __push_array(self):
520 self.__parser_push([], Parser.__parse_array_init)
# Closes the container currently being parsed.  When it is the only entry on
# the stack the parser moves to __parse_end; otherwise the next state depends
# on whether 'top' is a list (array) or not (object).
# NOTE(review): embedded lines 526-529 and 532 are missing, so the effect of
# 'not self.check_trailer', the stack manipulation and the binding of 'top'
# are not visible.
522 def __parser_pop(self):
523 if len(self.stack) == 1:
524 self.parse_state = Parser.__parse_end
525 if not self.check_trailer:
530 if isinstance(top, list):
531 self.parse_state = Parser.__parse_array_next
533 self.parse_state = Parser.__parse_object_next
# Handles a token in a position where a JSON value is expected.  False, None,
# True and numbers (the six integer types plus float) are stored directly via
# __put_value; a 'string' token stores its text.  The visible tail reports
# "syntax error expecting value" and sets the parser state to 'next_state'.
# NOTE(review): embedded lines 543-548 and 550 are missing, so the handling
# of other tokens and the control flow around the error and the final state
# assignment are not visible.
535 def __parse_value(self, token, string, next_state):
536 number_types = list(six.integer_types)
537 number_types.extend([float])
538 number_types = tuple(number_types)
539 if token in [False, None, True] or isinstance(token, number_types):
540 self.__put_value(token)
541 elif token == 'string':
542 self.__put_value(string)
549 self.__error("syntax error expecting value")
551 self.parse_state = next_state
# Records a parse error.  Only the first error is kept: self.error is set
# only while it is still None, and the message is prefixed with the current
# line, column and byte position.
# NOTE(review): embedded lines 558-562 are missing from this listing; further
# statements of this method may be among them.
553 def __error(self, message):
554 if self.error is None:
555 self.error = ("line %d, column %d, byte %d: %s"
556 % (self.line_number, self.column_number,
557 self.byte_number, message))
563 if self.done or i >= len(s):
567 if self.__lex_input(c):
568 self.byte_number += 1
570 self.column_number = 0
571 self.line_number += 1
573 self.column_number += 1
581 if self.lex_state == Parser.__lex_start:
583 elif self.lex_state in (Parser.__lex_string,
584 Parser.__lex_string_escape):
585 self.__error("unexpected end of input in quoted string")
587 self.__lex_input(" ")
589 if self.parse_state == Parser.__parse_start:
590 self.__error("empty input stream")
591 elif self.parse_state != Parser.__parse_end:
592 self.__error("unexpected end of input")
594 if self.error is None:
595 assert len(self.stack) == 1
596 return self.stack.pop()