# = Simple JSON parser & builder
#
# Author::  Chihiro Ito
# License:: Public domain (unlike other files)
# Support:: http://groups.google.com/group/webos-goodies/
#
# This file contains two simple JSON processing classes. JsonParser
# converts a JSON string to an array or a hash. JsonBuilder does the
# reverse. These classes are standards-compliant and are designed for
# stability and reliability. In particular, JsonParser validates UTF-8
# input, which helps you avoid some kinds of security attacks.

require 'strscan'
require 'json' if RUBY_VERSION >= '1.9.0'


# = Simple JSON parser
#
# This class converts a JSON string to an array or a hash. If *json_str*
# contains a JSON string, you can convert it as shown below.
#
#   ruby_obj = JsonParser.new.parse(json_str)
#
# If *json_str* contains one or more invalid UTF-8 sequences, JsonParser
# raises an exception by default. You can change this behavior so that each
# invalid sequence is replaced with an arbitrary Unicode character instead.
# See below for details.
class JsonParser

  #:stopdoc:
  RUBY19             = RUBY_VERSION >= '1.9.0'
  Debug              = false
  Name               = 'JsonParser'
  ERR_IllegalSyntax  = "[#{Name}] Syntax error"
  ERR_IllegalUnicode = "[#{Name}] Illegal unicode sequence"

  StringRegex = /\s*"((?:\\.|[^"\\])*)"/n
  ValueRegex  = /\s*(?:
    (true)|(false)|(null)|                              # 1:true, 2:false, 3:null
    (?:\"((?:\\.|[^\"\\])*)\")|                         # 4:String
    ([-+]?\d+(?:\.\d+(?:[eE][-+]?\d+)?|[eE][-+]?\d+))|  # 5:Float
    ([-+]?\d+)|                                         # 6:Integer
    (\{)|(\[))/xn                                       # 7:Hash, 8:Array

  # 1: a single-character escape, 2: a run of consecutive \uXXXX escapes.
  # Both kinds are matched by one regex so that a string is unescaped in a
  # single left-to-right pass.
  EscapeRegex        = /\\(["\\\/bfnrt])|((?:\\u[0-9a-fA-F]{4})+)/n
  UnicodeEscapeRegex = /\\u([0-9a-fA-F]{4})/n
  SimpleEscapes      = {
    '"' => '"',  '\\' => '\\', '/' => '/',  'b' => "\b",
    'f' => "\f", 'n'  => "\n", 'r' => "\r", 't' => "\t"
  }
  #:startdoc:

  # Create a new instance of JsonParser. *options* can contain these values.
  # [:validation]
  #     If set to false, UTF-8 validation is disabled. true by default.
  # [:surrogate]
  #     If set to false, surrogate pair support is disabled. true by default.
  # [:malformed_chr]
  #     An invalid sequence in a JSON string will be replaced with this value.
  #     If set to nil, an exception will be raised in this situation.
  #     nil by default.
  # [:compatible]
  #     If set to true, Ruby 1.9's JSON module is never used. false by default.
  def initialize(options = {})
    @default_validation    = options.fetch(:validation,    true)
    @default_surrogate     = options.fetch(:surrogate,     true)
    @default_malformed_chr = options.fetch(:malformed_chr, nil)
    @default_compatible    = options.fetch(:compatible,    false)
  end

  # Convert *str* to an array or hash.
  # [str]
  #     A JSON string. This must be encoded using UTF-8.
  # [options]
  #     Same as new. Values given here override those given to new for this
  #     call only.
  #
  # Raises a RuntimeError if *str* is not well-formed JSON, or if it contains
  # an illegal unicode sequence and no :malformed_chr is set. When the built-in
  # parser is used, the top-level value must be an object or an array and
  # nothing but whitespace may follow it.
  def parse(str, options = {})
    @enable_validation = options.fetch(:validation,    @default_validation)
    @enable_surrogate  = options.fetch(:surrogate,     @default_surrogate)
    @malformed_chr     = options.fetch(:malformed_chr, @default_malformed_chr)
    @compatible        = options.fetch(:compatible,    @default_compatible)
    # The replacement character is handled as a code point internally.
    @malformed_chr = @malformed_chr[0].ord if String === @malformed_chr
    if RUBY19
      str = (str.encode('UTF-8') rescue str.dup)
      if @enable_validation && !@malformed_chr
        # Without a replacement character, validation is a yes/no question
        # that Ruby can answer for the whole input at once.
        raise err_msg(ERR_IllegalUnicode) unless str.valid_encoding?
        @enable_validation = false
      end
      if !@enable_validation && @enable_surrogate && !@malformed_chr && !@compatible
        return parse_with_json_library(str)
      end
      # The built-in parser below works on raw bytes.
      str.force_encoding('ASCII-8BIT')
    end
    @scanner = StringScanner.new(str)
    begin
      obj = case get_symbol[0]
            when ?{ then parse_hash
            when ?[ then parse_array
            else         raise err_msg(ERR_IllegalSyntax)
            end
      # Only whitespace may follow the top-level value.
      @scanner.scan(/\s*/n)
      raise err_msg(ERR_IllegalSyntax) unless @scanner.eos?
      obj
    ensure
      # Drop the scanner even when parsing fails.
      @scanner = nil
    end
  end

  private #---------------------------------------------------------

  # Parse *str* with Ruby's JSON module, converting its errors into
  # RuntimeError so that both parser paths raise the same exception class.
  def parse_with_json_library(str)
    JSON.parse(str, :max_nesting => false)
  rescue JSON::JSONError => e
    exception = RuntimeError.new(e.message)
    exception.set_backtrace(e.backtrace)
    raise exception
  end

  # Decode the bytes of *str* as UTF-8 and return the re-encoded string.
  # Each malformed sequence (bad lead byte, bad or missing continuation byte,
  # overlong form, surrogate code point, or a NUL byte) is replaced with the
  # code point *malformed_chr*; if *malformed_chr* is nil an exception is
  # raised instead.
  def validate_string(str, malformed_chr = nil)
    code  = 0    # code point being assembled
    rest  = 0    # number of continuation bytes still expected
    range = nil  # code points that legitimately need the current length
    ucs   = []
    str.each_byte do |c|
      if rest <= 0
        case c
        when 0x01..0x7f then rest = 0 ; ucs << c
        when 0xc0..0xdf then rest = 1 ; code = c & 0x1f ; range = 0x00080..0x0007ff
        when 0xe0..0xef then rest = 2 ; code = c & 0x0f ; range = 0x00800..0x00ffff
        when 0xf0..0xf7 then rest = 3 ; code = c & 0x07 ; range = 0x10000..0x10ffff
        else                 ucs << handle_malformed_chr(malformed_chr)
        end
      elsif (0x80..0xbf) === c
        code = (code << 6) | (c & 0x3f)
        if (rest -= 1) <= 0
          # Reject overlong encodings and encoded surrogates.
          if !(range === code) || (0xd800..0xdfff) === code
            code = handle_malformed_chr(malformed_chr)
          end
          ucs << code
        end
      else
        # A continuation byte was expected but something else arrived.
        ucs << handle_malformed_chr(malformed_chr)
        rest = 0
      end
    end
    # The input ended in the middle of a multi-byte sequence.
    ucs << handle_malformed_chr(malformed_chr) if rest > 0
    ucs.pack('U*')
  end

  # Return the replacement code point *chr*, or raise if none is configured.
  def handle_malformed_chr(chr)
    raise err_msg(ERR_IllegalUnicode) unless chr
    chr
  end

  # Build an error message from *err*. When Debug is enabled and a scanner is
  # active, a snippet of the input around the current position is appended.
  def err_msg(err)
    context = ""
    if Debug && @scanner
      context = " #{@scanner.string[[0, @scanner.pos - 8].max, 16].inspect}"
    end
    err + context
  end

  # Resolve the backslash escapes in the body of a JSON string literal and
  # return the result (tagged as UTF-8 on Ruby 1.9+). All escapes are handled
  # in one pass so that the output of one escape is never read as the start
  # of another (e.g. an escaped backslash followed by "u0041").
  def unescape_string(str)
    str = str.gsub(EscapeRegex) do
      simple, unicode = $1, $2
      simple ? SimpleEscapes[simple] : decode_unicode_escapes(unicode)
    end
    str = validate_string(str, @malformed_chr) if @enable_validation
    RUBY19 ? str.force_encoding('UTF-8') : str
  end

  # Convert a run of consecutive \uXXXX escapes into UTF-8 bytes.
  def decode_unicode_escapes(escapes)
    seq = escapes.scan(UnicodeEscapeRegex).flatten.map { |c| c.hex }
    seq = combine_surrogates(seq) if @enable_surrogate
    chars = seq.pack('U*')
    # The surrounding string is binary on Ruby 1.9+; keep the replacement
    # binary as well so it can be combined with raw multi-byte input.
    RUBY19 ? chars.force_encoding('ASCII-8BIT') : chars
  end

  # Merge each high surrogate in *seq* with the low surrogate that follows
  # it into a single code point. Raises if a high surrogate is not directly
  # followed by a low surrogate.
  def combine_surrogates(seq)
    result = []
    index  = 0
    while index < seq.length
      code = seq[index]
      if (0xd800..0xdbff) === code
        low = seq[index + 1]
        raise err_msg(ERR_IllegalUnicode) unless low && (0xdc00..0xdfff) === low
        result << (0x10000 + ((code & 0x03ff) << 10) + (low & 0x03ff))
        index += 2
      else
        result << code
        index += 1
      end
    end
    result
  end

  # Consume leading whitespace and the next character; return that character.
  def get_symbol
    raise err_msg(ERR_IllegalSyntax) unless @scanner.scan(/\s*(.)/n)
    @scanner[1]
  end

  # Return the next non-whitespace character without consuming it, or nil
  # if there is none.
  def peek_symbol
    @scanner.match?(/\s*(.)/n) ? @scanner[1] : nil
  end

  # True if the next non-whitespace character is *chr*. Safe at end of input.
  def next_symbol_is?(chr)
    symbol = peek_symbol
    !symbol.nil? && symbol[0] == chr
  end

  # Parse a string literal (used for object keys).
  def parse_string
    raise err_msg(ERR_IllegalSyntax) unless @scanner.scan(StringRegex)
    unescape_string(@scanner[1])
  end

  # Parse any JSON value at the current position.
  def parse_value
    raise err_msg(ERR_IllegalSyntax) unless @scanner.scan(ValueRegex)
    case
    when @scanner[1] then true
    when @scanner[2] then false
    when @scanner[3] then nil
    when @scanner[4] then unescape_string(@scanner[4])
    when @scanner[5] then @scanner[5].to_f
    when @scanner[6] then @scanner[6].to_i
    when @scanner[7] then parse_hash
    when @scanner[8] then parse_array
    else                  raise err_msg(ERR_IllegalSyntax)
    end
  end

  # Parse the members of an object; the opening brace is already consumed.
  def parse_hash
    obj = {}
    if next_symbol_is?(?}) then get_symbol ; return obj ; end
    loop do
      index = parse_string
      raise err_msg(ERR_IllegalSyntax) unless get_symbol[0] == ?:
      obj[index] = parse_value
      case get_symbol[0]
      when ?} then return obj
      when ?, then next
      else         raise err_msg(ERR_IllegalSyntax)
      end
    end
  end

  # Parse the elements of an array; the opening bracket is already consumed.
  def parse_array
    obj = []
    if next_symbol_is?(?]) then get_symbol ; return obj ; end
    loop do
      obj << parse_value
      case get_symbol[0]
      when ?] then return obj
      when ?, then next
      else         raise err_msg(ERR_IllegalSyntax)
      end
    end
  end

end

# = Simple JSON builder
#
# This class converts a Ruby object to a JSON string. You can convert
# *ruby_obj* as shown below.
#
#   json_str = JsonBuilder.new.build(ruby_obj)
#
# *ruby_obj* must satisfy these conditions.
# - It must support the to_s method, or else be an array, a hash or nil.
# - All keys of a hash must support the to_s method.
# - All values of an array or a hash must satisfy all conditions mentioned above.
#
# If *ruby_obj* is not an array or a hash, it will be converted to an array
# with a single element.
class JsonBuilder

  #:stopdoc:
  RUBY19             = RUBY_VERSION >= '1.9.0'
  Name               = 'JsonBuilder'
  ERR_NestIsTooDeep  = "[#{Name}] Array / Hash nested too deep."
  ERR_NaN            = "[#{Name}] NaN and Infinite are not permitted in JSON."

  # Characters that have a short escape form in JSON.
  ESCAPE_CONVERSION = {
    '"'    => '\"', '\\'   => '\\\\', '/'    => '\/', "\x08" => '\b',
    "\x0c" => '\f', "\x0a" => '\n',   "\x0d" => '\r', "\x09" => '\t'
  }
  #:startdoc:

  # Create a new instance of JsonBuilder. *options* can contain these values.
  # [:max_nest]
  #     If Array / Hash is nested more than this value, an exception is raised.
  #     nil, false or 0 disables the check. 19 by default.
  # [:nan]
  #     A non-finite Float (NaN or Infinity) is replaced with this value. If
  #     nil or false, an exception is raised instead. nil by default.
  def initialize(options = {})
    @default_max_nest = options.fetch(:max_nest, 19)
    @default_nan      = options.fetch(:nan, nil)
  end

  # Convert *obj* to a JSON string.
  # [obj]
  #     A Ruby object. If it is not an array or a hash, it is wrapped in an
  #     array with a single element.
  # [options]
  #     Same as new. Values given here override those given to new for this
  #     call only.
  #
  # Raises a RuntimeError if the nesting limit is exceeded or if a non-finite
  # Float is found while no :nan replacement is set.
  def build(obj, options = {})
    @max_nest = options.fetch(:max_nest, @default_max_nest)
    @nan      = options.fetch(:nan, @default_nan)
    # Wrap here so that both generator paths produce the same document.
    obj = [obj] unless Array === obj || Hash === obj
    if RUBY19 && !@nan
      begin
        JSON.generate(obj, :max_nesting => @max_nest, :check_circular => false)
      rescue JSON::JSONError => e
        # Report failures with the same exception class as the built-in path.
        exception = RuntimeError.new(e.message)
        exception.set_backtrace(e.backtrace)
        raise exception
      end
    elsif Hash === obj
      build_object(obj, 0)
    else
      build_array(obj, 0)
    end
  end

  private #---------------------------------------------------------

  # Return *str* (converted with to_s) as a quoted JSON string literal.
  # Control characters, the double quote, the slash and the backslash are
  # escaped; every other byte is passed through unchanged.
  def escape(str)
    str = str.to_s
    if RUBY19
      # Work on the UTF-8 bytes so that the byte-oriented regex below applies.
      str = str.encode('UTF-8')
      str.force_encoding('ASCII-8BIT')
    end
    str = str.gsub(/[^\x20-\x21\x23-\x2e\x30-\x5b\x5d-\xff]/n) do |chr|
      ESCAPE_CONVERSION[chr] || sprintf("\\u%04X", chr.unpack('C').first)
    end
    str.force_encoding('UTF-8') if RUBY19
    "\"#{str}\""
  end

  # Convert a single value nested at depth *level* to its JSON form.
  def build_value(obj, level)
    case obj
    when Integer, TrueClass, FalseClass then obj.to_s
    when Float
      unless obj.finite?
        raise ERR_NaN unless @nan
        obj = @nan
      end
      obj.to_s
    when NilClass then 'null'
    when Array    then build_array(obj, level + 1)
    when Hash     then build_object(obj, level + 1)
    else               escape(obj)
    end
  end

  # Raise if a container at depth *level* exceeds the configured limit.
  # nil, false and 0 mean "no limit", matching JSON.generate's :max_nesting.
  def check_nest(level)
    return unless @max_nest && @max_nest != 0
    raise ERR_NestIsTooDeep if level >= @max_nest
  end

  # Convert the array *obj*, nested at depth *level*, to a JSON array.
  def build_array(obj, level)
    check_nest(level)
    '[' + obj.map { |item| build_value(item, level) }.join(',') + ']'
  end

  # Convert the hash *obj*, nested at depth *level*, to a JSON object.
  # Keys are converted with to_s and emitted as strings.
  def build_object(obj, level)
    check_nest(level)
    members = obj.map do |key, value|
      "#{build_value(key.to_s, level)}:#{build_value(value, level)}"
    end
    '{' + members.join(',') + '}'
  end

end
