#!ruby -Ku
# -*- coding: utf-8 -*-
require 'jcode'
require 'kconv'
require "GeoService"
#require File.dirname(__FILE__) + "/rbdams"
require "rbdams"
require "./simple-json-builder-1_09.rb"
include DAMS

# Groups place-name suffixes ("desinences") into numbered classes.
# GeoWord#isFriendOf treats two words as "friends" when their suffixes
# map to the same class number here.
$desinence_classes = {
  # class 1
  '都' => 1,
  '道' => 1,
  '府' => 1,
  '県' => 1,
  # class 2
  '市' => 2,
  '区' => 2,
  '町' => 2,
  '村' => 2,
  # class 3
  '川' => 3,
  '沢' => 3
}

# One candidate place-name entry together with the other candidates it has
# been linked to (parents, children, siblings, friends).
#
# +nodeinfo+ is the hash handed to the constructor; this class reads its
# 'geoword', 'geoword_full', 'upper_geowords' and 'desinence' keys.
# Every instance receives a process-wide sequential id (+wid+).
class GeoWord
  attr_accessor :wid, :nodeinfo, :siblings, :children, :parents, :friends, :pair_has_selected

  # Next id to hand out; shared by every GeoWord instance.
  @@serial_id = 0

  # geoword_info:: hash describing the entry (stored as-is in +nodeinfo+).
  def initialize(geoword_info)
    @wid = @@serial_id
    @@serial_id += 1
    @nodeinfo = geoword_info
    @siblings, @children, @parents, @friends = [], [], [], []
    @pair_has_selected = false
  end

  # True when this word's full name is listed among +geonode+'s upper geowords.
  def isParentOf(geonode)
    geonode.nodeinfo['upper_geowords'].include?(@nodeinfo['geoword_full'])
  end

  # True when +geonode+'s full name is listed among this word's upper geowords.
  def isChildOf(geonode)
    @nodeinfo['upper_geowords'].include?(geonode.nodeinfo['geoword_full'])
  end

  # True when both words share at least one upper geoword.
  def isSiblingOf(geonode)
    shared = @nodeinfo['upper_geowords'] & geonode.nodeinfo['upper_geowords']
    !shared.empty?
  end

  # True when both suffixes fall into the same $desinence_classes class.
  # A suffix missing from the table only befriends the identical suffix.
  def isFriendOf(geonode)
    own_suffix   = @nodeinfo['desinence']
    other_suffix = geonode.nodeinfo['desinence']
    own_class    = $desinence_classes[own_suffix]
    return own_suffix == other_suffix if own_class.nil?
    own_class == $desinence_classes[other_suffix]
  end

  # Records every relation that holds between this word and +geonode+ on
  # both objects. Words with the same 'geoword' are never linked.
  def checkRelations(geonode)
    return if @nodeinfo['geoword'] == geonode.nodeinfo['geoword']

    if isParentOf(geonode)
      @children << geonode
      geonode.parents << self
    end
    if isChildOf(geonode)
      @parents << geonode
      geonode.children << self
    end
    if isSiblingOf(geonode)
      @siblings << geonode
      geonode.siblings << self
    end
    if isFriendOf(geonode)
      @friends << geonode
      geonode.friends << self
    end
  end

  # Ids of the linked parent words.
  def parents_ids
    ids_of(@parents)
  end

  # Ids of the linked child words.
  def children_ids
    ids_of(@children)
  end

  # Ids of the linked sibling words.
  def siblings_ids
    ids_of(@siblings)
  end

  # Ids of the linked friend words.
  def friends_ids
    ids_of(@friends)
  end

  private

  # Maps a list of GeoWord objects to their +wid+ values.
  def ids_of(words)
    words.map { |word| word.wid }
  end

end

# Resolves ambiguous place-name tokens in a token stream.
#
# Tokens ("slots") are appended to a sliding window (+words_array+). A
# place-name slot carries one GeoWord per candidate entry, and each new
# candidate is linked against every candidate still in the window. Once a
# slot falls out of the window (or on #flush) the best-scoring candidate is
# chosen and the slot is moved to +result_words_array+, from which
# #drawResults renders the output.
#
# Slots are plain hashes; this class reads the keys 'surface', 'pos',
# 'partOfSpeech', 'subclassification1'..'subclassification3', 'yomi' etc.
class GeoSequence
  attr_accessor :words_array, :result_words_array

  # Default score contributed by each kind of relation.
  SCORE_HAS_SIBLING = 10
  SCORE_HAS_CHILD   = 15
  SCORE_HAS_PARENT  = 20
  SCORE_HAS_FRIEND  =  5
  # Bonus for a candidate related to an already selected word.
  SCORE_PAIR_SELECTED = 40
  # Number of positions a slot stays in the window before being resolved.
  WINDOW_SIZE       = 200
  # Candidates scoring below this are not selected.
  THRESHOLD_SCORE   = 0
  # Exponent factor applied per additional relation of the same kind.
  ALPHA             = 0.1

  # Suffixes that make a place-name token a possible start of a postal address.
  ADDRESS_DESINENCES = ['都', '道', '府', '県', '市', '区', '町', '村'].freeze

  # options:: hash overriding the defaults above; recognised keys are
  #           'has_sibling', 'has_child', 'has_parent', 'has_friend',
  #           'pair_selected', 'window_size', 'threshold' and 'alpha'.
  def initialize(options = {})
    clear
    @score_has_sibling   = options['has_sibling']   || SCORE_HAS_SIBLING
    @score_has_child     = options['has_child']     || SCORE_HAS_CHILD
    @score_has_parent    = options['has_parent']    || SCORE_HAS_PARENT
    @score_has_friend    = options['has_friend']    || SCORE_HAS_FRIEND
    @score_pair_selected = options['pair_selected'] || SCORE_PAIR_SELECTED
    @window_size         = options['window_size']   || WINDOW_SIZE
    @threshold_score     = options['threshold']     || THRESHOLD_SCORE
    @alpha               = options['alpha']         || ALPHA
  end

  # Appends one token to the window at position +pos+.
  #
  # Tokens whose 'subclassification2' is not '地名語' are stored unchanged.
  # For place-name tokens every "id:desinence" entry of 'subclassification3'
  # is looked up through +service.getGeowordEntry+ and becomes a candidate.
  def addNewWord(pos, service, geonode)
    checkExpiredWord(pos)
    new_slot = geonode.dup.update({'pos'=>pos})
    if geonode['subclassification2'] != '地名語'
      @words_array.push(new_slot)
      return
    end

    new_slot['words'] = []
    geonode['subclassification3'].split('/').each do |geoword_entry|
      id = geoword_entry.split(':').first
      new_word = GeoWord.new(service.getGeowordEntry(id))
      link_with_window(new_word)
      new_slot['words'].push(new_word)
    end
    @words_array.push(new_slot) if new_slot['words'].length > 0
  end

  # Appends a place-name token that was confirmed by an address geocoder,
  # followed by a second slot (at +pos+ + 1) for the detailed address.
  #
  # query::   surface text of the address part following the place name.
  # spatres:: geocoder result; read as [address_path, longitude, latitude, level].
  def addNewWordAndAddress(pos, service, geonode, query, spatres)
    checkExpiredWord(pos)

    # Register the place name itself (prefecture / municipality name),
    # already narrowed down to a single entry.
    entries = geonode['subclassification3'].split('/')
    place_word = GeoWord.new(nearest_geoword_info(service, entries, spatres))
    link_with_window(place_word)
    @words_array.push(geonode.dup.update({'pos'=>pos,'words'=>[place_word]}))

    # Register the detailed address as a child of the place name.
    address_info = {
      'geoword_full'=>spatres[0],
      'upper_geowords'=>[geonode['surface']],
      'desinence'=>'',
      'latitude'=>spatres[2],
      'longitude'=>spatres[1],
      'level'=>spatres[3]
    }
    address_word = GeoWord.new(address_info)
    link_with_window(address_word)
    @words_array.push({'pos'=>pos + 1, 'surface'=>query, 'words'=>[address_word]})
  end

  # Moves every slot that has left the window to +result_words_array+,
  # choosing the best candidate for place-name slots on the way.
  #
  # A +pos+ of zero or less expires the whole window (see #flush).
  def checkExpiredWord(pos)
    while (slot = @words_array.shift)
      # First slot still inside the window: put it back and stop.
      if pos > 0 && slot['pos'] >= pos - @window_size
        @words_array.unshift(slot)
        return
      end

      words = slot['words']
      if words.nil? # not a place name
        @result_words_array.push(slot)
        next
      end

      # The first candidate with the strictly highest score wins.
      best_score = -1
      best_index = nil
      words.each_with_index do |word, index|
        score = calcScore(word)
        if score > best_score
          best_score = score
          best_index = index
        end
      end

      # No candidate at all, or the best score is below the threshold.
      if best_index.nil? || best_score < @threshold_score
        @result_words_array.push(slot)
        next
      end

      # Reward every word related to the winner when it is scored later.
      selected_word = words[best_index]
      [selected_word.siblings, selected_word.children,
       selected_word.parents, selected_word.friends].each do |related|
        related.each { |word| word.pair_has_selected = true }
      end
      @result_words_array.push(slot.dup.update({'word'=>selected_word, 'score'=>best_score}))
    end
  end

  # Empties both the window and the pending results.
  # GeoWord ids are not reset; they keep counting across sequences.
  def clear
    @words_array = []
    @result_words_array = []
  end

  # Resolves every slot still waiting in the window.
  def flush
    checkExpiredWord(-1)
  end

  # Renders the resolved slots and empties +result_words_array+.
  #
  # format:: 'JSON' (selected words only), 'SPA' or 'SIMPLE' (inline markup
  #          around the original text); any other value produces one
  #          tab/comma separated line per slot.
  # Returns the rendered string.
  def drawResults(format = 'JSON')
    result = case format
             when 'JSON'   then draw_json
             when 'SPA'    then draw_spa
             when 'SIMPLE' then draw_simple
             else               draw_lines
             end
    @result_words_array.clear
    return result
  end

  # Scores one candidate from the relations it has collected so far.
  #
  # Each relation kind contributes base * 2 ** (alpha * (count - 1)) when at
  # least one such relation exists, using the configured 'alpha' option.
  def calcScore(geoword)
    score = 0
    score += relation_score(@score_has_sibling, geoword.siblings.length)
    score += relation_score(@score_has_child,   geoword.children.length)
    score += relation_score(@score_has_parent,  geoword.parents.length)
    score += relation_score(@score_has_friend,  geoword.friends.length)
    score += @score_pair_selected if geoword.pair_has_selected
    return score
  end

  # Feeds a whole token list through the window.
  #
  # Place-name tokens ending in one of ADDRESS_DESINENCES are additionally
  # sent, together with the following text, to the address geocoder
  # (+dams_retrieve+); on a sufficient match the matched tokens are consumed
  # as one place name plus one detailed address. Every token is registered
  # at its own index in +nodes+. Returns self.
  def analyze(nodes, service)
    i = 0
    while (i < nodes.length)
      node = nodes[i]
      STDERR.printf("i = %d\t'%s'\n", i, node['surface']) if $DEBUG
      if node['surface'] == "\r"
        i += 1
        next
      end
      if (node['partOfSpeech'] != '名詞' || node['subclassification1'] != '固有名詞' || node['subclassification2'] != '地名語')
        addNewWord(i, service, node)
        i += 1
        next
      end

      geoword_id = false
      node['subclassification3'].split('/').each do |idstring|
        id, desinence = idstring.split(':')
        if (ADDRESS_DESINENCES.include?(desinence) && (Regexp.new(desinence + "$") =~ node['surface']))
          geoword_id = id
          break
        end
      end
      if (geoword_id == false)
        addNewWord(i, service, node)
        i += 1
        next
      end

      # This may be the start of an address:
      # collect roughly 30 characters of text starting at this token.
      query = ""
      (i .. nodes.length - 1).each do |j|
        break if (query.jlength >= 30)
        query += nodes[j]['surface']
      end
      STDERR.printf("query: '%s'\n", query) if $DEBUG

      # Ask the address geocoder.
      results = dams_retrieve(Kconv::kconv(query, Kconv::EUC, Kconv::UTF8))
      score, matchlen = results.shift
      if (results.size == 0 || score < 4)
        # Match is not good enough: treat as an ordinary place name.
        addNewWord(i, service, node)
        i += 1
        next
      end

      # A matching address was found: gather the tokens it covers,
      # stopping before a carriage-return token.
      query = ""
      last_index = i
      (i .. nodes.length - 1).each do |j|
        break if nodes[j]['surface'] == "\r"
        last_index = j
        query += Kconv::kconv(nodes[j]['surface'], Kconv::EUC, Kconv::UTF8)
        break if (query.length >= matchlen)
      end
      r = results[0]
      r[0] = Kconv::kconv(r[0], Kconv::UTF8, Kconv::EUC)
      query = Kconv::kconv(query, Kconv::UTF8, Kconv::EUC)
      # Drop the leading place name; the rest is the detailed address.
      query = query[node['surface'].length .. -1]
      addNewWordAndAddress(i, service, node, query, r)
      i = last_index + 1
    end
    return self
  end

  private

  # Links +new_word+ with every candidate currently held in the window.
  def link_with_window(new_word)
    @words_array.each do |slot|
      next unless slot['words']
      slot['words'].each { |word| word.checkRelations(new_word) }
    end
  end

  # Picks the entry that best fits the geocoder result +spatres+.
  # An entry whose upper geoword appears in the geocoded address path wins
  # outright; otherwise the entry with the smallest squared distance to the
  # geocoded point is returned (nil when there are no entries).
  def nearest_geoword_info(service, geoword_entries, spatres)
    # Elements (prefecture name, municipality name, ...) of the geocoded address.
    address_elements = spatres[0].split('/')
    best_dist = nil
    best_info = nil
    geoword_entries.each do |geoword_entry|
      id = geoword_entry.split(':').first
      geoword_info = service.getGeowordEntry(id)
      STDERR.printf(%Q!%s(%s) = !, geoword_info['geoword_full'], geoword_info['upper_geowords'].join(',')) if $DEBUG
      # An upper geoword contained in the address settles it uniquely.
      geoword_info['upper_geowords'].each do |upper_geoword|
        return geoword_info if address_elements.include?(upper_geoword)
      end
      # Otherwise measure the squared distance and keep the closest entry.
      ydist = spatres[2] - geoword_info['latitude']
      xdist = spatres[1] - geoword_info['longitude']
      dist = xdist * xdist + ydist * ydist
      puts dist if $DEBUG
      if best_dist.nil? || dist < best_dist
        best_dist = dist
        best_info = geoword_info
      end
    end
    best_info
  end

  # Score for +count+ relations of one kind with base score +base+.
  def relation_score(base, count)
    return 0 unless count > 0
    base * 2 ** (@alpha * (count - 1))
  end

  # JSON objects (one per selected word) joined by ",\n".
  def draw_json
    result = ''
    @result_words_array.each do |slot|
      word = slot['word']
      next if word.nil?
      info = word.nodeinfo
      result_array = {'id'=>word.wid, 'surface'=>slot['surface'], 'pos'=>slot['pos'], 'geoword_id'=>info['id'], 'geoword'=>info['geoword'], 'geoword_full'=>info['geoword_full'], 'desinence'=>info['desinence'], 'upper_geowords'=>info['upper_geowords'], 'lat'=>info['latitude'], 'lon'=>info['longitude'], 'note'=>info['note'], 'parents'=>word.parents_ids, 'children'=>word.children_ids, 'siblings'=>word.siblings_ids, 'friends'=>word.friends_ids, 'Score'=>slot['score']}
      result << ",\n" if result.length > 0
      result << JsonBuilder.new.build(result_array)
    end
    result
  end

  # Original text with selected words wrapped in <spa ...> tags.
  # NOTE(review): the tag separates some attributes with commas; kept as-is
  # because consumers may depend on the exact output.
  def draw_spa
    result = ''
    @result_words_array.each do |slot|
      word = slot['word']
      if word.nil?
        result << slot['surface']
        next
      end
      info = word.nodeinfo
      result << sprintf(%q!<spa id="%d" geoword="%s" upper_geowords="%s", lat="%f", lon="%f", note="%s", ilevel="%d", score="%d"!, word.wid, info['geoword_full'], info['upper_geowords'].join(','), info['latitude'], info['longitude'], info['note'], info['level'], slot['score'])
      result << sprintf(%q! parents="%s"!, word.parents_ids.join(',')) if word.parents_ids.length > 0
      result << sprintf(%q! children="%s"!, word.children_ids.join(',')) if word.children_ids.length > 0
      result << sprintf(%q! siblings="%s"!, word.siblings_ids.join(',')) if word.siblings_ids.length > 0
      result << sprintf(%q! friends="%s"!, word.friends_ids.join(',')) if word.friends_ids.length > 0
      result << (">" + slot['surface'] + '</spa>')
    end
    result
  end

  # Original text with <person> and <location> markup.
  def draw_simple
    result = ''
    @result_words_array.each do |slot|
      word = slot['word']
      if word.nil?
        result << simple_unselected_markup(slot)
        next
      end
      info = word.nodeinfo
      pref = prefecture_of(info)
      if /^HINAN/ =~ info['id']
        result << sprintf(%q!<location place="%s" pref="%s" type="haven" lat="%f" lng="%f"!, info['geoword_full'], pref, info['latitude'], info['longitude'])
        result << (">" + slot['surface'] + '</location>')
      elsif /^FAC/ =~ info['id']
        result << sprintf(%q!<location place="%s" pref="%s" type="facility" lat="%f" lng="%f"!, info['geoword_full'], pref, info['latitude'], info['longitude'])
        result << (">" + slot['surface'] + '</location>')
      elsif (info['level'] && info['geoword_full'].split('/').size > 1)
        result << sprintf(%q!<location place="%s" pref="%s" type="address" lvl="%s" lat="%f" lng="%f"!, info['geoword_full'], pref, info['level'], info['latitude'], info['longitude'])
        result << (">" + slot['surface'] + '</location>')
      elsif (info['desinence'] == '駅' && /駅$/ =~ slot['surface'])
        result << sprintf(%q!<location place="%s" pref="%s" type="station" lat="%f" lng="%f"!, info['geoword_full'], pref, info['latitude'], info['longitude'])
        result << (">" + slot['surface'] + '</location>')
      else
        result << slot['surface']
      end
    end
    result
  end

  # SIMPLE markup for a slot without a selected word: family and given
  # names get a <person> tag, everything else is emitted verbatim.
  def simple_unselected_markup(slot)
    if slot['subclassification2'] == '人名'
      case slot['subclassification3']
      when '姓'
        return sprintf(%q!<person yomi="%s" type="sirname">%s</person>!, slot['yomi'],  slot['surface'])
      when '名'
        return sprintf(%q!<person yomi="%s" type="givenname">%s</person>!, slot['yomi'],  slot['surface'])
      end
    end
    slot['surface']
  end

  # Prefecture name for +nodeinfo+: the first upper geoword ending in
  # 都/道/府/県, else the leading such component of 'geoword_full', else ''.
  def prefecture_of(nodeinfo)
    nodeinfo['upper_geowords'].each do |upper|
      return upper if /^.*?(?:都|道|府|県)$/ =~ upper
    end
    if /^(.*?(?:都|道|府|県))\// =~ nodeinfo['geoword_full']
      return $1
    end
    ''
  end

  # One line per slot: surface, a tab, then the comma separated features.
  def draw_lines
    result = ''
    @result_words_array.each do |slot|
      word = slot['word']
      if word.nil?
        result << sprintf(%Q!%s\t%s,%s,%s,%s,%s,%s,%s,%s,%s\n!, slot['surface'], slot['partOfSpeech'], slot['subclassification1'], slot['subclassification2'], slot['subclassification3'], slot['conjugatedForm'], slot['cunjugationType'], slot['originalForm'], slot['yomi'], slot['pronunciation'])
      elsif word.nodeinfo['id'].nil? # detailed address
        result << sprintf(%Q!%s\t名詞,固有名詞,住所,*,*,*,%s,*,*\n!, slot['surface'], word.nodeinfo['geoword_full'])
      else
        new_subclassification3 = word.nodeinfo['id'] + ':' + word.nodeinfo['desinence']
        result << sprintf(%Q!%s\t%s,%s,%s,%s,%s,%s,%s,%s,%s\n!, slot['surface'], slot['partOfSpeech'], slot['subclassification1'], slot['subclassification2'], new_subclassification3, slot['conjugatedForm'], slot['cunjugationType'], slot['originalForm'], slot['yomi'], slot['pronunciation'])
      end
    end
    result
  end

end
