#!/usr/bin/ruby

##
# Ruby API for tagthe.net
# Check http://tagthe.net/fordevelopers
# (c) Benjamin Ferrari
# license: http://www.gnu.org/licenses/lgpl.html
# Email : benjamin.ferrari@knallgrau.at
# 

# this is meant as a successor to Pratik Naik's excellent ruby-api.
# ( see http://null.in/2006/05/26/tagthenet-ruby-api/ ). 
# 
# Usage : tags = Tagthenet::parse :text => "Benjamin Ferrari"
# or: tags = Tagthenet::parse :url =>  "http://knallgrau.at"
# p tags["person"].first #=> "Benjamin Ferrari" 
#
# The script can also run from the commandline as a standalone program.
# The Usage here is './tagthenet.rb --help'
#
# Improvements to Pratik's code:
#  - requests are sent via the http post command. This allows texts
#    of arbitrary length to be sent to the service.
#  - you can run this code as a standalone script from the command line.
#    Supported output formats are CSV or YAML (default).
#  - tag dimensions are fetched dynamically. If we decide to add any 
#    further dimensions to the web service, the api will reflect this.
#  - wrapped code into a module, to better support future additions. 
##

require 'net/http'
require 'rexml/document'

$AUTHOR = "Benjamin Ferrari"

module Tagthenet

  # Address of the web service every request is posted to.
  API_URL = 'http://tagthe.net/api'.freeze

  require "net/http"
  require 'rexml/document'
  require 'uri'

  # Sends +options+ to the web service and returns the tags found in its
  # XML answer.
  #
  # options is a hash. valid keys are :text and :url.
  # for example: Tagthenet::parse :text => "Hello World!"
  #
  # Returns a Hash that maps each dimension type (for example "person") to
  # an Array of tag strings. Looking up a type that was not returned yields
  # an empty Array.
  def self.parse(options)
    Tagger.new.parse(options)
  end

  # Performs one request against API_URL and converts the XML response
  # into a Hash of tags.
  class Tagger

    # Posts +options+ to the service and returns the parsed tags.
    # See Tagthenet::parse for the accepted keys and the return value.
    def parse(options)
      xml2tags(parse_to_xml(options))
    end

    private

    # Posts +options+ as form fields to API_URL and returns the raw
    # response body (expected to be XML).
    def parse_to_xml(options)
      res = Net::HTTP.post_form(URI.parse(API_URL), form_params(options))
      res.body
    end

    # Builds the form fields for a request as a fresh Hash with String keys
    # and values. The caller's hash is left untouched.
    #
    # Values are deliberately NOT percent-escaped here: Net::HTTP.post_form
    # form-encodes them itself, so escaping first would double-encode the
    # text (and URI.escape no longer exists in Ruby 3.0 and later).
    def form_params(options)
      params = {}
      options.each { |key, value| params[key.to_s] = value.to_s }
      params
    end

    # Converts the service's XML into a Hash of dimension type => tags.
    #
    # Every <dim> element found under memes/meme contributes the text of
    # its own <item> children, in document order. Dimensions of the same
    # type that occur more than once are merged; each item is recorded
    # exactly once. <dim> elements without a type attribute are skipped
    # because there is no key to file their items under.
    def xml2tags(xml)
      tags = Hash.new { |hash, key| hash[key] = [] }
      doc = REXML::Document.new(xml)
      doc.elements.each("memes/meme/dim") do |dim|
        type = dim.attributes["type"]
        next if type.nil?

        # Walk the children of this very element instead of re-querying the
        # document by type, which duplicated items for repeated types.
        dim.elements.each("item") { |item| tags[type] << item.text }
      end
      tags
    end

  end

end



# Command line front end: runs only when this file is executed directly.
# Usage: ./tagthenet.rb (--text TEXT | --url URL) [--format csv|yaml]
if $0 == __FILE__
  require "optparse"
  require "csv"
  require "yaml"

  # Writes +dimensions+ (a Hash of dimension type => Array of tags) to +out+.
  #
  # When +format+ is "csv" (String or Symbol) one "tag,dimension" row is
  # written per tag, with the tags of each dimension sorted. Any other
  # value produces YAML, which is the default format.
  def print_tags(dimensions, format, out = $stdout)
    if format.to_s == "csv"
      csv = CSV.new(out)
      dimensions.each do |dim, tags|
        tags.sort.each { |tag| csv << [tag, dim] }
      end
    else
      out.puts dimensions.to_yaml
    end
  end

  output_format = "yaml"
  # The request to send; stays nil when neither --text nor --url is given.
  # If both are given, the one that appears last on the command line wins.
  request = nil

  opts = OptionParser.new
  opts.banner = "Tagthenet"

  opts.on("-t", "--text TEXT", "analyse text and print its tags") do |text|
    request = { :text => text }
  end

  opts.on("-u", "--url URL", "analyse a url and print its tags") do |url|
    request = { :url => url }
  end

  # The block parameter must not be called like the outer variable: block
  # parameters are block-local, so the assignment would otherwise be lost.
  opts.on("-f", "--format FORMAT",
          "The output format. Can be either csv or yaml(default)") do |name|
    output_format = name
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end

  opts.parse!(ARGV)

  # Contact the service once, after all options are known.
  dimensions = request ? Tagthenet::parse(request) : {}
  print_tags(dimensions, output_format)
end
