#!/usr/bin/ruby
##
# Ruby API for tagthe.net
# Check http://tagthe.net/fordevelopers
# (c) Benjamin Ferrari
# license: http://www.gnu.org/licenses/lgpl.html
# Email : benjamin.ferrari@knallgrau.at
#
# This is meant as a successor to Pratik Naik's excellent ruby-api
# ( see http://null.in/2006/05/26/tagthenet-ruby-api/ ).
#
# Usage : tags = Tagthenet::parse :text => "Benjamin Ferrari"
# or:     tags = Tagthenet::parse :url => "http://knallgrau.at"
#         p tags["person"].first #=> "Benjamin Ferrari"
#
# The script can also run from the command line as a standalone program.
# The usage here is './tagthenet.rb --help'
#
# Improvements to Pratik's code:
# - requests are sent via the HTTP POST command. This allows texts
#   of arbitrary length to be sent to the service.
# - you can run this code as a standalone script from the command line.
#   Supported output formats are CSV or YAML (default).
# - tag dimensions are fetched dynamically. If we decide to add any
#   further dimensions to the web service, the api will reflect this.
# - wrapped code into a module, to better support future additions.
##

require 'net/http'
require 'rexml/document'
require 'uri'

$AUTHOR = "Benjamin Ferrari"

module Tagthenet
  # Endpoint every request is POSTed to.
  API_URL = 'http://tagthe.net/api'.freeze

  # Sends the given options to the web service and returns the tags found.
  #
  # options is a hash. Valid keys are :text and :url,
  # for example: Tagthenet::parse :text => "Hello World!"
  #
  # Returns a Hash mapping each dimension type (String) to an Array of tag
  # strings. Looking up an unknown dimension yields an empty Array.
  def self.parse(options)
    Tagger.new.parse(options)
  end

  # Talks to the web service and converts its XML answer into a tag hash.
  class Tagger
    # Posts +options+ to API_URL and returns the parsed tags
    # (see Tagthenet.parse for the shape of the result).
    def parse(options)
      xml2tags(parse_to_xml(options))
    end

    private

    # POSTs +options+ as form data to API_URL and returns the raw response body.
    #
    # The values are handed over unescaped: Net::HTTP.post_form form-encodes
    # them itself, so escaping here would double-encode the payload (and
    # URI.escape no longer exists in Ruby 3). The caller's hash is not modified.
    def parse_to_xml(options)
      response = Net::HTTP.post_form(URI.parse(API_URL), options)
      response.body
    end

    # Converts the XML in +xml+ (memes/meme/dim/item structure) into a Hash
    # of dimension type => Array of item texts, in document order.
    def xml2tags(xml)
      tags = Hash.new { |hash, key| hash[key] = [] }
      doc = REXML::Document.new(xml)
      doc.elements.each("memes/meme/dim") do |dim|
        type = dim.attributes["type"]
        # Only walk the items of this very <dim>; querying the whole document
        # again would add every item once per <dim> sharing the same type.
        dim.elements.each("item") { |item| tags[type] << item.text }
      end
      tags
    end
  end
end

if $0 == __FILE__
  require "optparse"
  require "csv"
  require "yaml"

  opts = OptionParser.new
  opts.banner = "Tagthenet"
  format = :yaml
  dimensions = {}

  opts.on("-t", "--text TEXT", "analyse text and print its tags") do |text|
    dimensions = Tagthenet::parse :text => text
  end

  opts.on("-u", "--url URL", "analyse a url and print its tags") do |url|
    dimensions = Tagthenet::parse :url => url
  end

  # The block parameter must not be called `format`: it would shadow the
  # outer local and the chosen format would never leave the block.
  opts.on("-f", "--format FORMAT",
          "The output format. Can be either csv or yaml(default)") do |requested|
    format = requested.to_sym
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end

  opts.parse!(ARGV)

  if format == :csv
    # One "tag,dimension" row per tag, tags sorted within each dimension.
    csv = CSV.new($stdout)
    dimensions.each do |dim, tags|
      tags.sort.each { |tag| csv << [tag, dim] }
    end
  else
    puts dimensions.to_yaml
  end
end