#!/usr/bin/env ruby

### Don't need to edit below here

%w{ rubygems hpricot open-uri }.each { |gem| require gem }

# Archives the Atom search results for one tag into a CSV file.
#
# Typical use:
#
#   archive = Twitterchive.new("ruby")
#   archive.parse            # walk every result page
#   archive.save             # writes "ruby.csv"
#
# NOTE(review): the search.twitter.com Atom endpoint used below looks like a
# legacy API -- confirm it is still served before relying on this script.
class Twitterchive
  # Element paths read from each Atom <entry>, in CSV column order.
  FIELDS = ["id", "title", "published", "author/name", "author/uri"].freeze

  # Seconds to pause between two page requests.
  PAGE_DELAY = 0.2

  # @param tag [String] the tag to search for, given unescaped; it is
  #   URL-encoded when the search URL is built.
  def initialize(tag)
    @tag = tag
    # Seed with the header row so #save works even if #parse was never called.
    @entries = [header_row]
  end

  # Downloads every result page for the tag and collects one CSV row per
  # entry. Each requested URL is printed to stdout as progress output.
  #
  # Paging stops when a page has no rel="next" link. If a request or parse
  # fails, the error is reported on stderr and paging stops, but the rows
  # collected so far are kept so they can still be saved.
  #
  # @return [nil]
  def parse
    @entries = [header_row]
    next_url = search_url

    while next_url
      puts next_url
      begin
        doc = fetch(next_url)
        # concat keeps @entries a flat list of row strings.
        @entries.concat((doc/:entry).map { |entry| entry_row(entry) })
        next_url = next_page_url(doc)
      rescue StandardError => e
        warn "Stopped at #{next_url}: #{e.class}: #{e.message}"
        next_url = nil
      end
      # Only throttle when another request is about to follow.
      sleep(PAGE_DELAY) if next_url
    end

    nil
  end

  # Writes the header and all collected rows, newline-separated.
  #
  # @param filename [String, nil] destination path; defaults to "<tag>.csv"
  #   in the current directory.
  def save(filename = nil)
    File.open(filename || "#{@tag}.csv", "w") do |file|
      file << @entries.join("\n")
    end
  end

  private

  # Header line: the field paths followed by the extra "link" column.
  def header_row
    (FIELDS + ["link"]).join(",")
  end

  # URL of the first result page for the tag.
  def search_url
    "http://search.twitter.com/search.atom?tag=#{URI.encode_www_form_component(@tag)}&lang=all&rpp=50"
  end

  # Fetches +url+ and returns the parsed Hpricot document.
  #
  # Going through URI#open instead of Kernel#open keeps a feed-supplied
  # "next" link from ever being treated as a local path or a pipe command,
  # and also works on Rubies where open-uri no longer patches Kernel#open.
  # The block form closes the response handle once parsing is done.
  def fetch(url)
    URI.parse(url).open { |io| Hpricot.parse(io) }
  end

  # Href of the rel="next" link, or nil when this is the last page.
  def next_page_url(doc)
    link = (doc/"link[@rel=next]").first
    link && link["href"]
  end

  # Builds the CSV row for one entry: every FIELDS value, then the href of
  # the entry's first <link> (empty when the entry has none).
  def entry_row(entry)
    values = FIELDS.map { |path| (entry/path).inner_text }
    link = (entry/"link").first
    values << (link ? link["href"] : "")
    values.map { |value| csv_field(value) }.join(",")
  end

  # Quotes a value when it contains a comma, double quote or line break, so
  # free-form text cannot shift columns. Values without those characters are
  # returned unchanged.
  def csv_field(value)
    text = value.to_s
    return text unless text =~ /[",\r\n]/

    %("#{text.gsub('"', '""')}")
  end
end

if __FILE__ == $0
  %w{ ostruct optparse }.each { |gem| require gem }

  # Parse the command line options
  options = OpenStruct.new
  opts = OptionParser.new
  opts.banner = "Usage: twitterchive.rb [options] archive_filename"
  opts.on("-t [tag]", "--tag [tag]", "Twitter hashtag to archive") { |tag| options.tag = tag }
  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end

  begin
    file = opts.parse(ARGV)
  rescue OptionParser::ParseError => e
    # Unknown or malformed options: show the reason and the usage text
    # instead of a stack trace.
    warn e.message
    warn opts.to_s
    exit 1
  end

  # Without a tag there is nothing to archive; say so instead of silently
  # doing nothing.
  if options.tag.to_s.empty?
    warn opts.to_s
    exit 1
  end

  tag_archive = Twitterchive.new(options.tag)
  tag_archive.parse
  tag_archive.save(file.first)
end