twitterのfollowingを増やすためのツールつくったー2

さらに改造したよ!

  • 俺のfollowingからたどった際の最大メモリ使用量が1.4GB→0.3GBと約1/5に!
  • 同、処理速度が数倍に!

HTMLパーサを無駄にバンバン作っていたのをやめただけでここまで高速に!俺のバカ!

require 'rubygems'
require 'hpricot'
require 'open-uri'
require "rexml/document" 
require 'net/http'
Net::HTTP.version_1_2

# Scrapes the friends pages of every account the logged-in user follows and
# ranks the accounts found there by how many of the user's own friends list
# them. After #get_friends the receiver (an Array) holds one
# [screen_name, TwitterUser, count] triple per discovered account, highest
# count first. #output_html renders that ranking into the friends-page
# template downloaded in #initialize.
class TwitFriends < Array
	VERSION = '0.0.3'
	TW_HOME = 'http://twitter.com/'
	TW_FRIENDS = 'http://twitter.com/friends'
	TWAPI_FRIENDS = 'http://twitter.com/statuses/friends.xml'

	# Seconds slept before every uncached friends-page request.
	TIME_INTERVAL = 10
	# Seconds slept before retrying a failed request.
	ERROR_INTERVAL = 10
	# Maximum number of friends pages fetched per account.
	MAX_PAGE = 1000
	# Maximum number of retries for one request before the error is re-raised.
	MAX_RETRIES = 10
	# Default minimum count an account needs to appear in the HTML report.
	MIN_COMMON_FRIENDS = 10
	# Directory holding the cached XML/HTML responses.
	TMP_DIR = './tmp/'

	# A single account: its screen name, the HTML fragment scraped for it
	# (nil when none was scraped) and the accounts recorded against it.
	class TwitterUser
		attr_accessor(:user, :html, :friends)

		# user:: screen name
		# html:: scraped HTML fragment for this account, if any
		def initialize(user, html=nil)
			@user = user
			@html = html
			@friends = []
		end

		# Number of entries currently stored in +friends+.
		def size
			@friends.size
		end

		# Absolute URL of this account's friends page.
		def friends_url
			TW_HOME + @user + '/friends'
		end

		# Request path of this account's friends page.
		def friends_path
			'/' + @user + '/friends'
		end
	end

	# Hash of screen name => TwitterUser that accumulates, per account, the
	# friends passed to #add.
	class TwitterUsersList < Hash
		# Registers +user+ under its screen name unless that name is already
		# present, then appends +friend+ to the stored entry's friends.
		# Returns self so calls can be chained.
		def add(user, friend)
			self[user.user] ||= user
			self[user.user].friends << friend
			self
		end

		# One "name: friend, friend, ...\n" line per entry. Friends are shown
		# by screen name when they respond to #user, otherwise via #to_s.
		def to_s
			map {|name, user|
				names = user.friends.map {|f| f.respond_to?(:user) ? f.user : f.to_s }
				name + ': ' + names.join(', ') + "\n"
			}.join
		end
	end

	# Raised by #get_page when the server answers a friends-page request
	# with a 302 response.
	class WrongPasswordError < StandardError; end

	# Builds the value of an HTTP Basic +Authorization+ header.
	# Array#pack('m') appends a newline and wraps long output, so every
	# newline is stripped to keep the header value on a single line.
	def self.basic_auth_header(user, pass)
		'Basic ' + [user + ':' + pass].pack('m').delete("\n")
	end

	# Creates the cache directory if needed, stores the credentials and
	# downloads the logged-in user's friends page as the report template.
	#
	# user:: login name
	# pass:: password
	def initialize(user, pass)
		Dir.mkdir(TMP_DIR) unless File.directory?(TMP_DIR)
		@tw_auth = {'Authorization' => self.class.basic_auth_header(user, pass)}
		@template = nil
		@tw_login_name = user
		@tw_login_pass = pass
		open_url(TW_FRIENDS, @tw_auth){|f| @template = Hpricot(f) }
	end

	# Returns the logged-in user's friends as an Array of TwitterUser, read
	# from the cache file when present and from TWAPI_FRIENDS otherwise.
	# The result is memoised only after it has been built completely, so a
	# failed download is not remembered as an empty list.
	def get_my_users
		return @friends if @friends
		xml = saved?(TwitterUser.new(@tw_login_name), 1) || fetch_my_users_xml
		users = []
		REXML::Document.new(xml).elements.each('/users/user') do |ele|
			users << TwitterUser.new(ele.elements['screen_name'].text)
		end
		@friends = users
	end

	# Path of the cache file for +name+'s page number +page+.
	def tmp_file_name(name, page)
		"#{TMP_DIR}#{name}_#{page}.html"
	end

	# Writes +body+ to the cache file for +name+ / +page+.
	def save_html(name, page, body)
		File.open(tmp_file_name(name, page), "w") {|f| f.print body }
	end

	# Returns the cached body for +user+ / +page+ as a String, or nil when
	# no cache file exists.
	def saved?(user, page)
		filename = tmp_file_name(user.user, page)
		return nil unless File.exist?(filename)
		File.open(filename) {|f| f.read }
	end

	# Returns the body of +user+'s friends page number +page+, using the
	# cache when possible. An uncached page is requested over +http+ after
	# sleeping TIME_INTERVAL seconds and is written to the cache.
	#
	# A 302 response prints a message and exits the process. Any other
	# StandardError restarts the connection and retries, at most
	# MAX_RETRIES times; after that the last error is re-raised.
	def get_page(http, user, page)
		cached = saved?(user, page)
		return cached if cached

		http_req = Net::HTTP::Get.new(user.friends_path + "?page=" + page.to_s)
		http_req['Connection'] = 'Keep-Alive'
		http_req.basic_auth(@tw_login_name, @tw_login_pass)
		sleep(TIME_INTERVAL)
		attempts = 0
		begin
			# (Re)open the session here so a failing reconnect is retried too.
			http.start unless http.started?
			res = http.request(http_req)
			raise WrongPasswordError if res.code == "302"
			http_body = res.body
		rescue WrongPasswordError
			print $!, ". id or password is wrong.\n"
			exit
		rescue StandardError
			attempts += 1
			raise if attempts > MAX_RETRIES
			print $!, "\nrestart http...\n"
			# Net::HTTP#finish raises IOError on a session that is not open.
			http.finish if http.started?
			sleep(ERROR_INTERVAL)
			retry
		end
		save_html(user.user, page, http_body)
		http_body
	end

	# Walks the friends pages of every account returned by #get_my_users and
	# replaces the receiver's contents with [name, TwitterUser, count]
	# triples, sorted by count descending and by name for equal counts.
	# Returns self.
	def get_friends
		list = TwitterUsersList.new
		uri = URI.parse(TW_HOME)
		Net::HTTP.start(uri.host) do |http|
			print "getting #{@tw_login_name}'s friends...\n"
			get_my_users.each do |user|
				collect_friends_of(http, user, list)
				print "\n"
			end
		end
		ranked = list.keys.sort_by {|name| [-list[name].friends.size, name] }
		replace(ranked.map {|name| [name, list[name], list[name].friends.size] })
	end

	# Runs #get_friends and writes the ranking into the template as HTML.
	#
	# filename::    output path; nil falls back to 'out.html' so callers may
	#               forward an optional command-line argument unchanged
	# min_friends:: entries with a smaller count are left out of the report
	def output_html(filename = 'out.html', min_friends = MIN_COMMON_FRIENDS)
		filename ||= 'out.html'
		get_friends
		@template.search("tr.vcard").remove
		@template.search("table.doing").prepend(friends_rows_html(min_friends))
		File.open(filename, "w") {|f| f.print @template.to_html }
	end

	private

	# Opens +url+ through open-uri. URI.open is used when it is defined
	# because Kernel#open stopped accepting URLs in Ruby 3.0; older
	# interpreters fall back to Kernel#open.
	def open_url(url, headers, &block)
		if URI.respond_to?(:open)
			URI.open(url, headers, &block)
		else
			open(url, headers, &block)
		end
	end

	# Downloads the friends XML of the logged-in user, caches it and returns
	# it. HTTP errors are retried at most MAX_RETRIES times, then re-raised.
	def fetch_my_users_xml
		attempts = 0
		begin
			xml = open_url(TWAPI_FRIENDS, @tw_auth){|f| f.read }
		rescue OpenURI::HTTPError
			attempts += 1
			raise if attempts > MAX_RETRIES
			print $!, ". wait and retry ...\n"
			sleep(ERROR_INTERVAL)
			retry
		end
		save_html(@tw_login_name, 1, xml)
		xml
	end

	# Adds every account listed on +user+'s friends pages to +list+, with
	# +user+ recorded as the friend. Stops at the first page without a
	# "next" link or after MAX_PAGE pages.
	def collect_friends_of(http, user, list)
		1.upto(MAX_PAGE) do |counter|
			print ' ' * 78 + "\r"
			print "getting #{user.user}'s page #{counter} ..."
			page = Hpricot(get_page(http, user, counter))
			page.search("tr.vcard").each do |row|
				name = row.search("a.uid").inner_html.gsub(/<[^>]*>/, '')
				list.add(TwitterUser.new(name, row.inner_html), user)
			end
			print "done. \r"
			break if page.search("a[@rel='me next']").empty?
		end
	end

	# Renders the leading entries of the receiver as table rows. Iteration
	# stops at the first entry whose count is below +min_friends+.
	def friends_rows_html(min_friends)
		rows = []
		each do |_name, user, count|
			break if count < min_friends
			links = user.friends.map {|friend|
				# TW_HOME already ends with a slash.
				'<a href="' + TW_HOME + friend.user + '">' + friend.user + '</a>'
			}
			rows << '<tr class="vcard">' + user.html.to_s + '</tr>' +
				'<tr class="friends"><td colspan="2">' + count.to_s + ': ' +
				links.join(', ') + '</td></tr>'
		end
		rows.join
	end
end

# Command-line entry point: USER PASS [OUTPUT_FILE].
if $0 == __FILE__
	user, pass, output = ARGV
	if user.nil? || pass.nil?
		# A usage error goes to stderr with a non-zero exit status. $0
		# already holds the path the script was invoked with.
		warn "use: #{$0} USER PASS [OUTPUT_FILE]"
		exit 1
	end
	friends = TwitFriends.new(user, pass)
	# An explicit nil would override output_html's default file name, so the
	# argument is only passed when OUTPUT_FILE was actually given.
	if output
		friends.output_html(output)
	else
		friends.output_html
	end
end