twitterのfollowingを増やすためのツールつくったー2
さらに改造したよ!
- 俺のfollowingからたどった際の最大メモリ使用量が1.4GB→0.3GBと約1/5に!
- 同、処理速度が数倍に!
HTMLパーサを無駄にバンバン作っていたのをやめただけでここまで高速に!俺のバカ!
require 'rubygems'
require 'hpricot'
require 'open-uri'
require "rexml/document"
require 'net/http'
Net::HTTP.version_1_2

# Crawls the "friends" pages of every account the logged-in user follows,
# counts how many of those accounts follow each discovered user, and renders
# the result as an HTML page built from the user's own friends page.
#
# After #get_friends the instance (an Array) holds
# [screen_name, TwitterUser, count] triples sorted by count, descending.
# Every fetched page is cached under TMP_DIR so an interrupted run can resume.
class TwitFriends < Array
  VERSION = '0.0.3'
  TW_HOME = 'http://twitter.com/'
  TW_FRIENDS = 'http://twitter.com/friends'
  TWAPI_FRIENDS = 'http://twitter.com/statuses/friends.xml'
  TIME_INTERVAL = 10   # seconds slept before every uncached page request
  ERROR_INTERVAL = 10  # seconds slept before retrying a failed request
  MAX_PAGE = 1000      # upper bound on pages fetched per user
  MAX_RETRY = 30       # upper bound on consecutive retries of one request
  TMP_DIR = './tmp/'
  DEFAULT_OUTPUT = 'out.html'

  # A Twitter account: screen name, the HTML row it was scraped from (if any)
  # and the list of accounts associated with it via TwitterUsersList#add.
  class TwitterUser
    attr_accessor(:user, :html, :friends)

    def initialize(user, html = nil)
      @user = user
      @html = html
      @friends = []
    end

    # Number of entries in #friends.
    def size
      @friends.size
    end

    # Absolute URL of this user's friends page.
    def friends_url
      TW_HOME + @user + '/friends'
    end

    # Path of this user's friends page, for use on an open HTTP session.
    def friends_path
      '/' + @user + '/friends'
    end
  end

  # Maps screen name => TwitterUser, accumulating in each entry's #friends
  # the objects passed as +friend+ to #add.
  class TwitterUsersList < Hash
    # Registers +user+ under its screen name (first registration wins) and
    # appends +friend+ to the registered entry. Returns self for chaining.
    def add(user, friend)
      self[user.user] ||= user
      self[user.user].friends << friend
      self
    end

    # One "name: friend, friend" line per entry. Friends that respond to
    # #user are shown by screen name rather than by their default #to_s.
    def to_s
      map do |name, user|
        names = user.friends.map { |f| f.respond_to?(:user) ? f.user : f.to_s }
        "#{name}: #{names.join(', ')}\n"
      end.join
    end
  end

  class WrongPasswordError < StandardError; end

  # Creates TMP_DIR if needed, stores the credentials and downloads the
  # logged-in user's friends page, which later serves as the output template.
  #
  # user:: login screen name
  # pass:: login password
  def initialize(user, pass)
    Dir.mkdir(TMP_DIR) unless File.directory?(TMP_DIR)
    @tw_login_name = user
    @tw_login_pass = pass
    # pack('m') appends "\n" and wraps long input; a header value must not
    # contain line breaks, so strip them.
    credentials = [user + ':' + pass].pack('m').delete("\n")
    @tw_auth = { 'Authorization' => 'Basic ' + credentials }
    @template = nil
    open_url(TW_FRIENDS, @tw_auth) { |f| @template = Hpricot(f) }
  end

  # Returns the accounts the logged-in user follows as TwitterUser objects.
  # The XML is read from the cache when present, otherwise fetched and cached.
  # The result is memoized only once it has been built completely, so a
  # failed fetch does not leave an empty list behind.
  def get_my_users
    return @friends if @friends

    xml = saved?(TwitterUser.new(@tw_login_name), 1)
    unless xml
      xml = fetch_friends_xml
      save_html(@tw_login_name, 1, xml)
    end

    friends = []
    REXML::Document.new(xml).elements.each('/users/user') do |ele|
      friends << TwitterUser.new(ele.elements['screen_name'].text)
    end
    @friends = friends
  end

  # Cache file path for page +page+ of user +name+.
  def tmp_file_name(name, page)
    TMP_DIR + name + '_' + page.to_s + '.html'
  end

  # Writes +body+ to the cache file for (+name+, +page+).
  def save_html(name, page, body)
    File.open(tmp_file_name(name, page), "w") { |f| f.print body }
  end

  # Returns the cached body for (+user+, +page+), or nil when not cached.
  def saved?(user, page)
    filename = tmp_file_name(user.user, page)
    return nil unless File.exist?(filename)

    File.read(filename)
  end

  # Returns the HTML of page +page+ of +user+'s friends list, from the cache
  # when available, otherwise over the open session +http+ (then cached).
  #
  # A 302 response is treated as bad credentials: a message is printed and
  # the process exits. Any other error restarts the session and retries, at
  # most MAX_RETRY times, after which the last error is re-raised.
  def get_page(http, user, page)
    cached = saved?(user, page)
    return cached if cached

    http_req = Net::HTTP::Get.new(user.friends_path + "?page=" + page.to_s)
    http_req['Connection'] = 'Keep-Alive'
    http_req.basic_auth(@tw_login_name, @tw_login_pass)
    sleep(TIME_INTERVAL)

    attempts = 0
    begin
      res = http.request(http_req)
      raise WrongPasswordError if res.code == "302"
      http_body = res.body
    rescue WrongPasswordError => e
      print e, ". id or password is wrong.\n"
      exit
    rescue => e
      attempts += 1
      raise if attempts > MAX_RETRY
      print e, "\nrestart http...\n"
      # finish raises IOError on a session that is already closed.
      http.finish if http.started?
      sleep(ERROR_INTERVAL)
      http.start
      retry
    end

    save_html(user.user, page, http_body)
    http_body
  end

  # Crawls the friends pages of every account returned by #get_my_users and
  # replaces the contents of self with [name, TwitterUser, count] triples,
  # ordered by count descending.
  def get_friends
    list = TwitterUsersList.new
    uri = URI.parse(TW_HOME)
    Net::HTTP.start(uri.host) do |http|
      print "getting #{@tw_login_name}'s friends...\n"
      get_my_users.each do |user|
        collect_friends_of(http, user, list)
        print "\n"
      end
    end

    ranked = list.keys.sort_by { |name| -list[name].friends.size }
    replace(ranked.map { |name| [name, list[name], list[name].friends.size] })
  end

  # Runs #get_friends and writes the ranking into +filename+, reusing the
  # page downloaded in #initialize as the template.
  #
  # filename::  output path; nil falls back to DEFAULT_OUTPUT
  # min_count:: entries with a smaller count are left out
  def output_html(filename = 'out.html', min_count = 10)
    filename ||= DEFAULT_OUTPUT
    get_friends
    @template.search("tr.vcard").remove

    rows = []
    each do |_name, user, count|
      # self is sorted by count descending, so nothing after this qualifies.
      break if count < min_count
      links = user.friends.map do |friend|
        # TW_HOME already ends with '/', so no separator is added here.
        '<a href="' + TW_HOME + friend.user + '">' + friend.user + '</a>'
      end
      rows << '<tr class="vcard">' + user.html.to_s + '</tr>' +
              '<tr class="friends"><td colspan="2">' + count.to_s + ': ' +
              links.join(', ') + '</td></tr>'
    end

    @template.search("table.doing").prepend(rows.join)
    File.open(filename, "w") { |f| f.print @template.to_html }
  end

  private

  # Opens +url+ through open-uri with the given request +headers+, yielding
  # the response IO. Uses URI.open where it exists and falls back to
  # Kernel#open otherwise.
  def open_url(url, headers, &block)
    if URI.respond_to?(:open)
      URI.open(url, headers, &block)
    else
      open(url, headers, &block)
    end
  end

  # Downloads the friends XML of the logged-in user, retrying on HTTP errors
  # at most MAX_RETRY times before re-raising the last error.
  def fetch_friends_xml
    attempts = 0
    begin
      open_url(TWAPI_FRIENDS, @tw_auth) { |f| f.read }
    rescue OpenURI::HTTPError => e
      attempts += 1
      raise if attempts > MAX_RETRY
      print e, ". wait and retry ...\n"
      sleep(ERROR_INTERVAL)
      retry
    end
  end

  # Walks +user+'s friends pages (at most MAX_PAGE of them) and records in
  # +list+ that +user+ is associated with every account found in a
  # "tr.vcard" row. Stops early when a page has no "next" link.
  def collect_friends_of(http, user, list)
    1.upto(MAX_PAGE) do |counter|
      print ' ' * 78 + "\r"
      print "getting #{user.user}'s page #{counter} ..."
      page = Hpricot(get_page(http, user, counter))
      page.search("tr.vcard").each do |html|
        name = html.search("a.uid").inner_html.gsub(/<[^>]*>/, '')
        list.add(TwitterUser.new(name, html.inner_html), user)
      end
      print "done. \r"
      break if page.search("a[@rel='me next']").empty?
    end
  end
end

# Command-line entry point: USER PASS [OUTPUT_FILE].
if $0 == __FILE__
  user = ARGV.shift
  pass = ARGV.shift
  output = ARGV.shift
  if user.nil? || pass.nil?
    print "use: ./#{$0} USER PASS [OUTPUT_FILE]\n"
    exit
  end
  # output may be nil here; output_html falls back to its default path.
  TwitFriends.new(user, pass).output_html(output)
end