- followしたいユーザのhtmlを1枚のhtmlに貼り付けるんじゃなくて、iframeで各ユーザのページを呼び出すようにした。
- 1ページ10人分ぐらいの表示にしてあまりtwitter.comを叩かないようにした。
- いつの間にか最大メモリ使用量が100MBも使わなくなった。なんか書き換えたっけ…?
- friends.xmlもキャッシュするようにした。
require 'rubygems'
require 'hpricot'
require 'open-uri'
require "rexml/document"
require 'net/http'
Net::HTTP.version_1_2
# Collects the users followed by the friends of one Twitter account, ranks
# them by how many of those friends follow each one, and renders the ranking
# as a series of static HTML pages.
#
# The instance itself is an Array. After #get_friends it holds one
# [screen_name, TwitterUser, follower_count] triple per discovered user,
# most-followed first. #output_html consumes (empties) that array.
#
# Relies on the third-party `hpricot` gem for HTML parsing.
class TwitFriends < Array
  VERSION = '0.0.4'.freeze
  TW_HOME = 'http://twitter.com/'.freeze
  TW_FRIENDS = 'http://twitter.com/friends'.freeze
  TWAPI_FRIENDS = 'http://twitter.com/statuses/friends.xml'.freeze
  TIME_INTERVAL = 10      # seconds slept before every uncached page request
  ERROR_INTERVAL = 10     # seconds slept before retrying a failed request
  MAX_PAGE = 1000         # upper bound on friend-list pages fetched per user
  TMP_DIR = './tmp/'.freeze
  DEFAULT_OUTPUT = 'out.html'.freeze
  MAX_USER_PER_PAGE = 10  # entries rendered on each output page

  # A Twitter account together with the list of objects registered for it
  # through TwitterUsersList#add.
  class TwitterUser
    attr_accessor :user, :html, :friends

    # user:: screen name
    # html:: optional HTML fragment scraped for this user
    def initialize(user, html = nil)
      @user = user
      @html = html
      @friends = []
    end

    # Number of entries in #friends.
    def size
      @friends.size
    end

    # Absolute URL of this user's friends page.
    def friends_url
      TW_HOME + @user + '/friends'
    end

    # Request path of this user's friends page (no host part).
    def friends_path
      '/' + @user + '/friends'
    end

    # Screen name, so that joining a list of users yields readable names
    # instead of the default object inspection string.
    def to_s
      @user.to_s
    end
  end

  # Hash of screen name => TwitterUser that accumulates, per user, the
  # friends that were registered for it.
  class TwitterUsersList < Hash
    # Registers +friend+ under +user+. The first TwitterUser seen for a
    # screen name is kept; later ones only contribute their +friend+.
    # Returns self so calls can be chained.
    def add(user, friend)
      self[user.user] ||= user
      self[user.user].friends << friend
      self
    end

    # One "name: friend, friend, ...\n" line per entry.
    def to_s
      map { |name, user| "#{name}: #{user.friends.join(', ')}\n" }.join
    end
  end

  class WrongPasswordError < StandardError; end

  # user:: login screen name
  # pass:: login password
  #
  # Creates TMP_DIR (relative to the current directory) when it is missing.
  def initialize(user, pass)
    Dir.mkdir(TMP_DIR) unless FileTest.directory?(TMP_DIR)
    # pack('m') appends a newline and wraps long output; a header value must
    # not contain line breaks, so strip them.
    credentials = [user + ':' + pass].pack('m').delete("\n")
    @tw_auth = { 'Authorization' => 'Basic ' + credentials }
    @template = nil
    @friends = nil
    @tw_login_name = user
    @tw_login_pass = pass
  end

  # Returns the TwitterUser list for the login account's friends, read from
  # the cached friends XML when present and downloaded (then cached)
  # otherwise. The result is memoized only after it was parsed successfully.
  #
  # HTTP errors are retried indefinitely, ERROR_INTERVAL seconds apart.
  def get_my_users
    return @friends if @friends

    xml = saved?(TwitterUser.new(@tw_login_name), 1)
    unless xml
      begin
        xml = URI.parse(TWAPI_FRIENDS).open(@tw_auth) { |f| f.read }
      rescue OpenURI::HTTPError => e
        print e, ". wait and retry ...\n"
        sleep(ERROR_INTERVAL)
        retry
      end
      save_html(@tw_login_name, 1, xml)
    end

    friends = []
    REXML::Document.new(xml).elements.each('/users/user') do |ele|
      friends << TwitterUser.new(ele.elements['screen_name'].text)
    end
    @friends = friends
  end

  # Cache file path for +name+ / +page+. Note the friends XML of the login
  # account is stored under the same naming scheme (page 1).
  def tmp_file_name(name, page)
    TMP_DIR + name + '_' + page.to_s + '.html'
  end

  # Writes +body+ to the cache file for +name+ / +page+.
  def save_html(name, page, body)
    File.open(tmp_file_name(name, page), 'w') do |f|
      f.print body
    end
  end

  # Returns the cached content for +user+ / +page+, or nil when there is no
  # cache file. (Despite the name, a hit returns the content, not true.)
  def saved?(user, page)
    filename = tmp_file_name(user.user, page)
    return nil unless FileTest.exist?(filename)

    File.read(filename)
  end

  # Returns the body of page number +page+ of +user+'s friends list, from
  # the cache when available, otherwise fetched over the started Net::HTTP
  # session +http+ and then cached.
  #
  # A 302 response is treated as bad credentials: a message is printed and
  # the process exits. Any other failure restarts the connection and
  # retries indefinitely.
  def get_page(http, user, page)
    cached = saved?(user, page)
    return cached if cached

    http_req = Net::HTTP::Get.new(user.friends_path + '?page=' + page.to_s)
    http_req['Connection'] = 'Keep-Alive'
    http_req.basic_auth(@tw_login_name, @tw_login_pass)
    sleep(TIME_INTERVAL)
    begin
      res = http.request(http_req)
      raise WrongPasswordError if res.code == '302'
    rescue WrongPasswordError => e
      print e, ". id or password is wrong.\n"
      exit
    rescue StandardError => e
      print e, "\nrestart http...\n"
      # finish raises when the session is not started; guard so the
      # recovery path itself cannot abort the retry.
      http.finish if http.started?
      sleep(ERROR_INTERVAL)
      http.start
      retry
    end
    http_body = res.body
    save_html(user.user, page, http_body)
    http_body
  end

  # Walks the friends pages of every friend of the login account, counts how
  # many of them follow each discovered user, and replaces the contents of
  # self with [name, TwitterUser, count] triples sorted by descending count
  # (ties broken by name so the order is deterministic). Returns self.
  def get_friends
    list = TwitterUsersList.new
    uri = URI.parse(TW_HOME)
    Net::HTTP.start(uri.host) do |http|
      print "getting #{@tw_login_name}'s friends...\n"
      get_my_users.each do |user|
        # At most MAX_PAGE pages per user; stop early when there is no
        # "next" link on the page.
        1.upto(MAX_PAGE) do |counter|
          print ' ' * 78 + "\r"
          print "getting #{user.user}'s page #{counter} ..."
          page = Hpricot(get_page(http, user, counter))
          page.search('tr.vcard').each do |html|
            name = html.search('a.uid').inner_html.gsub(/<[^>]*>/, '')
            list.add(TwitterUser.new(name, html.inner_html), user)
          end
          print "done. \r"
          break if page.search("a[@rel='me next']").empty?
        end
        print "\n"
      end
    end
    ranked = list.keys.sort_by { |name| [-list[name].friends.size, name] }
    replace(ranked.map { |name| [name, list[name], list[name].friends.size] })
  end

  HTML_HEAD = <<-EOH.freeze
<html><head><title>get friends!!!</title>
<style>iframe.friends{ height: 13em; width: 50%; }</style></head>
<body><h1><!--__PAGE__--></h1><p><!--__FRIENDS_NUM__--></p><hr />
  EOH
  HTML_BODY = <<-EOH.freeze
<p><!--__FRIENDS_LIST__--></p>
<!--__IFRAME__-->
<hr />
  EOH
  HTML_FOOT = <<-EOH.freeze
<!--__NEXT_PAGE__-->
</body></html>
  EOH

  # Writes the ranking to numbered HTML files derived from +filename+
  # ("out.html" -> "out_1.html", "out_2.html", ...; a name without an
  # extension gets just the "_N" suffix), MAX_USER_PER_PAGE entries each.
  #
  # The entries are shifted off self while writing, so self is empty
  # afterwards. Returns nil.
  def output_html(filename = nil)
    filename = DEFAULT_OUTPUT if filename.nil?
    ext = File.extname(filename)
    stem = filename[0, filename.length - ext.length]
    # Capture the total up front: self shrinks as pages are shifted off.
    total = size
    counter = 0
    while (friends = shift_one_page)
      counter += 1
      File.open("#{stem}_#{counter}#{ext}", 'w') do |f|
        # Block-form gsub keeps backslashes in the inserted text literal.
        f << HTML_HEAD.gsub(/<!--__PAGE__-->/) { "page #{counter}" }.
                       gsub(/<!--__FRIENDS_NUM__-->/) { "found #{total} users." }
        friends.each { |friend| f << friend_section(friend) }
        f << HTML_FOOT.gsub(/<!--__NEXT_PAGE__-->/) { footer_navigation(stem, counter, ext) }
      end
    end
  end

  # Removes and returns up to MAX_USER_PER_PAGE leading entries of self, or
  # nil when self is already empty.
  def shift_one_page
    page = shift(MAX_USER_PER_PAGE)
    page.empty? ? nil : page
  end

  private

  # HTML for one ranking entry: the follower count and profile links of the
  # followers, plus an iframe showing the ranked user's own page.
  def friend_section(friend)
    name, user, count = friend
    links = user.friends.map { |follower|
      '<a href="' + TW_HOME + follower.user + '">' + follower.user + '</a>'
    }.join(', ')
    iframe = '<iframe class="friends" src="' + TW_HOME + name + '#content"></iframe>'
    HTML_BODY.gsub(/<!--__FRIENDS_LIST__-->/) { "#{count}: #{links}" }.
              gsub(/<!--__IFRAME__-->/) { iframe }
  end

  # Footer with prev/next links for page +counter+. Links use only the file
  # name part of +stem+ because all pages are written to the same directory.
  def footer_navigation(stem, counter, ext)
    link_stem = File.basename(stem)
    navi = []
    navi << "<a href=\"#{link_stem}_#{counter - 1}#{ext}\">&lt;&lt; prev</a> |" if counter > 1
    navi << " <a href=\"#{link_stem}_#{counter + 1}#{ext}\">next &gt;&gt;</a>" unless empty?
    '<div id="footer">' + navi.join + '</div>'
  end
end
# Command-line entry point; runs only when this file is executed directly.
# Arguments: USER PASS [OUTPUT_FILE]. Prints a usage line and exits when
# USER or PASS is missing.
if __FILE__ == $0
  user, pass, output = ARGV.shift(3)
  unless user && pass
    print "use: ./#{$0} USER PASS [OUTPUT_FILE]\n"
    exit
  end
  TwitFriends.new(user, pass).get_friends.output_html(output)
end