require 'uri' require 'net/http' require 'time' require 'kconv' $KCODE = 'UTF-8' # # = enokidu.rb # # Copyright; 2008 ODA Kaname [trashsuite@gmail.com] # See also ; http://d.hatena.ne.jp/trashsuite/ # # ウェブページ更新確認スクリプト # module Enokidu class UserAgent NAME = 'Enokidu::Antenna' VERSION = '0.1.5' end class Antenna ERROR_CODE = { :connerr => 900, :noroute => 901, :sockerr => 902, :timeout => 903, :unknown => 999 } def initialize(options = {}) @timeout = options[:timeout] || 30 @debug = options[:debug] || false @request = Request.new(:timeout => @timeout, :debug => @debug) end def detective(options = {}) page = options[:page] page.body ||= '' # タイトルを取得 if page.title.empty? @request.get(page) page.last_modified_at = nil end # 初回は HEAD が使えるかどうか確認する page.method = 'HEAD' if page.last_modified_at.nil? page.updated = false case page.method when 'GET' then @request.get(page) when 'HEAD' then @request.head(page) else raise Request::InvalidMethod end rescue Request::Redirect retry rescue Request::InvalidHeadResponse page.method = 'GET' retry rescue Exception => exception case exception when Errno::ECONNREFUSED then page.code = ERROR_CODE[:connerr] when Errno::EHOSTUNREACH then page.code = ERROR_CODE[:noroute] when Timeout::Error then page.code = ERROR_CODE[:timeout] when SocketError then page.code = ERROR_CODE[:sockerr] else puts exception.class page.code = ERROR_CODE[:unknown] end page end end # Antenna class Request def initialize(options = {}) @http_header = {'User-Agent' => Enokidu::UserAgent::NAME, 'Connection' => 'close'} @timeout = options[:timeout] || 30 @debug = options[:debug] || false end def get(page) page.method = 'GET' debug_print "sync by GET method" debug_print "initial sync" if page.body.empty? debug_print "title #{page.title}" unless page.body.empty? uri = URI.parse(page.uri) http = http_instance(uri) res = nil # 更新状況を聞いてみる %w[If-Modified-Since, If-None-Match].each {|header|@http_header.delete header} if !page.title.empty? and !page.has_range? debug_print 'set If-Modified-Since' @http_header['If-Modified-Since'] = page.last_modified_at.httpdate if page.last_modified_at @http_header['If-None-Match'] = page.etag unless page.etag.empty? end timeout(@timeout) do res = http.get(mkpath(uri), @http_header) end page.code = res.code debug_print "Return code #{res.code}" # Redirect if res.code.match(/^30[12]$/) location = res['location'] || '' old_uri = page.uri page.uri = location # ロケーションが空または不完全な場合 if !location.empty? and !location.match(%r[^http://]) uri = URI.parse(old_uri) path, query = location.split('?') uri.path = path uri.query = query || '' page.uri = uri.to_s end raise Redirect end # 親切な御仁に感謝しつつ終了 if res.instance_of? Net::HTTPNotModified debug_print 'use If-Modified-Since' page.updated = false return page end # タイトルを抜き取る body = res.body.toutf8 if page.title.empty? debug_print "get page title" page.title = body.scan(/