63 lines
1.6 KiB
Ruby
63 lines
1.6 KiB
Ruby
|
#!/usr/bin/env ruby
|
||
|
|
||
|
require 'rubygems'
|
||
|
require 'mechanize'
|
||
|
require 'fileutils'
|
||
|
|
||
|
BROWSER = Mechanize.new { |agent| agent.user_agent_alias = "Mac Safari"}
|
||
|
|
||
|
def cleanup! dir_name
|
||
|
include FileUtils
|
||
|
files = Dir["#{dir_name}/*.jpg"]
|
||
|
chapters = files.map { |f| f.split('/').last.split('Page').first.split('-').first.sub(/_$/,'') }.uniq
|
||
|
|
||
|
puts "Moving pages into chapters\n #{chapters.join("\n\t")}"
|
||
|
|
||
|
chapters.each do |chapter|
|
||
|
dir = "#{dir_name}/#{chapter}"
|
||
|
mkdir dir unless File.exists? dir
|
||
|
files.select { |file| file =~ /#{chapter}_/ }.each do |_file|
|
||
|
file = _file.split('/').last
|
||
|
mv _file, "#{dir_name}/#{chapter}/#{file}"
|
||
|
end
|
||
|
|
||
|
end
|
||
|
|
||
|
end
|
||
|
def getit! dir_name, target_url
|
||
|
puts ">>>> #{target_url}"
|
||
|
|
||
|
begin
|
||
|
BROWSER.get(target_url) do |page|
|
||
|
next_url = "http://"
|
||
|
next_url << page.uri.host
|
||
|
next_url << page.search('.//div[@id="imgholder"]//a').attr('href').to_s
|
||
|
|
||
|
url = page.search('.//div[@id="imgholder"]//img').attr('src').to_s
|
||
|
|
||
|
name = (page.title.gsub(/[\ \/:]/, '_')+"."+url.split('.').last).gsub /[:\/]/,''
|
||
|
|
||
|
name = name =~ /Page\_\d\.jpg/ ? name.sub('Page_', 'Page_0') : name
|
||
|
|
||
|
puts ">>>> Downloading #{name} ( #{url} )"
|
||
|
|
||
|
BROWSER.get(url).save "#{dir_name}/#{name}"
|
||
|
|
||
|
getit! dir_name, next_url
|
||
|
end
|
||
|
rescue => e
|
||
|
puts "Failed at #{target_url} - probably we reached the end!"
|
||
|
puts e.inspect unless ENV['DEBUG'].nil?
|
||
|
puts 'cleaning up'
|
||
|
cleanup! dir_name
|
||
|
exit 1
|
||
|
end
|
||
|
end
|
||
|
|
||
|
if ENV['CLEANUP'].nil?
|
||
|
puts "usage: \nruby mdl.rb <DIR> '<STARTURL>'" and exit 1 if ARGV.length < 2
|
||
|
getit! ARGV[0], ARGV[1]
|
||
|
else
|
||
|
cleanup! ARGV[0]
|
||
|
end
|