Script versi Gist: https://gist.github.com/kuntoaji/c2886b227f6cdf888cea
#!/usr/bin/env ruby
require 'open-uri'
# Downloads C1 zip archives from pilpres2014.kpu.go.id for every
# (tps, kel_id) combination in the ranges below.
#
# Each downloaded archive is written to "<kel_id>_<tps>.zip" in the current
# directory, and its URL is appended to saved_list.txt so later runs skip it.
require 'set'

user_agent = "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36"
saved_url_list = "saved_list.txt"

# Give up on a URL after this many failed attempts instead of retrying forever
# (a permanent error such as a 404 would otherwise never terminate).
max_attempts = 5

# URLs recorded by previous runs. Lines are chomped so the lookup does not
# depend on a trailing newline, and kept in a Set for constant-time checks.
accessed_urls =
  if File.exist?(saved_url_list)
    File.readlines(saved_url_list, chomp: true).to_set
  else
    Set.new
  end

# Fetches +url+ and returns the response body as a String, or nil when every
# attempt failed. The block form of URI.open closes the handle after reading.
fetch = lambda do |url|
  attempts = 0
  begin
    attempts += 1
    puts "Accessing #{url}"
    URI.open(url, "User-Agent" => user_agent, &:read)
  rescue StandardError => e
    if attempts < max_attempts
      puts "Retrying..."
      sleep attempts # short linear back-off between attempts
      retry
    end
    puts "Giving up on #{url} after #{attempts} attempts: #{e.message}"
    nil
  end
end

# The largest TPS count is 149.
# source: http://kpukotacimahi.com/joomla-overview/40-berita-depan/135-kelurahan-melong-memiliki-tps-terbanyak-dikota-cimahi
# Number of TPS; can be modified.
(1..149).each do |tps|
  # Kelurahan id range; can be modified.
  (1..100_000).each do |kel_id|
    url = "http://pilpres2014.kpu.go.id/c1.php?cmd=download&tps=#{tps}&kel_id=#{kel_id}"
    next if accessed_urls.include?(url)

    file_name = "#{kel_id}_#{tps}.zip"
    # Check for an existing file before downloading, to avoid a wasted request.
    if File.exist?(file_name)
      puts "#{file_name} is exist"
      next
    end

    content = fetch.call(url)
    next if content.nil?

    # Responses of 7000 bytes or fewer are treated as empty and not saved.
    if content.bytesize > 7000
      # Binary mode keeps the zip bytes intact on every platform.
      File.binwrite(file_name, content)
      File.open(saved_url_list, "a") { |f| f.puts url }
      puts "#{file_name} is successfully saved"
      sleep_in_seconds = Random.rand(10..15)
      puts "sleeping for #{sleep_in_seconds} seconds.."
      sleep sleep_in_seconds
    else
      puts "Empty"
    end
  end
end
puts "done"
No comments:
Post a Comment