Script versi Gist: https://gist.github.com/kuntoaji/c2886b227f6cdf888cea
#!/usr/bin/env ruby
#
# Bulk-downloads zip archives from pilpres2014.kpu.go.id (c1.php,
# cmd=download) for every combination of TPS number and kelurahan id.
#
# The script is resumable: every URL whose archive was saved is appended to
# SAVED_URL_LIST, and URLs found in that list are skipped on the next run.
# Archives are written to the current directory as "<kel_id>_<tps>.zip".

require 'open-uri'
require 'set'

USER_AGENT = "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36"
SAVED_URL_LIST = "saved_list.txt"

# The largest number of TPS (polling stations) in one kelurahan is 149.
# source: http://kpukotacimahi.com/joomla-overview/40-berita-depan/135-kelurahan-melong-memiliki-tps-terbanyak-dikota-cimahi
# TPS numbers to try; may be modified.
TPS_RANGE = (1..149)
# Kelurahan ids to try; may be modified.
KEL_ID_RANGE = (1..100_000)

# Responses of this many characters or fewer are reported as "Empty" and not
# saved. NOTE(review): presumably such responses are placeholder/error pages
# rather than real archives -- threshold kept from the original script.
MIN_CONTENT_SIZE = 7000

# Upper bound on download attempts per URL, and the pause between attempts,
# so a permanently failing URL cannot stall the run or hammer the server.
MAX_ATTEMPTS = 5
RETRY_DELAY_SECONDS = 5

# Returns the set of URLs recorded by previous runs (one URL per line in
# +path+), or an empty set when the file does not exist yet. Line endings are
# stripped so lookups compare bare URLs.
def load_accessed_urls(path)
  return Set.new unless File.exist?(path)

  File.readlines(path, chomp: true).to_set
end

# Downloads +url+ and returns the response body as a String.
#
# Any StandardError (HTTP error, timeout, socket failure, ...) triggers a
# retry after RETRY_DELAY_SECONDS, up to MAX_ATTEMPTS attempts in total.
# Returns nil when every attempt failed; the caller then skips the URL
# without recording it, so a later run will try it again.
def fetch(url)
  attempts = 0
  begin
    attempts += 1
    puts "Accessing #{url}"
    # URI.open is required on Ruby 3.0+, where open-uri no longer patches
    # Kernel#open; the block form closes the underlying IO for us.
    URI.open(url, "User-Agent" => USER_AGENT) { |io| io.read }
  rescue StandardError => e
    if attempts < MAX_ATTEMPTS
      puts "Retrying..."
      sleep RETRY_DELAY_SECONDS
      retry
    end
    warn "Giving up on #{url} after #{attempts} attempts (#{e.class}: #{e.message})"
    nil
  end
end

# Writes +content+ to +file_name+, records +url+ in SAVED_URL_LIST, then
# sleeps for a random 10-15 seconds before the next download.
def save_archive(file_name, url, content)
  # Binary mode: the payload is a zip archive, and text mode would mangle
  # line-ending bytes on Windows.
  File.binwrite(file_name, content)
  File.open(SAVED_URL_LIST, "a") { |f| f.puts url }
  puts "#{file_name} is successfully saved"

  sleep_in_seconds = Random.rand(10..15)
  puts "sleeping for #{sleep_in_seconds} seconds.."
  sleep sleep_in_seconds
end

# Walks every (tps, kel_id) pair and downloads the archives not yet fetched.
def run
  accessed_urls = load_accessed_urls(SAVED_URL_LIST)

  TPS_RANGE.each do |tps|
    KEL_ID_RANGE.each do |kel_id|
      url = "http://pilpres2014.kpu.go.id/c1.php?cmd=download&tps=#{tps}&kel_id=#{kel_id}"
      next if accessed_urls.include?(url)

      file_name = "#{kel_id}_#{tps}.zip"
      # Check the disk before touching the network: there is no point in
      # downloading an archive that would be discarded anyway.
      if File.exist?(file_name)
        puts "#{file_name} is exist"
        next
      end

      content = fetch(url)
      next if content.nil?

      if content.size > MIN_CONTENT_SIZE
        save_archive(file_name, url, content)
      else
        puts "Empty"
      end
    end
  end

  puts "done"
end

run if $PROGRAM_NAME == __FILE__
No comments:
Post a Comment