From d1d1e6e408d7088cd9cb3679d11bcc0561f61b3d Mon Sep 17 00:00:00 2001
From: petopeto
Date: Tue, 31 Aug 2010 09:15:55 +0000
Subject: [PATCH] add batch uploader; not very well tested yet, requires hpricot

--HG--
branch : moe
extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%4066
---
 app/controllers/batch_controller.rb           | 140 ++++++++++++++++++
 app/models/batch_upload.rb                    |  50 +++++++
 app/models/job_task.rb                        |  20 ++-
 app/views/batch/create.html.erb               |  89 +++++++++++
 app/views/batch/index.html.erb                |  75 ++++++++++
 .../20100831065951_add_batch_uploads.rb       |  32 ++++
 lib/extract_urls.rb                           |  31 ++++
 public/stylesheets/default.css                |   5 +
 8 files changed, 440 insertions(+), 2 deletions(-)
 create mode 100644 app/controllers/batch_controller.rb
 create mode 100644 app/models/batch_upload.rb
 create mode 100644 app/views/batch/create.html.erb
 create mode 100644 app/views/batch/index.html.erb
 create mode 100644 db/migrate/20100831065951_add_batch_uploads.rb
 create mode 100644 lib/extract_urls.rb

diff --git a/app/controllers/batch_controller.rb b/app/controllers/batch_controller.rb
new file mode 100644
index 00000000..1bb8d096
--- /dev/null
+++ b/app/controllers/batch_controller.rb
@@ -0,0 +1,140 @@
+require 'extract_urls'
+
+# Queues remote image URLs as BatchUpload rows and lets users manage their queue.
+class BatchController < ApplicationController
+  layout 'default'
+  before_filter :contributor_only, :only => [:index, :update, :create, :enqueue]
+  verify :method => :post, :only => [:update, :enqueue]
+
+  # Lists queued uploads, 25 per page.  Mods may pass user_id (or "all") to
+  # see other users' queues; everyone else sees only their own.
+  def index
+    if @current_user.is_mod_or_higher? and params[:user_id] == "all" then
+      user_id = nil
+    elsif @current_user.is_mod_or_higher? and params[:user_id] then
+      user_id = params[:user_id]
+    else
+      user_id = @current_user.id
+    end
+
+    options = {:per_page => 25, :order => "created_at ASC, id ASC", :page => params[:page]}
+    conds = []
+    cond_params = []
+    if not user_id.nil? then
+      conds.push("user_id = ?")
+      cond_params.push(user_id)
+    end
+    # conds.push("batch_uploads.status = 'deleted'")
+    options[:conditions] = [conds.join(" AND "), *cond_params]
+    @items = BatchUpload.paginate(options)
+  end
+
+  # Applies the bulk action in params[:do] (pause, unpause, retry, clear_finished,
+  # abort_all) to inactive uploads of the current user, or of user_id/"all" for mods.
+  def update
+    conds = []
+    cond_params = []
+
+    if @current_user.is_mod_or_higher? and params[:user_id] == "all" then # no user filter
+    elsif @current_user.is_mod_or_higher? and params[:user_id] then
+      conds.push("user_id = ?")
+      cond_params.push(params[:user_id])
+    else
+      conds.push("user_id = ?")
+      cond_params.push(@current_user.id)
+    end
+
+    # Never touch active files.  This can race with the uploader.
+    conds.push("not active")
+
+    count = 0
+
+    if params[:do] == "pause" then
+      conds.push("status = 'pending'")
+      BatchUpload.find(:all, :conditions => [conds.join(" AND "), *cond_params]).each { |item|
+        item.update_attributes(:status => "paused")
+        count += 1
+      }
+      flash[:notice] = "Paused %i uploads." % count
+    elsif params[:do] == "unpause" then
+      conds.push("status = 'paused'")
+      BatchUpload.find(:all, :conditions => [conds.join(" AND "), *cond_params]).each { |item|
+        item.update_attributes(:status => "pending")
+        count += 1
+      }
+      flash[:notice] = "Resumed %i uploads." % count
+    elsif params[:do] == "retry" then
+      conds.push("status = 'error'")
+
+      BatchUpload.find(:all, :conditions => [conds.join(" AND "), *cond_params]).each { |item|
+        item.update_attributes(:status => "pending")
+        count += 1
+      }
+
+      flash[:notice] = "Retrying %i uploads." % count
+    elsif params[:do] == "clear_finished" then
+      # Parenthesized: conds are joined with AND, which binds tighter than OR.
+      conds.push("(status = 'finished' or status = 'error')")
+      BatchUpload.find(:all, :conditions => [conds.join(" AND "), *cond_params]).each { |item|
+        item.destroy
+        count += 1
+      }
+      flash[:notice] = "Cleared %i finished uploads." % count
+    elsif params[:do] == "abort_all" then
+      conds.push("status = 'pending'")
+      BatchUpload.find(:all, :conditions => [conds.join(" AND "), *cond_params]).each { |item|
+        item.destroy
+        count += 1
+      }
+
+      flash[:notice] = "Cancelled %i uploads." % count
+    end
+
+    redirect_to :action => "index"
+    return
+  end
+
+  # With params[:url], fetches that page and puts the image links found in it
+  # into @urls; without it, nothing is set and the view asks for a URL.
+  def create
+    if params[:url] then
+      @source = params[:url]
+
+      text = ""
+      Danbooru.http_get_streaming(@source) do |response|
+        response.read_body do |block|
+          text << block
+        end
+      end
+
+      @urls = ExtractUrls.extract_image_urls(@source, text)
+    end
+  end
+
+  # Queues every URL in params[:files] for the current user with the submitted tags.
+  def enqueue
+    # Ignore duplicate URLs across users, but duplicate URLs for the same user aren't allowed.
+    # If that happens, just update the tags.
+    post_params = params[:post] || {}
+    tags = (post_params[:tags] || "").split(/ /)
+    if post_params[:rating] then
+      # Add this to the beginning, so any rating: metatags in the tags will
+      # override it.
+      tags = ["rating:" + post_params[:rating]] + tags
+    end
+    tags.push("hold")
+    tags = tags.uniq.join(" ")
+
+    count = 0
+    (params[:files] || []).each do |url|
+      b = BatchUpload.find_or_initialize_by_url_and_user_id(:user_id => @current_user.id, :url => url)
+      b.tags = tags
+      b.ip = request.remote_ip
+      b.save!
+      count += 1
+    end
+
+    flash[:notice] = "Queued %i files" % count
+    redirect_to :action => "index"
+  end
+end
diff --git a/app/models/batch_upload.rb b/app/models/batch_upload.rb
new file mode 100644
index 00000000..2e82c67c
--- /dev/null
+++ b/app/models/batch_upload.rb
@@ -0,0 +1,50 @@
+# A queued remote upload: one source URL plus tags, owned by a user.
+class BatchUpload < ActiveRecord::Base
+  belongs_to :user
+
+  # Decoded contents of the data_as_json column.
+  def data
+    JSON.parse(data_as_json)
+  end
+
+  # Serializes +hoge+ with to_json and stores it in data_as_json.
+  def data=(hoge)
+    self.data_as_json = hoge.to_json
+  end
+
+  # Creates a Post from this entry, stores the outcome in data_as_json and sets
+  # status to 'finished' or 'error'.  active is set for the duration of the upload.
+  def run
+    self.active = true
+    self.save!
+
+    @post = Post.create({:source => self.url, :tags => self.tags, :updater_user_id => self.user_id, :updater_ip_addr => self.ip, :user_id => self.user_id, :ip_addr => self.ip, :status => "active"})
+
+    if @post.errors.empty?
+      if CONFIG["dupe_check_on_upload"] && @post.image? && @post.parent_id.nil?
+        options = { :services => SimilarImages.get_services("local"), :type => :post, :source => @post }
+
+        res = SimilarImages.similar_images(options)
+        if not res[:posts].empty?
+          @post.tags = @post.tags + " possible_duplicate"
+          @post.save!
+        end
+      end
+
+      self.data = { :success => true, :post_id => @post.id }
+    elsif @post.errors.invalid?(:md5)
+      # Report the post that already has this md5, when it can still be found.
+      existing = Post.find_by_md5(@post.md5)
+      self.data = { :success => false, :error => "Post already exists", :post_id => existing && existing.id }
+    else
+      self.data = { :success => false, :error => @post.errors.full_messages.join(", ") }
+    end
+
+    self.active = false
+
+    self.status = self.data["success"] ? 'finished' : 'error'
+
+    self.save!
+  end
+end
+
diff --git a/app/models/job_task.rb b/app/models/job_task.rb
index 5293986c..2a17a287 100644
--- a/app/models/job_task.rb
+++ b/app/models/job_task.rb
@@ -1,5 +1,5 @@
 class JobTask < ActiveRecord::Base
-  TASK_TYPES = %w(mass_tag_edit approve_tag_alias approve_tag_implication calculate_favorite_tags upload_posts_to_mirrors periodic_maintenance)
+  TASK_TYPES = %w(mass_tag_edit approve_tag_alias approve_tag_implication calculate_favorite_tags upload_posts_to_mirrors periodic_maintenance upload_batch_posts)
   STATUSES = %w(pending processing finished error)
 
   validates_inclusion_of :task_type, :in => TASK_TYPES
@@ -120,6 +120,14 @@ class JobTask < ActiveRecord::Base
     end
   end
 
+  # Runs the oldest pending BatchUpload, first noting it in data for pretty_data.
+  def execute_upload_batch_posts
+    upload = BatchUpload.find(:first, :conditions => ["status = 'pending'"], :order => "id ASC")
+    return if upload.nil?
+    update_attributes(:data => {:id => upload.id, :user_id => upload.user_id, :url => upload.url})
+    upload.run
+  end
+
   def pretty_data
     case task_type
     when "mass_tag_edit"
@@ -165,6 +173,14 @@ class JobTask < ActiveRecord::Base
         end
         "sleeping (#{eta})"
       end
+
+    when "upload_batch_posts"
+      if status == "pending" then
+        return "idle"
+      elsif status == "processing" then
+        user = User.find_name(data["user_id"])
+        return "uploading #{data["url"]} for #{user}"
+      end
     end
   end
 
@@ -184,7 +200,7 @@ class JobTask < ActiveRecord::Base
 
     while true
       execute_once
-      sleep 10
+      sleep 1
     end
   end
 end
diff --git a/app/views/batch/create.html.erb b/app/views/batch/create.html.erb
new file mode 100644
index 00000000..8229c8d6
--- /dev/null
+++ b/app/views/batch/create.html.erb
@@ -0,0 +1,89 @@
+<% if not @urls %>
+
+ <% form_tag({:action => "create"}, :level => :contributor, :method => "get", :id => "edit-form") do %> +
+ + + + + + + + + + + +
+ "> +
<%= submit_tag "Load file index", :tabindex => 8 %>
+
+ <% end %> +
+<% else %> +
+ + + <% form_tag({:action => "enqueue"}, :level => :contributor, :multipart => true, :id => "edit-form") do %> +
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
<%= h(@source) %>
+ +
+ <%= text_area "post", "tags", :value => params[:tags], :size => "60x2", :tabindex => 3 %> +
+ + + checked="checked"<% end %> tabindex="5"> + + + checked="checked"<% end %> tabindex="6"> + + + checked="checked"<% end %> tabindex="7"> + +
+ <%= submit_tag "Start upload", :tabindex => 8, :accesskey => "s" %> +
+
+ <% end %> +
+<% end %> + +<% content_for("post_cookie_javascripts") do %> + +<% end %> + diff --git a/app/views/batch/index.html.erb b/app/views/batch/index.html.erb new file mode 100644 index 00000000..3f7d1581 --- /dev/null +++ b/app/views/batch/index.html.erb @@ -0,0 +1,75 @@ +

Batch Uploads

+ + + + + + + + + + + + + <% @items.each do |item| %> + + + + + + + + + <% end %> + +
#UserURLTagsStatus
<%= item.id %><%= link_to h(User.find_name(item.user_id)), :controller => "user", :action => "show", :id => item.user_id %><%= h(File.basename(item.url)) %><%= h(item.tags) %> + <% if item.status == "error" then %> + <% if item.data["post_id"] then %> + Post #<%= link_to h(item.data["post_id"]), :controller => "post", :action => "show", :id => item.data["post_id"] %> already exists + <% else %> + <%= h(item.data["error"].to_s) %> + <% end %> + <% elsif item.status == "pending" then %> + Pending + <% elsif item.status == "paused" then %> + Paused + <% elsif item.status == "finished" then %> + Post #<%= link_to h(item.data["post_id"]), :controller => "post", :action => "show", :id => item.data["post_id"] %> complete + <% end %> +
+ +
+ <% form_tag({:action => "create"}, :method => :get) do %> + <%= submit_tag("Queue uploads", :name => "queue") %> + <% end %> + + <% form_tag({:action => "update"}) do %> + <%= hidden_field_tag "do", "retry" %> + <%= submit_tag("Retry failed") %> + <% end %> + + <% form_tag({:action => "update"}) do %> + <%= hidden_field_tag "do", "clear_finished" %> + <%= submit_tag("Clear finished uploads") %> + <% end %> + + <% form_tag({:action => "update"}) do %> + <%= hidden_field_tag "do", "abort_all" %> + <%= submit_tag("Cancel all uploads") %> + <% end %> + + <% form_tag({:action => "update"}) do %> + <%= hidden_field_tag "do", "pause" %> + <%= submit_tag("Pause") %> + <% end %> + + <% form_tag({:action => "update"}) do %> + <%= hidden_field_tag "do", "unpause" %> + <%= submit_tag("Resume") %> + <% end %> +
+ +
+ <%= will_paginate(@items) %> +
+
diff --git a/db/migrate/20100831065951_add_batch_uploads.rb b/db/migrate/20100831065951_add_batch_uploads.rb
new file mode 100644
index 00000000..c700de56
--- /dev/null
+++ b/db/migrate/20100831065951_add_batch_uploads.rb
@@ -0,0 +1,32 @@
+class AddBatchUploads < ActiveRecord::Migration
+  def self.up
+    create_table :batch_uploads do |t|
+      t.column :user_id, :integer, :null => false
+      t.foreign_key :user_id, :users, :id, :on_delete => :cascade
+      t.column :ip, :inet
+      t.column :url, :string, :null => false
+      t.column :tags, :string, :null => false, :default => ""
+
+      # If we're handling this entry right now.  This is independent from status; it
+      # lets the user know which file is being processed.
+      t.column :active, :boolean, :null => false, :default => false
+
+      # Queue state: pending, paused, finished or error.  An entry in error has
+      # failed and won't be retried automatically.
+      t.column :status, :string, :null => false, :default => "pending"
+
+      t.column :created_at, :timestamp, :null => false, :default => "now()"
+      t.column :data_as_json, :string, :null => false, :default => "{}"
+    end
+
+    execute "ALTER TABLE batch_uploads ADD UNIQUE (user_id, url)"
+
+    JobTask.create!(:task_type => "upload_batch_posts", :status => "pending", :repeat_count => -1)
+  end
+
+  def self.down
+    drop_table :batch_uploads
+    JobTask.destroy_all(["task_type = 'upload_batch_posts'"])
+  end
+end
+
diff --git a/lib/extract_urls.rb b/lib/extract_urls.rb
new file mode 100644
index 00000000..9355c5f6
--- /dev/null
+++ b/lib/extract_urls.rb
@@ -0,0 +1,31 @@
+require 'hpricot'
+
+module ExtractUrls
+  # Returns absolute URLs for every <a href> in +body+ that ends in .png, .jpg
+  # or .jpeg.  Relative hrefs are resolved against +url+ with its query string
+  # dropped, treating it as a directory.
+  def extract_image_urls(url, body)
+    relative_url = url.gsub(/(https?:\/\/[^?]*)\?.*/, '\1')
+    if relative_url !~ /\/$/ then relative_url += "/" end
+    # Scheme and host without the trailing slash, since root-relative hrefs start with one.
+    url_head = relative_url.gsub(/(https?:\/\/[^\/]+)\/.*/, '\1')
+
+    urls = []
+    doc = Hpricot(body)
+    doc.search("a[@href]").each do |param|
+      href = param.attributes["href"]
+      if href.nil? then next end
+      if href !~ /\.(png|jpg|jpeg)\z/i then next end
+      if href =~ /\A\// then
+        href = url_head + href
+      elsif href !~ /\Ahttps?:\/\// then
+        href = relative_url + href
+      end
+      urls.push(href)
+    end
+    return urls
+  end
+
+  module_function :extract_image_urls
+end
+
diff --git a/public/stylesheets/default.css b/public/stylesheets/default.css
index a2ee03f6..0e031723 100644
--- a/public/stylesheets/default.css
+++ b/public/stylesheets/default.css
@@ -1739,3 +1739,8 @@ P,.inline-image + .inline-image > .inline-thumb
 {
   margin-left: 0.5em;
 }
+
+.batch-buttons form
+{
+  float: left;
+}