2014-08-23 16:16:09 +09:00
|
|
|
require "multipart"
|
|
|
|
require "external_post"
|
2010-04-20 23:05:11 +00:00
|
|
|
|
|
|
|
module SimilarImages
|
|
|
|
# Expand a service specification into a list of service names.
#
# services - nil (treated as "local"), the String "all" (every key of
#            CONFIG["image_service_list"]), or a comma-separated String of
#            service names.
#
# Every "local" entry in the resulting list is replaced with
# CONFIG["local_image_service"]. The argument itself is never modified.
#
# Returns an Array of service names.
def get_services(services)
  services ||= "local"

  names =
    if services == "all"
      CONFIG["image_service_list"].keys
    else
      services.split(",")
    end

  names.map { |name| name == "local" ? CONFIG["local_image_service"] : name }
end
|
|
|
|
|
2014-08-23 16:44:43 +09:00
|
|
|
# Query the configured image-similarity services for images resembling a source
# image and collect the matches.
#
# options - Hash of:
#   :type            - :post (options[:source] responds to preview_path,
#                      preview_url and id), :file (options[:source] responds
#                      to read) or :url (options[:source] is the URL to search).
#   :source          - the source object described above.
#   :source_filename - filename sent with the upload for :file searches.
#   :services        - Array of service names; each must be a key of
#                      CONFIG["image_service_list"].
#   :threshold       - minimum similarity; when absent the "threshold"
#                      attribute of each response's root element is used.
#   :forcegray       - the String "1" adds a forcegray=on request parameter.
#   :source_thumb, :full_url, :url, :width, :height
#                    - describe the source image for non-post searches.
#
# Returns a Hash with :posts (local matches), :posts_external (ExternalPost
# matches), :similarity (post => score, plus the source => "Original"),
# :services, :errors (keyed by server, or "" for an unknown service),
# :search_id, and either :source (post searches) or :external_source.
def similar_images(options = {})
  errors = {}
  local_service = CONFIG["local_image_service"]
  services = options[:services]

  # Group the requested services by the server hosting them, so that each
  # server receives a single request listing all of its services.
  services_by_server = {}
  services.each do |service|
    server = CONFIG["image_service_list"][service]
    unless server
      errors[""] = { :services => [service], :message => "%s is an unknown service" % service }
      next
    end
    (services_by_server[server] ||= []) << service
  end

  # If the source is a local post, read the preview and send it with the request.
  if options[:type] == :post
    source_file = File.open(options[:source].preview_path, "rb") { |file| file.read }
    source_filename = options[:source].preview_path
  elsif options[:type] == :file
    source_file = options[:source].read
    source_filename = options[:source_filename]
  end

  # Query every server in parallel; each thread records either the response
  # body or an error message under its server key.
  server_responses = {}
  server_threads = services_by_server.map do |server, services_list|
    Thread.new do
      search_url = options[:source] if options[:type] == :url
      if options[:type] == :post && CONFIG["image_service_local_searches_use_urls"]
        search_url = options[:source].preview_url
      end

      # Send the image by URL when one is available, otherwise upload the bytes.
      params = []
      if search_url
        params << { :name => "url", :data => search_url }
      else
        params << {
          :name => "file",
          :binary => true,
          :data => source_file,
          :filename => File.basename(source_filename)
        }
      end
      services_list.each { |s| params << { :name => "service[]", :data => s } }
      params << { :name => "forcegray", :data => "on" } if options[:forcegray] == "1"

      begin
        Timeout.timeout(10) do
          url = URI.parse(server)
          Net::HTTP.start(url.host, url.port) do |http|
            http.read_timeout = 10

            request = Net::HTTP::Post.new(server)
            request.multipart = params
            response = http.request(request)
            server_responses[server] = response.body
          end
        end
      rescue SocketError, SystemCallError => e
        errors[server] = { :message => e }
      rescue Timeout::Error
        errors[server] = { :message => "Timed out" }
      end
    end
  end
  server_threads.each(&:join)

  posts = []
  posts_external = []
  similarity = {}
  next_id = 1
  server_responses.each do |server, xml|
    doc = begin
      Nokogiri::XML xml.to_valid_utf8
    rescue
      errors[server] = { :message => "parse error" }
      next
    end

    unless doc.root
      errors[server] = { :message => "invalid response" }
      next
    end

    if doc.root.name == "error"
      errors[server] = { :message => doc.root[:message] }
      next
    end

    threshold = (options[:threshold] || doc.root[:threshold]).to_f

    doc.search("matches/match").each do |element|
      sim = element[:sim].to_f
      next unless sim >= threshold && sim > 0

      service = element[:service]
      image = element.search("[id]").first
      # A match without any child carrying an id cannot be resolved to a post;
      # skip it rather than failing the whole search on a nil dereference.
      next unless image

      id = image[:id]
      md5 = element[:md5]

      if service == local_service
        post = Post.find(:first, :conditions => ["id = ?", id])
        # Never report the source post as similar to itself.
        unless post.nil? || post == options[:source]
          posts << post
          similarity[post] = sim
        end
      elsif service
        post = ExternalPost.new
        post.id = next_id.to_s
        next_id += 1
        post.md5 = md5
        post.preview_url = element[:preview]
        # These two sites do not use the /post/show/ URL scheme.
        post.url =
          if service == "gelbooru.com" # hack
            "http://#{service}/index.php?page=post&s=view&id=#{id}"
          elsif service == "e-shuushuu.net" # hack
            "http://#{service}/image/#{id}/"
          else
            "http://#{service}/post/show/#{id}"
          end
        post.sample_url = image[:sample_url] || post.url
        post.service = service
        post.width = element[:width].to_i
        post.height = element[:height].to_i
        post.tags = image[:tags] || ""
        post.rating = image[:rating] || "s"
        posts_external << post

        similarity[post] = sim
      end
    end
  end

  # Highest similarity first.
  posts = posts.sort { |a, b| similarity[b] <=> similarity[a] }
  posts_external = posts_external.sort { |a, b| similarity[b] <=> similarity[a] }

  # Errors recorded per server get the list of services that server was asked for.
  errors.each do |server, error|
    error[:services] ||= services_by_server[server]
  end

  ret = { :posts => posts, :posts_external => posts_external, :similarity => similarity, :services => services, :errors => errors }
  if options[:type] == :post
    ret[:source] = options[:source]
    ret[:similarity][options[:source]] = "Original"
    ret[:search_id] = ret[:source].id
  else
    post = ExternalPost.new
    # post.md5 = md5
    post.preview_url = options[:source_thumb]
    post.url = options[:full_url] || options[:url] || options[:source_thumb]
    post.id = "source"
    post.service = ""
    post.tags = ""
    post.rating = "q"
    ret[:search_id] = "source"

    # Don't include the source URL if it's a data: url; it can be very large and isn't useful.
    # to_s keeps this safe when none of the URL options was supplied.
    post.url = "" if post.url.to_s.start_with?("data:")

    # Image bytes are only read for :post and :file searches; without them
    # there is nothing to measure, so only the caller-supplied size is used.
    imgsize = ImageSize.new(source_file) if source_file
    source_width = imgsize && imgsize.width
    source_height = imgsize && imgsize.height

    # Since we lose access to the original image when we redirect to a saved search,
    # the original dimensions can be passed as parameters so we can still display
    # the original size. This can also be used by user scripts to include the
    # size of the real image when a thumbnail is passed.
    post.width = options[:width] || source_width
    post.height = options[:height] || source_height

    ret[:external_source] = post
    ret[:similarity][post] = "Original"
  end

  ret
end
|
|
|
|
|
2012-04-29 10:56:41 -07:00
|
|
|
# Directory holding cached search images, located under Rails.root.
SEARCH_CACHE_DIR = "#{Rails.root}/public/data/search"
|
2010-04-20 23:05:11 +00:00
|
|
|
# Save a file locally to be searched for.
#
# The caller's block is yielded a File opened for binary writing and must write
# the image data to it. The image is then resized to fit within 150x150 and
# stored in SEARCH_CACHE_DIR under "<md5 of the resized file>.<ext>".
#
# Returns a Hash with :original_width, :original_height, :file_path (the saved
# file) and :search_id (the ID which can be passed to find_saved_search).
#
# Raises Moebooru::Resizer::ResizeError when the image format is not
# recognized. Any error is re-raised after the partially written files have
# been removed.
def save_search
  FileUtils.mkdir_p(SEARCH_CACHE_DIR, :mode => 0775)

  tempfile_path = "#{SEARCH_CACHE_DIR}/#{SecureRandom.random_number(2**32)}.upload"
  File.open(tempfile_path, "wb") { |f| yield f }

  # Use the resizer to validate the file and convert it to a thumbnail-size JPEG.
  imgsize = ImageSize.path(tempfile_path)
  if imgsize.format.nil?
    raise Moebooru::Resizer::ResizeError, "Unrecognized image format"
  end

  ret = {}
  ret[:original_width] = imgsize.width
  ret[:original_height] = imgsize.height
  size = Moebooru::Resizer.reduce_to({ :width => ret[:original_width], :height => ret[:original_height] }, { :width => 150, :height => 150 })
  ext = imgsize.format.to_s.gsub(/jpeg/i, "jpg").downcase

  tempfile_path_resize = "#{tempfile_path}.2"
  Moebooru::Resizer.resize(ext, tempfile_path, tempfile_path_resize, size, 95)
  FileUtils.mv(tempfile_path_resize, tempfile_path)

  # The search ID is derived from the resized file's content.
  md5 = File.open(tempfile_path, "rb") { |fp| Digest::MD5.hexdigest(fp.read) }
  id = "#{md5}.#{ext}"
  file_path = "#{SEARCH_CACHE_DIR}/#{id}"

  FileUtils.mv(tempfile_path, file_path)
  FileUtils.chmod(0664, file_path)

  ret[:file_path] = file_path
  ret[:search_id] = id
  ret
rescue
  # Don't leave a half-finished result behind.
  FileUtils.rm_f(file_path) if file_path
  raise
ensure
  # On success these have already been moved away; rm_f ignores missing files.
  FileUtils.rm_f(tempfile_path) if tempfile_path
  FileUtils.rm_f(tempfile_path_resize) if tempfile_path_resize
end
|
|
|
|
|
|
|
|
# Check whether id has the shape of a saved-search ID: exactly 32 alphanumeric
# characters, a dot, and a lowercase extension.
#
# The pattern is anchored with \A and \z so that nothing (not even a trailing
# newline, which \Z would allow) can surround the ID; callers interpolate the
# ID into a file path.
#
# Returns 0 (truthy) when id matches, nil otherwise (including for nil).
def valid_saved_search(id)
  id =~ /\A[a-zA-Z0-9]{32}\.[a-z]+\z/
end
|
|
|
|
|
|
|
|
# Find a saved file.
#
# id - a search ID; it is checked with valid_saved_search before use.
#
# Returns the path of the saved file inside SEARCH_CACHE_DIR, or nil when the
# ID is not valid or no such file exists.
def find_saved_search(id)
  return nil unless valid_saved_search(id)

  file_path = "#{SEARCH_CACHE_DIR}/#{id}"
  return nil unless File.exist?(file_path)

  # Touch the file to delay its deletion. Merely opening it for append does
  # not change its modification time (and leaves the handle open), so update
  # the timestamp explicitly.
  FileUtils.touch(file_path)
  file_path
end
|
|
|
|
|
|
|
|
# Delete old searches.
#
# Removes every file in SEARCH_CACHE_DIR whose name passes valid_saved_search
# and whose modification time is more than 24 hours in the past. Other files
# are left alone. Does nothing when the directory does not exist.
#
# Returns nil.
def cull_old_searches
  # Dir.foreach raises if the directory is missing; in that case there is
  # nothing to cull.
  return unless File.directory?(SEARCH_CACHE_DIR)

  max_age = 60 * 60 * 24 # seconds

  Dir.foreach(SEARCH_CACHE_DIR) do |name|
    next unless valid_saved_search(name)

    file = "#{SEARCH_CACHE_DIR}/#{name}"
    age = Time.now - File.mtime(file)
    FileUtils.rm_f(file) if age > max_age
  end
end
|
|
|
|
|
|
|
|
# Make the listed methods callable directly on the module (e.g. SimilarImages.get_services).
module_function :similar_images, :get_services, :find_saved_search, :cull_old_searches, :save_search, :valid_saved_search
|
|
|
|
end
|