2010-04-20 23:05:11 +00:00
|
|
|
module SimilarImages
|
|
|
|
# Resolve a user-supplied service selection into a list of service names.
#
# services - nil, the literal "all", or a comma-separated String of service
#            names. nil is treated as "local".
#
# "all" expands to every key of CONFIG["image_service_list"]. Any other value
# is split on commas; surrounding whitespace is stripped and empty entries are
# dropped, so "a, local" and "a,local" select the same services. Every "local"
# entry is then replaced by CONFIG["local_image_service"].
#
# Returns a new Array of service names; the argument is not modified.
def get_services(services)
  services ||= "local"

  names =
    if services == "all"
      CONFIG["image_service_list"].keys
    else
      services.split(",").map(&:strip).reject(&:empty?)
    end

  names.map { |name| name == "local" ? CONFIG["local_image_service"] : name }
end
|
|
|
|
|
2014-08-23 16:44:43 +09:00
|
|
|
# Query the configured image-similarity servers and collect the matching posts.
#
# options - Hash of search parameters:
#   :services        - Array of service names (keys of CONFIG["image_service_list"]).
#   :type            - :post, :file or :url; selects how :source is interpreted.
#   :source          - for :post, an object responding to preview_path, preview_url
#                      and id; for :file, an object responding to read; for :url,
#                      the URL to search for.
#   :source_filename - file name sent with the upload when :type is :file.
#   :threshold       - minimum similarity; when absent, the threshold attribute of
#                      each server response is used.
#   :forcegray       - "1" to send forcegray=on to the servers.
#   :source_thumb, :full_url, :url, :width, :height
#                    - describe the source when it is not a local post.
#
# Returns a Hash with the keys :posts (local Post records), :posts_external
# (ExternalPost objects), :similarity (post => similarity, plus "Original" for
# the source), :services, :errors (server => { services:, message: }),
# :search_id, and either :source (for :post) or :external_source.
#
# Exceptions raised inside a request thread other than the ones rescued there
# propagate to the caller when the threads are joined.
def similar_images(options = {})
  errors = {}
  local_service = CONFIG["local_image_service"]
  services = options[:services]

  # Group the requested services by the server that handles them, so each
  # server is queried once. Unknown services are collected and reported
  # together under the "" key instead of each overwriting the previous one.
  services_by_server = {}
  unknown_services = []
  services.each do |service|
    server = CONFIG["image_service_list"][service]
    if server
      (services_by_server[server] ||= []) << service
    else
      unknown_services << service
    end
  end

  unless unknown_services.empty?
    message =
      if unknown_services.size == 1
        "%s is an unknown service" % unknown_services.first
      else
        "%s are unknown services" % unknown_services.join(", ")
      end
    errors[""] = { services: unknown_services, message: message }
  end

  # If the source is a local post, read the preview and send it with the request.
  case options[:type]
  when :post
    source_filename = options[:source].preview_path
    source_file = File.binread(source_filename)
  when :file
    source_file = options[:source].read
    source_filename = options[:source_filename]
  end

  # When a URL is available the servers fetch the image themselves; otherwise
  # the image data is uploaded. This is the same for every server, so it is
  # computed once up front.
  search_url = options[:source] if options[:type] == :url
  if options[:type] == :post && CONFIG["image_service_local_searches_use_urls"]
    search_url = options[:source].preview_url
  end

  # One request thread per server. Each thread writes only to its own server
  # key in server_responses / errors.
  server_responses = {}
  server_threads = services_by_server.map do |server, services_list|
    Thread.new do
      params = []
      if search_url
        params << ["url", search_url]
      else
        params << ["file", source_file, { filename: File.basename(source_filename) }]
      end
      services_list.each { |s| params << ["service[]", s] }
      params << ["forcegray", "on"] if options[:forcegray] == "1"

      begin
        Timeout.timeout(10) do
          url = URI.parse(server)

          http_options = {}
          if url.scheme == "https"
            http_options[:use_ssl] = true
            # NOTE(review): certificate verification is disabled, so HTTPS
            # servers are not authenticated. Left unchanged here; consider
            # VERIFY_PEER once the configured servers have valid certificates.
            http_options[:verify_mode] = OpenSSL::SSL::VERIFY_NONE
          end

          Net::HTTP.start(url.host, url.port, nil, nil, nil, nil, http_options) do |http|
            http.read_timeout = 10

            request = Net::HTTP::Post.new(server)
            request.set_form params, "multipart/form-data"
            server_responses[server] = http.request(request).body
          end
        end
      rescue SocketError, SystemCallError => e
        errors[server] = { message: e }
      rescue Timeout::Error
        errors[server] = { message: "Timed out" }
      end
    end
  end
  server_threads.each(&:join)

  posts = []
  posts_external = []
  similarity = {}
  next_id = 1 # ids handed out to external posts, as strings
  server_responses.each do |server, xml|
    doc = begin
      Nokogiri::XML xml.to_valid_utf8
    rescue
      errors[server] = { message: "parse error" }
      next
    end

    unless doc.root
      errors[server] = { message: "invalid response" }
      next
    end

    if doc.root.name == "error"
      errors[server] = { message: doc.root[:message] }
      next
    end

    threshold = (options[:threshold] || doc.root[:threshold]).to_f

    doc.search("matches/match").each do |element|
      sim = element[:sim].to_f
      next unless sim >= threshold && sim > 0

      service = element[:service]
      image = element.search("[id]").first
      # A match without any id-bearing node cannot be resolved to a post;
      # skip it instead of failing the whole search on nil.
      next unless image

      id = image[:id]
      md5 = element[:md5]

      if service == local_service
        post = Post.find_by(id: id)
        next if post.nil? || post == options[:source]

        posts << post
        similarity[post] = sim
      elsif service
        post = ExternalPost.new
        post.id = next_id.to_s
        next_id += 1
        post.md5 = md5
        post.preview_url = element[:preview]
        post.url =
          case service
          when "gelbooru.com" # hack
            "http://#{service}/index.php?page=post&s=view&id=#{id}"
          when "e-shuushuu.net" # hack
            "http://#{service}/image/#{id}/"
          else
            "http://#{service}/post/show/#{id}"
          end
        post.sample_url = image[:sample_url] || post.url
        post.service = service
        post.width = element[:width].to_i
        post.height = element[:height].to_i
        post.tags = image[:tags] || ""
        post.rating = image[:rating] || "s"
        posts_external << post

        similarity[post] = sim
      end
    end
  end

  # Most similar first.
  posts = posts.sort { |a, b| similarity[b] <=> similarity[a] }
  posts_external = posts_external.sort { |a, b| similarity[b] <=> similarity[a] }

  # Errors recorded per server get the list of services that server was asked for.
  errors.each do |server, error|
    error[:services] ||= services_by_server[server]
  end

  ret = { posts: posts, posts_external: posts_external, similarity: similarity, services: services, errors: errors }
  if options[:type] == :post
    ret[:source] = options[:source]
    ret[:similarity][options[:source]] = "Original"
    ret[:search_id] = ret[:source].id
  else
    post = ExternalPost.new
    # post.md5 = md5
    post.preview_url = options[:source_thumb]
    post.url = options[:full_url] || options[:url] || options[:source_thumb]
    post.id = "source"
    post.service = ""
    post.tags = ""
    post.rating = "q"
    ret[:search_id] = "source"

    # Don't include the source URL if it's a data: url; it can be very large and isn't useful.
    # The URL may be nil when none of the URL options were given.
    post.url = "" if post.url.to_s.start_with?("data:")

    # NOTE(review): source_file is nil when :type is :url; ImageSizeExif.data is
    # presumably expected to cope with that - confirm.
    imgsize = Moebooru::ImageSizeExif.data(source_file)
    source_width = imgsize[:width]
    source_height = imgsize[:height]

    # Since we lose access to the original image when we redirect to a saved search,
    # the original dimensions can be passed as parameters so we can still display
    # the original size. This can also be used by user scripts to include the
    # size of the real image when a thumbnail is passed.
    post.width = options[:width] || source_width
    post.height = options[:height] || source_height

    ret[:external_source] = post
    ret[:similarity][post] = "Original"
  end

  ret
end
|
|
|
|
|
2012-04-29 10:56:41 -07:00
|
|
|
# Directory holding saved search images: written by save_search, looked up by
# find_saved_search and pruned by cull_old_searches. Frozen so the shared path
# cannot be modified in place.
SEARCH_CACHE_DIR = "#{Rails.root}/public/data/search".freeze
|
2010-04-20 23:05:11 +00:00
|
|
|
# Save a file locally to be searched for.
#
# Yields a File opened for binary writing; the caller's block writes the
# uploaded image into it. The upload is then checked with ImageSizeExif, resized
# through Moebooru::Resizer to fit within 150x150, and moved into
# SEARCH_CACHE_DIR under a name derived from the MD5 of the resized file.
#
# Returns a Hash with :original_width, :original_height, :file_path (path of the
# saved file) and :search_id (the ID which can be passed to find_saved_search).
#
# Raises Moebooru::Resizer::ResizeError when the image type is not recognized.
# Any other error is re-raised after the partially written files are removed.
def save_search
  FileUtils.mkdir_p(SEARCH_CACHE_DIR, mode: 0o775)

  upload_path = "#{SEARCH_CACHE_DIR}/#{SecureRandom.random_number(2**32)}.upload"
  File.open(upload_path, "wb") { |io| yield io }

  # Use the resizer to validate the file and convert it to a thumbnail-size image.
  imgsize = Moebooru::ImageSizeExif.path(upload_path)
  raise Moebooru::Resizer::ResizeError, "Unrecognized image format" if imgsize[:type].blank?

  original_width = imgsize[:width]
  original_height = imgsize[:height]
  thumb_size = Moebooru::Resizer.reduce_to({ width: original_width, height: original_height }, width: 150, height: 150)
  ext = imgsize[:type].gsub(/jpeg/i, "jpg").downcase

  resized_path = "#{upload_path}.2"
  Moebooru::Resizer.resize(ext, upload_path, resized_path, thumb_size, 95)
  FileUtils.mv(resized_path, upload_path)

  # The cache entry is named after the checksum of the resized image.
  search_id = "#{Moebooru::Hasher.compute_one(upload_path, :md5)}.#{ext}"
  cached_path = "#{SEARCH_CACHE_DIR}/#{search_id}"

  FileUtils.mv(upload_path, cached_path)
  FileUtils.chmod(0o664, cached_path)

  {
    original_width: original_width,
    original_height: original_height,
    file_path: cached_path,
    search_id: search_id
  }
rescue
  # Don't leave a half-finished cache entry behind.
  FileUtils.rm_f(cached_path) if cached_path
  raise
ensure
  # The temporary files are gone after a successful move; rm_f is a no-op then.
  FileUtils.rm_f(upload_path) if upload_path
  FileUtils.rm_f(resized_path) if resized_path
end
|
|
|
|
|
|
|
|
# Check that id has the shape of a saved search ID: 32 alphanumeric characters,
# a dot, and a lowercase alphabetic extension.
#
# The pattern is anchored with \A and \z so the whole string must match; \Z
# would also accept a trailing newline, which must not reach the file path
# built from the ID. Anything that is not a String is rejected.
#
# Returns 0 (truthy) when id is valid, nil otherwise.
def valid_saved_search(id)
  return nil unless id.is_a?(String)

  id =~ /\A[a-zA-Z0-9]{32}\.[a-z]+\z/
end
|
|
|
|
|
|
|
|
# Find a saved file.
#
# id - the search ID of a previously saved search.
#
# Returns the path of the saved file inside SEARCH_CACHE_DIR, or nil when the
# ID is not valid or no such file exists.
def find_saved_search(id)
  return nil unless valid_saved_search(id)

  file_path = "#{SEARCH_CACHE_DIR}/#{id}"
  return nil unless File.exist?(file_path)

  # Touch the file to delay its deletion. An append-mode open that writes
  # nothing does not advance the mtime and leaves the handle open until it is
  # garbage collected, so update the timestamp explicitly. nocreate keeps a
  # file that was culled in the meantime from being recreated empty.
  FileUtils.touch(file_path, nocreate: true)

  file_path
end
|
|
|
|
|
|
|
|
# Delete old searches.
#
# Removes every file in SEARCH_CACHE_DIR whose name is a valid saved search ID
# and whose modification time is more than one day old. Other files are left
# alone. Does nothing when the cache directory does not exist yet.
def cull_old_searches
  # No search has been saved yet if the directory was never created.
  return unless File.directory?(SEARCH_CACHE_DIR)

  max_age = 60 * 60 * 24 # one day, in seconds
  now = Time.now

  Dir.foreach(SEARCH_CACHE_DIR) do |name|
    next unless valid_saved_search(name)

    file = "#{SEARCH_CACHE_DIR}/#{name}"
    begin
      FileUtils.rm_f(file) if now - File.mtime(file) > max_age
    rescue Errno::ENOENT
      # The file disappeared between listing and stat; nothing left to delete.
    end
  end
end
|
|
|
|
|
|
|
|
# Make the listed methods callable directly on the module (for example
# SimilarImages.get_services). Only methods named here get a module-level copy,
# so a method added to this module must be listed too before it can be called
# that way, including from the other module-level methods.
module_function :similar_images, :get_services, :find_saved_search, :cull_old_searches, :save_search, :valid_saved_search
|
|
|
|
end
|