2010-04-20 23:05:11 +00:00
|
|
|
module Danbooru
  # Markup fragments used by .diff
  TAG_DEL = "<del>".freeze
  TAG_INS = "<ins>".freeze
  TAG_DEL_CLOSE = "</del>".freeze
  TAG_INS_CLOSE = "</ins>".freeze
  TAG_NEWLINE = "↲\n".freeze
  TAG_BREAK = "<br>\n".freeze

  # Download the given URL, following redirects; once we have the result,
  # yield the response to the caller's block and return the block's value.
  #
  # source  - URL string. "data:" URLs are Base64-decoded locally and yielded
  #           wrapped in a LocalData. For pixiv URLs the "_m" (small version)
  #           suffix is stripped from this string IN PLACE, so the caller's
  #           string object is modified.
  # options - :max_size is the largest acceptable Content-Length in bytes;
  #           defaults to CONFIG["max_image_size"]; 0 means unlimited.
  #
  # Raises SocketError for non-HTTP(S) URLs, oversized files (judged by the
  # Content-Length header only), too many redirects, redirects without a
  # Location header, and any other non-success response.
  def http_get_streaming(source, options = {}, &_block)
    max_size = options[:max_size] || CONFIG["max_image_size"]
    max_size = nil if max_size == 0 # unlimited

    # Decode data: URLs.
    if source =~ /^data:([^;]{1,100})(;[^;]{1,100})?,(.*)$/
      data = Base64.decode64(Regexp.last_match(3))
      return yield LocalData.new(data)
    end

    # Number of redirects we are still willing to follow.
    limit = 4

    loop do
      is_pixiv = source =~ /(pixiv\.net|pximg\.net)/

      # Don't download the small version. This has to happen before the URL
      # is parsed below, otherwise the request would still go to the "_m" file.
      if is_pixiv && source =~ %r{(/img/.+?/.+?)_m.+$}
        match = Regexp.last_match(1)
        source.sub!(match + "_m", match)
      end

      url = Addressable::URI.parse(source)
      url.host = url.normalized_host

      unless url.scheme == "http" || url.scheme == "https"
        raise SocketError, "URL must be HTTP or HTTPS"
      end

      # check if the request uri is not percent-encoded
      if url.request_uri =~ /[^!*'();:@&=+$,\/?#\[\]A-Za-z0-9\-_.~%]/
        url.path = Addressable::URI.encode(url.path)
        url.query = Addressable::URI.encode(url.query)
      end

      # Addressable doesn't fill in port data if not explicitly given.
      url.port ||= (url.scheme == "https" ? 443 : 80)

      http = Net::HTTP.new(url.host, url.port)
      if url.scheme == "https"
        http.use_ssl = true
        # NOTE(review): certificate verification is disabled, so HTTPS
        # downloads are not authenticated. Left unchanged to avoid breaking
        # existing sources; confirm whether this is still required.
        http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      end

      http.start do
        http.read_timeout = 10

        headers = {
          "User-Agent" => "#{CONFIG["app_name"]}/#{CONFIG["version"]}",
          "Referer" => is_pixiv ? "http://www.pixiv.net" : source
        }

        http.request_get(url.request_uri, headers) do |res|
          case res
          when Net::HTTPSuccess
            if max_size
              len = res["Content-Length"]
              raise SocketError, "File is too large (#{len} bytes)" if len && len.to_i > max_size
            end

            return yield(res)

          when Net::HTTPRedirection
            raise SocketError, "Too many redirects" if limit == 0

            # Some redirect-class responses carry no Location header;
            # report that as a download failure.
            location = res["location"]
            raise SocketError, "Redirect without a Location header" unless location

            new_url = Addressable::URI.parse(location)
            new_url = (url + new_url) if new_url.relative?

            # Retry the loop against the redirect target.
            source = new_url.to_str
            limit -= 1

          else
            raise SocketError, "HTTP error code: #{res.code} #{res.message}"
          end
        end
      end
    end
  end

  module_function :http_get_streaming

  # Produce a formatted page that shows the difference between two versions of a page.
  #
  # old - String holding the previous text.
  # new - String holding the current text.
  #
  # Both texts are split into tokens (tag-like "<...>" runs, word runs, runs
  # of spaces/tabs, newlines, and single other characters) and compared with
  # Diff::LCS. Every token is HTML-escaped, inserted tokens are wrapped in
  # TAG_INS/TAG_INS_CLOSE, removed tokens in TAG_DEL/TAG_DEL_CLOSE, changed
  # newlines are shown as TAG_NEWLINE, and remaining newlines become TAG_BREAK.
  #
  # Returns the resulting HTML String.
  def diff(old, new)
    pattern = Regexp.new('(?:<.+?>)|(?:\p{Word}+)|(?:[ \t]+)|(?:\r?\n)|(?:.+?)')

    thisarr = old.scan(pattern)
    otharr = new.scan(pattern)

    cbo = Diff::LCS::ContextDiffCallbacks.new
    diffs = thisarr.diff(otharr, cbo)

    # "&" must be replaced first so the entities produced for "<" and ">"
    # are not escaped a second time.
    escape_html = lambda { |str| str.gsub(/&/, "&amp;").gsub(/</, "&lt;").gsub(/>/, "&gt;") }

    output = thisarr.map { |token| escape_html[token] }

    # Hunks are applied back to front so that insertions do not shift the
    # positions of hunks that have not been processed yet.
    diffs.reverse_each do |hunk|
      newchange = hunk.max { |a, b| a.old_position <=> b.old_position }
      newstart = newchange.old_position
      oldstart = hunk.min { |a, b| a.old_position <=> b.old_position }.old_position

      # The closing tag goes in first; the inserted tokens and the opening
      # tag are later inserted in front of it.
      if newchange.action == "+"
        output.insert(newstart, TAG_INS_CLOSE)
      end

      hunk.reverse_each do |chg|
        case chg.action
        when "-"
          oldstart = chg.old_position
          output[chg.old_position] = TAG_NEWLINE if chg.old_element.match(/^\r?\n$/)
        when "+"
          if chg.new_element.match(/^\r?\n$/)
            output.insert(chg.old_position, TAG_NEWLINE)
          else
            output.insert(chg.old_position, escape_html[chg.new_element])
          end
        end
      end

      if newchange.action == "+"
        output.insert(newstart, TAG_INS)
      end

      if hunk[0].action == "-"
        output.insert((newstart == oldstart || newchange.action != "+") ? newstart + 1 : newstart, TAG_DEL_CLOSE)
        output.insert(oldstart, TAG_DEL)
      end
    end

    output.join.gsub(/\r?\n/, TAG_BREAK)
  end

  module_function :diff
end
|