2010-04-20 23:05:11 +00:00
|
|
|
require 'cgi'
|
2012-05-09 11:59:21 +07:00
|
|
|
require 'nokogiri'
|
2010-04-20 23:05:11 +00:00
|
|
|
|
|
|
|
module DText
|
2012-05-09 11:59:21 +07:00
|
|
|
# Convert a DText-formatted string into an HTML fragment.
#
# The text is HTML-escaped, block-level markup (headers and lists) is handled
# line by line through parseline, inline markup is handled on the whole text
# through parseinline, and the result is finally passed through Nokogiri.
#
# str - the DText source. It is never modified, so frozen strings are fine;
#       nil is treated as an empty string.
#
# Returns the rendered HTML as a String.
def parse(str)
  state = ['newline']

  # Work on a trimmed private copy so the caller's string is left untouched.
  text = str.to_s.strip

  # Normalize newlines (CRLF and lone CR both become LF) and collapse runs of
  # blank lines to a single blank line.
  text = text.gsub(/\r\n?/, "\n").gsub(/\n{3,}/, "\n\n")

  text = CGI.escapeHTML(text)

  # Parse header and list first, line by line. Every line keeps its trailing
  # newline so that parseinline can later turn it into <br>.
  result = text.each_line.map { |line| parseline(line, state) }.join

  # A list that runs up to the end of the input has no following line to
  # close it; the numeric state entry holds the number of open <ul> levels.
  result << '</ul>' * state.pop.to_i if state.last =~ /\d/

  # Parse inline tags as a whole.
  result = parseinline(result)

  # Nokogiri ensures valid html output.
  Nokogiri::HTML::DocumentFragment.parse(result).to_html
end
|
|
|
|
|
|
|
|
# Expand inline DText markup in str and return the resulting HTML string.
#
# Handles wiki links, tag-search links, bold/italic, post/forum/comment/pool
# references, spoilers, quotes and URLs (via parseurl), then converts the
# remaining newlines to <br>.
#
# str - text that has already gone through line-level parsing. It is not
#       modified; a new string is returned.
def parseinline(str)
  # Short links substitution:
  # [[title]] or [[title|label]] links to a wiki page.
  str = str.gsub(/\[\[(.+?)\]\]/) do
    title, label = Regexp.last_match(1).split('|', 2)
    label ||= title
    "<a href=\"/wiki/show?title=#{CGI.escape(CGI.unescapeHTML(title.tr(" ", "_")))}\">#{label}</a>"
  end

  # {{post tags here}} searches posts with those tags.
  str = str.gsub(/\{\{(.+?)\}\}/) do
    tags = Regexp.last_match(1)
    "<a href=\"/post/index?tags=#{CGI.escape(CGI.unescapeHTML(tags))}\">#{tags}</a>"
  end

  # Miscellaneous single line tags substitution. The captures are non-greedy
  # so that two tagged spans on the same line stay separate.
  str = str.gsub(/\[b\](.+?)\[\/b\]/, '<strong>\1</strong>')
  str = str.gsub(/\[i\](.+?)\[\/i\]/, '<em>\1</em>')

  # "post #123", "forum #123", "comment #123" and "pool #123" references.
  %w[post forum comment pool].each do |type|
    str = str.gsub(/(#{type} #(\d+))/i, "<a href=\"/#{type}/show/\\2\">\\1</a>")
  end

  # Single line spoiler tags.
  str = str.gsub(/\[spoilers?\](.+?)\[\/spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">spoiler</span></span><span class="spoilertext" style="display: none">\1</span>')
  str = str.gsub(/\[spoilers?=(.+?)\](.+?)\[\/spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">\1</span></span><span class="spoilertext" style="display: none">\2</span>')

  # Multi line spoiler tags: whatever was not matched as a single-line pair.
  str = str.gsub(/\[spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">spoiler</span></span><div class="spoilertext" style="display: none">')
  str = str.gsub(/\[spoilers?=(.+?)\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">\1</span></span><div class="spoilertext" style="display: none">')
  str = str.gsub(/\[\/spoilers?\]/, '</div>')

  # Quote.
  str = str.gsub(/\[quote\]/, '<blockquote><div>')
  str = str.gsub(/\[\/quote\]/, '</div></blockquote>')

  str = parseurl(str)

  # Extraneous newlines before closing div are unnecessary.
  str = str.gsub(/\n+(<\/div>)/, '\1')
  # So are the ones after headers and lists.
  str = str.gsub(/(<\/ul>|<\/h\d+>)\n+/, '\1')
  str.gsub(/\n/, '<br>')
end
|
|
|
|
|
2012-05-13 08:22:42 +07:00
|
|
|
# Render one line of block-level markup.
#
# str   - a single line of (already HTML-escaped) text.
# state - parser state stack; a top entry containing a digit means a list is
#         currently open.
#
# List lines, and any line seen while a list is open, are delegated to
# parselist. "hN. text" lines (N = 1..6) become <hN> headers. Everything else
# is returned unchanged.
def parseline(str, state)
  list_open = state.last =~ /\d/
  return parselist(str, state) if list_open || str =~ /^\*+\s+/

  heading = /^(h[1-6])\.\s*(.+)\n*/.match(str)
  return str unless heading

  "<#{heading[1]}>#{heading[2]}</#{heading[1]}>"
end
|
|
|
|
|
2012-05-13 08:22:42 +07:00
|
|
|
# Render one line while inside, entering, or leaving a bulleted list.
#
# str   - a single line; list items look like "* text", "** text", ...
# state - parser state stack. While a list is open its top entry is the
#         number of currently open <ul> levels, stored as a string.
#
# The nesting depth always follows the number of leading stars: going deeper
# opens as many <ul> as needed, going shallower closes the surplus ones, so
# items with the same marker always end up on the same level. A line that is
# not a list item closes exactly the levels that are open, drops the list
# entry from state and is then rendered through parseline.
#
# Returns the HTML for this line as a String.
def parselist(str, state)
  in_list = state.last =~ /\d/
  depth = in_list ? state.last.to_i : 0
  stars = str[/^(\*+)\s+/, 1]

  if stars.nil?
    # Leaving the list: close every open level, no more and no less.
    state.pop if in_list
    return ('</ul>' * depth) + parseline(str, state)
  end

  level = stars.length
  html = if level > depth
           '<ul>' * (level - depth)
         else
           '</ul>' * (depth - level)
         end

  if in_list
    state[-1] = level.to_s
  else
    state.push(level.to_s)
  end

  html + str.gsub(/\*+\s+(.+)\n*/, '<li>\1</li>')
end
|
2010-04-20 23:05:11 +00:00
|
|
|
|
2012-05-13 08:22:42 +07:00
|
|
|
# Turn URLs in str into HTML links and return the new string.
#
# Recognized forms (the delimiters are accepted both raw and HTML-escaped,
# because parse escapes the text before this method runs):
#
#   <<url|label>>   -> <a href="url">label</a>
#   "label":url     -> <a href="url">label</a>
#   <<url>>         -> <a href="url">url</a>
#   url             -> <a href="url">url</a>  (at line start, or after
#                                              whitespace or "(")
#
# A scheme written as "ttp://" or "ttps://" gets its missing "h" restored in
# the href.
def parseurl(str)
  # Basic URL pattern. The path stops at whitespace and at "<" so that markup
  # generated by earlier passes (</li>, </div>, ...) is never swallowed into
  # the link. The path is a single optional group rather than a repeated one,
  # which matches the same text without nested-quantifier backtracking.
  url = %r{h?ttps?://\[?(?::{0,2}[\w\-]+)(?:(?::{1,2}|\.)[\w\-]+)*\]?(?::\d+)*(?:/[^\s<]*)?}

  open_tag = /(?:<<|&lt;&lt;)/
  close_tag = /(?:>>|&gt;&gt;)/
  quote = /(?:"|&quot;)/

  str.gsub(/#{open_tag}\s*(#{url})\s*\|\s*(.+?)\s*#{close_tag}/, '<a href="\1">\2</a>') # <<url|label>>
     .gsub(/(^|\s+)#{quote}(.+?)#{quote}:(#{url})/, '\1<a href="\3">\2</a>')          # "label":url
     .gsub(/#{open_tag}\s*(#{url})\s*#{close_tag}/, '<a href="\1">\1</a>')              # <<url>>
     .gsub(/(^|[\s\(])(#{url})/, '\1<a href="\2">\2</a>')                               # url
     .gsub(/<a href="ttp/, '<a href="http')                                             # Fix ttp(s) scheme
end
|
2012-05-16 01:26:30 +07:00
|
|
|
|
|
|
|
# Make the parser helpers callable directly on the module (e.g. DText.parse).
module_function :parse, :parseline, :parseinline, :parselist, :parseurl
|
2010-10-11 04:33:50 +00:00
|
|
|
|
|
|
|
# Split a DText-formatted block (an HTML fragment) into individual quote
# blocks. This changes:
#
#   <div><blockquote>text</blockquote></div>
#
# to
#
#   <div><block id='1'/></div>
#
# and
#
#   <blockquote>text</blockquote>
#
# This allows translating each quotation separately. These blocks are
# reconstructed into a single HTML fragment using combine_blocks.
#
# doc      - parsed document or element to search for blockquotes.
# blocks   - collection that receives the HTML of every extracted quote,
#            indexed by its sequence number.
# next_seq - one-element array holding the next sequence number to hand out;
#            it is shared across the recursive calls.
#
# Returns doc, with each blockquote replaced by a <block id='N'/> placeholder.
def split_block(doc, blocks, next_seq=[1])
  until (quote = doc.at("//blockquote")).nil?
    id = next_seq[0]
    next_seq[0] = id + 1

    quote.swap("<block id='%i'/>" % id)

    # Quotes nested inside this one are extracted first, so the stored HTML
    # only contains placeholders for them.
    blocks[id] = split_block(quote, blocks, next_seq).to_html
  end

  doc
end
|
|
|
|
|
|
|
|
# Parse html with Hpricot and split it into quote blocks via split_block.
#
# html   - HTML fragment to split.
# blocks - collection filled by split_block; index 0 receives the top-level
#          fragment in which quotes have been replaced by placeholders.
#
# Returns the HTML stored at blocks[0].
def split_blocks(html, blocks)
  root = split_block(Hpricot(html), blocks)
  blocks[0] = root.to_html
end
|
|
|
|
|
2010-10-13 03:37:53 +00:00
|
|
|
# Rebuild an HTML fragment by replacing every <block id='N'/> placeholder in
# top with the (recursively combined) fragment stored at blocks[N].
#
# top        - HTML fragment that may contain placeholders.
# blocks     - collection of fragments indexed by id.
# logging_id - optional comment id, only used in the error message. It is
#              forwarded to the nested calls so that a missing nested
#              fragment is reported against the same comment.
#
# Returns the combined HTML as a String.
# Raises RuntimeError when a placeholder refers to an id missing from blocks.
def combine_block(top, blocks, logging_id = nil)
  doc = Hpricot(top)

  doc.search("block").each do |placeholder|
    id = placeholder.get_attribute("id").to_i

    unless blocks.include?(id)
      comment = logging_id || "(unknown)"
      raise "Comment fragment requires fragment ##{id} which doesn't exist: comment ##{comment}, #{placeholder}"
    end

    placeholder.swap(combine_block(blocks[id], blocks, logging_id))
  end

  doc.to_html
end
|
|
|
|
|
2010-10-13 03:37:53 +00:00
|
|
|
# Reassemble the fragments produced by split_blocks into one HTML string,
# starting from the top-level fragment stored at blocks[0].
#
# blocks     - collection of fragments indexed by id.
# logging_id - optional comment id passed on to combine_block.
#
# Returns whatever combine_block returns for the top-level fragment.
def combine_blocks(blocks, logging_id = nil)
  root = blocks[0]
  combine_block(root, blocks, logging_id)
end
|
|
|
|
|
|
|
|
# Add the specified class to all top-level HTML elements.
#
# html - HTML fragment, parsed with Hpricot.
# add  - class name to append to each top-level node's "class" attribute; an
#        existing value is kept and separated from the new one by a space.
#
# Returns the modified fragment as an HTML string.
# NOTE(review): every child of the parsed document is expected to respond to
# get_attribute/set_attribute -- confirm how top-level text nodes behave.
def add_html_class(html, add)
  doc = Hpricot(html)

  doc.children.each do |node|
    existing = node.get_attribute("class") || ""
    prefix = existing.empty? ? existing : existing + " "
    node.set_attribute("class", prefix + add)
  end

  doc.to_html
end
|
|
|
|
|
|
|
|
# Make the block split/combine helpers callable directly on the module.
module_function :split_block, :split_blocks, :combine_block, :combine_blocks, :add_html_class
|
2010-04-20 23:05:11 +00:00
|
|
|
end
|