moebooru/lib/dtext.rb

# encoding: utf-8
require "cgi"
require "nokogiri"

# Converts DText markup (lists, headers, inline tags, spoilers, quotes and
# links) into HTML. Every method is exposed as a module function, e.g.
# DText.parse(text).
module DText
  # Converts a whole DText document into an HTML fragment.
  #
  # The argument is never modified: all normalization happens on copies, so
  # frozen strings are accepted and the caller's string is left untouched.
  #
  # @param str [String, nil] raw (unescaped) DText; nil is treated as empty
  # @return [String] the HTML produced by Nokogiri's fragment serializer
  def parse(str)
    # Stack of parser states. The last entry is either "newline" or a string
    # of digits holding the current list nesting depth (see parselist).
    state = ["newline"]

    # Normalize newlines (on a copy, never on the caller's string).
    text = str.to_s.strip.gsub(/(\r\n?)/, "\n").gsub(/\n{3,}/, "\n\n")
    text = CGI.escapeHTML(text)

    # Nuke spaces between newlines.
    text = text.gsub(/ *\n */, "\n")
    # Keep newline, use carriage return for split. Every "\r" in the input was
    # already turned into "\n" above, so "\r" is a safe separator here.
    lines = text.gsub("\n", "\n\r").split("\r")

    # Parse header and list first, line by line.
    html = lines.map { |line| parseline(line, state) }.join

    # Parse inline tags as a whole.
    html = parseinline(html)

    # Nokogiri ensures valid html output (e.g. closes tags left open).
    Nokogiri::HTML::DocumentFragment.parse(html).to_html
  end

  # Substitutes inline DText tags (wiki/tag short links, [b], [i], object
  # short links, spoilers, quotes, URLs) and turns the remaining newlines
  # into <br>.
  #
  # Modifies +str+ in place and returns it. The patterns expect text that has
  # already been HTML-escaped (parse does this before calling).
  #
  # @param str [String] HTML-escaped text, modified in place
  # @return [String] the same string object, now containing HTML
  def parseinline(str)
    # Short links substitution:
    str.gsub!(/\[\[(.+?)\]\]/) do # [[title]] or [[title|label]] ;link to wiki
      title, label = Regexp.last_match(1).split("|", 2)
      label ||= title
      "<a href=\"/wiki/show?title=#{CGI.escape(CGI.unescapeHTML(title.tr(" ", "_")))}\">#{label}</a>"
    end
    str.gsub!(/\{\{(.+?)\}\}/) do # {{post tags here}} ;search post with tags
      tags = Regexp.last_match(1)
      "<a href=\"/post?tags=#{CGI.escape(CGI.unescapeHTML(tags))}\">#{tags}</a>"
    end

    # Miscellaneous single line tags substitution.
    str.gsub!(/\[b\](.+?)\[\/b\]/, '<strong>\1</strong>')
    str.gsub!(/\[i\](.+?)\[\/i\]/, '<em>\1</em>')
    # "post #12", "forum #3", ... ; the link text keeps the original case.
    %w(post forum comment pool).each do |kind|
      str.gsub!(/(#{kind} #(\d+))/i, "<a href=\"/#{kind}/show/\\2\">\\1</a>")
    end

    # Single line spoiler tags.
    str.gsub!(/\[spoilers?\](.+?)\[\/spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">spoiler</span></span><span class="spoilertext" style="display: none">\1</span>')
    str.gsub!(/\[spoilers?=(.+?)\](.+?)\[\/spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">\1</span></span><span class="spoilertext" style="display: none">\2</span>')

    # Multi line spoiler tags: whatever the single line patterns above did not
    # consume (`.` does not match a newline) is opened/closed as a <div>.
    str.gsub!(/\[spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">spoiler</span></span><div class="spoilertext" style="display: none">')
    str.gsub!(/\[spoilers?=(.+?)\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">\1</span></span><div class="spoilertext" style="display: none">')
    str.gsub!(/\[\/spoilers?\]/, "</div>")

    # Quote.
    str.gsub!(/\[quote\]/, "<blockquote><div>")
    str.gsub!(/\[\/quote\]/, "</div></blockquote>")

    str = parseurl(str)

    # Extraneous newlines before closing div are unnecessary.
    str.gsub!(/\n+(<\/div>)/, '\1')
    # So are after headers, lists, and blockquotes.
    str.gsub!(/(<\/(ul|h\d+|blockquote)>)\n+/, '\1')
    # And after opening blockquote.
    str.gsub!(/(<blockquote><div>)\n+/, '\1')
    str.gsub!(/\n/, "<br>")
    str
  end

  # Parses one line for block-level markup: list items ("* item") and headers
  # ("h1. Title" .. "h6. Title"). Any other line is returned unchanged.
  #
  # @param str [String] a single line, possibly ending in "\n"
  # @param state [Array<String>] parser state stack; updated by parselist
  # @return [String] HTML for list/header lines, otherwise +str+ itself
  def parseline(str, state)
    # A line is handed to the list parser while a list is open (so the list
    # can be closed) or when the line itself starts a list item.
    if state.last =~ /\d/ || str =~ /^\*+\s+/
      parselist(str, state)
    elsif str =~ /^(h[1-6])\.\s*(.+)\n*/
      tag = Regexp.last_match(1)
      "<#{tag}>#{Regexp.last_match(2)}</#{tag}>"
    else
      str
    end
  end

  # Handles one line while inside, entering or leaving a bullet list.
  #
  # The current nesting depth is kept as a string of digits on top of
  # +state+. <li> elements are deliberately left unclosed; parse relies on
  # Nokogiri to close them.
  #
  # @param str [String] a single line, possibly ending in "\n"
  # @param state [Array<String>] parser state stack, modified in place
  # @return [String] HTML for this line, including any <ul>/</ul> needed
  def parselist(str, state)
    html = ""
    list_item = str =~ /^\*+\s+/
    if state.last =~ /\d/
      depth = state.last.to_i
      # Number of leading stars; 0 when the line is not a list item.
      n = list_item ? str.split[0].count("*") : 0
      if n < depth
        html << "</ul>" * (depth - n)
        state[-1] = n.to_s
      elsif n > depth
        # Only one level is opened per line, however many stars were added.
        html << "<ul>"
        state[-1] = (depth + 1).to_s
      end
      unless list_item
        # The list has ended: drop its state and parse the line normally.
        state.pop
        return html + parseline(str, state)
      end
    else
      # First item of a new list.
      state.push "1"
      html << "<ul>"
    end
    html << str.gsub(/\*+\s+(.+)\n*/, '<li>\1')
  end

  # Turns URLs into links. Supported forms: bare URLs, <<url|label>>,
  # <<url>> and "label":url. A scheme written as ttp:// or ttps:// is linked
  # as http:// or https://.
  #
  # Modifies +str+ in place and returns it. The text is expected to be
  # HTML-escaped already, which is why the patterns look for &lt;, &gt; and
  # &quot; rather than the raw characters.
  #
  # @param str [String] HTML-escaped text, modified in place
  # @return [String] the same string object with links substituted
  def parseurl(str)
    # url
    str.gsub!(%r{(^|[\s\(>])(h?ttps?://(?:(?!&gt;&gt;)[^\s<"])+[^\s<".])}, '\1<a href="\2">\2</a>')

    # <<url|label>>
    str.gsub!(%r{&lt;&lt;(h?ttps?://(?:(?!&gt;&gt;).)+)\|((?:(?!&gt;&gt;).)+)&gt;&gt;}, '<a href="\1">\2</a>')

    # <<url>>
    str.gsub!(%r{&lt;&lt;(h?ttps?:\/\/(?:(?!&gt;&gt;).)+)&gt;&gt;}, '<a href="\1">\1</a>')

    # "label":url
    str.gsub!(%r{(^|[\s>])&quot;((?:(?!&quot;).)+)&quot;:(h?ttps?://[^\s<"]+[^\s<".])}, '\1<a href="\3">\2</a>')

    # Fix ttp(s) scheme
    str.gsub!(/<a href="ttp/, '<a href="http')
    str
  end

  module_function :parse, :parseline, :parseinline, :parselist, :parseurl
end
Declare utf-8 encoding on dtext library. 2012-12-27 22:37:49 +07:00			`# encoding: utf-8`
Double quotes everywhere. Using rubocop's auto-correct. 2014-08-23 16:16:09 +09:00			`require "cgi"`
			`require "nokogiri"`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00
			`module DText`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00			`def parse(str)`
Double quotes everywhere. Using rubocop's auto-correct. 2014-08-23 16:16:09 +09:00			`state = ["newline"]`
implement Dtext 2012-05-13 08:22:42 +07:00			`result = ""`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00
Add comments 2012-05-22 06:58:09 +07:00			`# Normalize newlines.`
strip!, not strip. It was previously a noop. 2012-11-18 18:57:42 +07:00			`str.strip!`
it's \r and \r\n we need to replace, not \r\n and \n. 2012-11-18 18:35:03 +07:00			`str.gsub!(/(\r\n?)/, "\n")`
implement Dtext 2012-05-13 08:22:42 +07:00			`str.gsub!(/\n{3,}/, "\n\n")`
Move CGI.escapeHTML 2012-05-18 21:28:38 +07:00			`str = CGI.escapeHTML str`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00
Removes insignificant space. 2012-11-18 19:07:02 +07:00			`# Nuke spaces between newlines.`
The regexp doesn't have unicode class? Whatever. 2012-11-18 19:11:01 +07:00			`str.gsub!(/ \n /, "\n")`
Add comments 2012-05-22 06:58:09 +07:00			`# Keep newline, use carriage return for split.`
Stop eating newlines. (also ensure the split results something before counting stars) 2012-05-25 08:59:23 -07:00			`str.gsub!("\n", "\n\r")`
Use carriage return for line split 2012-05-10 12:35:11 +07:00			`data = str.split("\r")`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00
Add comments 2012-05-22 06:58:09 +07:00			`# Parse header and list first, line by line.`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00			`data.each do \|d\|`
implement Dtext 2012-05-13 08:22:42 +07:00			`result << parseline(d, state)`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00			`end`
Add comments 2012-05-22 06:58:09 +07:00
			`# Parse inline tags as a whole.`
Move last <br> substitution to parseinline. 2012-05-19 00:56:04 +07:00			`result = parseinline(result)`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00
Add comments 2012-05-22 06:58:09 +07:00			`# Nokogiri ensures valid html output.`
implement Dtext 2012-05-13 08:22:42 +07:00			`Nokogiri::HTML::DocumentFragment.parse(result).to_html`
			`end`

			`def parseinline(str)`
Add comments 2012-05-22 06:58:09 +07:00			`# Short links subtitution:`
Fixed link to wiki generation. 2012-05-25 10:18:21 -07:00			`str.gsub!(/\[\[(.+?)\]\]/) do # [[title]] or [[title\|label]] ;link to wiki`
Avoid the use of Perl-style backrefs. Rubocop(tm). 2014-08-23 17:54:43 +09:00			`data = Regexp.last_match[1].split("\|", 2)`
Fixed link to wiki generation. 2012-05-25 10:18:21 -07:00			`title = data[0]`
			`label = data[1].nil? ? title : data[1]`
			`"<a href=\"/wiki/show?title=#{CGI.escape(CGI.unescapeHTML(title.tr(" ", "_")))}\">#{label}</a>"`
1.9 should have better unicode. --HG-- extra : rebase_source : 7b3597fc62972a0ea1db9ea5d6f179308a280173 2012-03-12 16:02:05 +00:00			`end`
Add some comments for parseinline 2012-05-19 00:44:11 +07:00			`str.gsub!(/\{\{(.+?)\}\}/) do # {{post tags here}} ;search post with tags`
Avoid the use of Perl-style backrefs. Rubocop(tm). 2014-08-23 17:54:43 +09:00			`"<a href=\"/post?tags=#{CGI.escape(CGI.unescapeHTML(Regexp.last_match[1]))}\">#{Regexp.last_match[1]}</a>"`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00			`end`

Add comments 2012-05-22 06:58:09 +07:00			`# Miscellaneous single line tags subtitution.`
Use lazy regex. Fix consecutive [b] or [i] tags substitution. 2012-06-02 01:17:42 +07:00			`str.gsub! /\[b\](.+?)\[\/b\]/, '<strong>\1</strong>'`
			`str.gsub! /\[i\](.+?)\[\/i\]/, '<em>\1</em>'`
Capture the original case from shortlinks 2012-05-18 23:06:41 +07:00			`str.gsub! /(post #(\d+))/i, '<a href="/post/show/\2">\1</a>'`
			`str.gsub! /(forum #(\d+))/i, '<a href="/forum/show/\2">\1</a>'`
			`str.gsub! /(comment #(\d+))/i, '<a href="/comment/show/\2">\1</a>'`
			`str.gsub! /(pool #(\d+))/i, '<a href="/pool/show/\2">\1</a>'`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00
Add comments 2012-05-22 06:58:09 +07:00			`# Single line spoiler tags.`
Use lazy regex. Fix consecutive [b] or [i] tags substitution. 2012-06-02 01:17:42 +07:00			`str.gsub! /\[spoilers?\](.+?)\[\/spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">spoiler</span></span><span class="spoilertext" style="display: none">\1</span>'`
			`str.gsub! /\[spoilers?=(.+?)\](.+?)\[\/spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">\1</span></span><span class="spoilertext" style="display: none">\2</span>'`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00
Add comments 2012-05-22 06:58:09 +07:00			`# Multi line spoiler tags.`
Use like... 4 versions of spoiler tag 2012-05-18 23:56:05 +07:00			`str.gsub! /\[spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">spoiler</span></span><div class="spoilertext" style="display: none">'`
			`str.gsub! /\[spoilers?=(.+?)\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">\1</span></span><div class="spoilertext" style="display: none">'`
Double quotes everywhere. Using rubocop's auto-correct. 2014-08-23 16:16:09 +09:00			`str.gsub! /\[\/spoilers?\]/, "</div>"`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00
Add comments 2012-05-22 06:58:09 +07:00			`# Quote.`
Double quotes everywhere. Using rubocop's auto-correct. 2014-08-23 16:16:09 +09:00			`str.gsub! /\[quote\]/, "<blockquote><div>"`
			`str.gsub! /\[\/quote\]/, "</div></blockquote>"`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00
Do escape link 2012-05-16 05:38:28 +07:00			`str = parseurl(str)`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00
Whitespace cleanup after lists and headers. 2012-05-25 09:40:20 -07:00			`# Extraneous newlines before closing div are unnecessary.`
Move last <br> substitution to parseinline. 2012-05-19 00:56:04 +07:00			`str.gsub! /\n+(<\/div>)/, '\1'`
Nuke the newlines after blockquote. 2012-05-28 20:00:53 -07:00			`# So are after headers, lists, and blockquotes.`
			`str.gsub! /(<\/(ul\|h\d+\|blockquote)>)\n+/, '\1'`
No newlines after opening blockquote. 2012-07-01 10:11:17 -07:00			`# And after opening blockquote.`
			`str.gsub! /(<blockquote><div>)\n+/, '\1'`
Double quotes everywhere. Using rubocop's auto-correct. 2014-08-23 16:16:09 +09:00			`str.gsub! /\n/, "<br>"`
do parseinline for list item 2012-05-13 09:30:00 +07:00			`str`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00			`end`

implement Dtext 2012-05-13 08:22:42 +07:00			`def parseline(str, state)`
Use string for all the state 2012-05-13 14:57:31 +07:00			`if state.last =~ /\d/ or str =~ /^\*+\s+/`
implement Dtext 2012-05-13 08:22:42 +07:00			`parselist str, state`
			`elsif str =~ /^(h[1-6])\.\s(.+)\n/`
Avoid the use of Perl-style backrefs. Rubocop(tm). 2014-08-23 17:54:43 +09:00			`str = "<#{Regexp.last_match[1]}>#{Regexp.last_match[2]}</#{Regexp.last_match[1]}>"`
implement Dtext 2012-05-13 08:22:42 +07:00			`else`
edogawaconan's patch. Less gsub call 2012-05-18 22:38:31 +07:00			`str`
implement Dtext 2012-05-13 08:22:42 +07:00			`end`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00			`end`

implement Dtext 2012-05-13 08:22:42 +07:00			`def parselist(str, state)`
			`html = ""`
Remove usage of not which couldn't be auto-corrected. 2014-08-23 16:41:06 +09:00			`if state.last =~ /\d/`
Remove extra `()` when calling method. Ruby isn't javascript. 2014-08-23 17:24:55 +09:00			`n = ((str =~ /^\+\s+/ && str.split[0]) \|\| "").count("")`
implement Dtext 2012-05-13 08:22:42 +07:00			`if n < state.last.to_i`
Double quotes everywhere. Using rubocop's auto-correct. 2014-08-23 16:16:09 +09:00			`html << "</ul>" * (state.last.to_i - n)`
implement Dtext 2012-05-13 08:22:42 +07:00			`state[-1] = n.to_s`
			`elsif n > state.last.to_i`
Double quotes everywhere. Using rubocop's auto-correct. 2014-08-23 16:16:09 +09:00			`html << "<ul>"`
implement Dtext 2012-05-13 08:22:42 +07:00			`state[-1] = (state.last.to_i + 1).to_s`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00			`end`
Remove usage of not which couldn't be auto-corrected. 2014-08-23 16:41:06 +09:00			`unless str =~ /^\*+\s+/`
Fix #272, thanks to Zolxys. Should check for /^\*+\s+/ pattern before splitting the line, avoid inserting <ul> by mistake and make sure </ul> inserted exactly once for each <ul> 2013-06-02 22:57:16 +07:00			`state.pop`
			`return html + parseline(str, state)`
			`end`
Remove usage of not which couldn't be auto-corrected. 2014-08-23 16:41:06 +09:00			`else`
			`state.push "1"`
			`html << "<ul>"`
implement Dtext 2012-05-13 08:22:42 +07:00			`end`
No need to close tag. Nokogiri will do that automagically. 2012-11-18 18:49:32 +07:00			`html << str.gsub(/\+\s+(.+)\n/, '<li>\1')`
implement Dtext 2012-05-13 08:22:42 +07:00			`end`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00
implement Dtext 2012-05-13 08:22:42 +07:00			`def parseurl(str)`
Refactor DText.parseurl. 2014-03-09 23:06:06 +09:00			`# url`
			`str.gsub! %r{(^\|[\s\(>])(h?ttps?://(?:(?!>>)[^\s<"])+[^\s<".])}, '\1<a href="\2">\2</a>'`

			`# <<url\|label>>`
			`str.gsub! %r{<<(h?ttps?://(?:(?!>>).)+)\\|((?:(?!>>).)+)>>}, '<a href="\1">\2</a>'`

			`# <<url>>`
			`str.gsub! %r{<<(h?ttps?:\/\/(?:(?!>>).)+)>>}, '<a href="\1">\1</a>'`

			`# "label":url`
			`str.gsub! %r{(^\|[\s>])"((?:(?!").)+)":(h?ttps?://[^\s<"]+[^\s<".])}, '\1<a href="\3">\2</a>'`

			`# Fix ttp(s) scheme`
			`str.gsub! /<a href="ttp/, '<a href="http'`
Remove redundant return. Courtesy rubocop. 2014-08-23 16:56:00 +09:00			`str`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00			`end`
Make it work with rails 2012-05-16 01:26:30 +07:00
			`module_function :parse, :parseline, :parseinline, :parselist, :parseurl`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00			`end`