moebooru/lib/dtext.rb

require 'cgi'
require 'nokogiri'

# Converts DText markup into HTML.
#
# Every method is exported with +module_function+, so the usual entry point is
# <tt>DText.parse(text)</tt>. The remaining methods are the individual passes
# that +parse+ chains together.
module DText
  # Renders a DText string as HTML.
  #
  # The text is HTML-escaped before any markup is interpreted, so every tag in
  # the result is generated by this module. The caller's string is never
  # modified, which also means frozen strings are accepted.
  #
  # str - the raw DText source; +nil+ is treated as an empty string.
  #
  # Returns the generated markup after a round trip through a Nokogiri HTML
  # fragment.
  def parse(str)
    state = ['newline']

    # Normalize newlines on a copy instead of with bang methods: \r\n and bare
    # \r become \n, and runs of three or more newlines collapse to two.
    text = str.to_s.strip.gsub(/\r\n?/, "\n").gsub(/\n{3,}/, "\n\n")
    text = CGI.escapeHTML(text)

    # Parse headers and lists first, line by line. each_line keeps the
    # trailing newline on every line, which the later passes rely on.
    result = text.each_line("\n").map { |line| parseline(line, state) }.join

    # Parse inline tags on the text as a whole.
    result = parseinline(result)

    # The passes above leave tags open on purpose (<li> is never closed, and a
    # list that runs to the end of the input is not closed either); Nokogiri
    # is relied upon to serialize well-formed HTML.
    Nokogiri::HTML::DocumentFragment.parse(result).to_html
  end

  # Applies every inline substitution: wiki and tag-search links, bold and
  # italic, post/forum/comment/pool short links, spoilers, quotes, URLs, and
  # finally the conversion of the remaining newlines to <br>.
  #
  # str - already HTML-escaped text. It is modified IN PLACE.
  #
  # Returns str.
  def parseinline(str)
    # [[title]] or [[title|label]] links to the wiki page named title.
    str.gsub!(/\[\[(.+?)\]\]/) do
      title, label = $1.split('|', 2)
      label ||= title
      "<a href=\"/wiki/show?title=#{CGI.escape(CGI.unescapeHTML(title.tr(" ", "_")))}\">#{label}</a>"
    end
    # {{post tags here}} links to a post search for those tags.
    str.gsub!(/\{\{(.+?)\}\}/) do
      tags = $1
      "<a href=\"/post?tags=#{CGI.escape(CGI.unescapeHTML(tags))}\">#{tags}</a>"
    end

    # Miscellaneous single line tags. The matches are lazy so that several
    # tags on one line are converted separately.
    str.gsub!(/\[b\](.+?)\[\/b\]/, '<strong>\1</strong>')
    str.gsub!(/\[i\](.+?)\[\/i\]/, '<em>\1</em>')
    # Short links; the label keeps the case the author typed.
    str.gsub!(/(post #(\d+))/i, '<a href="/post/show/\2">\1</a>')
    str.gsub!(/(forum #(\d+))/i, '<a href="/forum/show/\2">\1</a>')
    str.gsub!(/(comment #(\d+))/i, '<a href="/comment/show/\2">\1</a>')
    str.gsub!(/(pool #(\d+))/i, '<a href="/pool/show/\2">\1</a>')

    # Single line spoiler tags (opening and closing tag on the same line),
    # without and with a custom warning label.
    str.gsub!(/\[spoilers?\](.+?)\[\/spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">spoiler</span></span><span class="spoilertext" style="display: none">\1</span>')
    str.gsub!(/\[spoilers?=(.+?)\](.+?)\[\/spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">\1</span></span><span class="spoilertext" style="display: none">\2</span>')

    # Multi line spoiler tags: whatever opening/closing tags are left after
    # the single line pass are converted independently of each other.
    str.gsub!(/\[spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">spoiler</span></span><div class="spoilertext" style="display: none">')
    str.gsub!(/\[spoilers?=(.+?)\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">\1</span></span><div class="spoilertext" style="display: none">')
    str.gsub!(/\[\/spoilers?\]/, '</div>')

    # Quote.
    str.gsub!(/\[quote\]/, '<blockquote><div>')
    str.gsub!(/\[\/quote\]/, '</div></blockquote>')

    str = parseurl(str)

    # Extraneous newlines before a closing div are unnecessary.
    str.gsub!(/\n+(<\/div>)/, '\1')
    # So are the ones after headers, lists, and blockquotes.
    str.gsub!(/(<\/(ul|h\d+|blockquote)>)\n+/, '\1')
    # And the ones after an opening blockquote.
    str.gsub!(/(<blockquote><div>)\n+/, '\1')
    # Every newline that is left is a visible line break.
    str.gsub!(/\n/, '<br>')
    str
  end

  # Converts a single line of block-level markup.
  #
  # str   - one line of escaped text, normally still ending in "\n".
  # state - Array used as a stack of Strings. While a list is open its last
  #         entry is the current list depth as a numeric string. parselist
  #         updates it in place.
  #
  # Returns list markup while inside (or entering) a list, an <hN> element for
  # "hN. title" lines (N from 1 to 6), and str itself otherwise.
  def parseline(str, state)
    if state.last =~ /\d/ || str =~ /^\*+\s+/
      parselist(str, state)
    elsif str =~ /^(h[1-6])\.\s*(.+)\n*/
      "<#{$1}>#{$2}</#{$1}>"
    else
      str
    end
  end

  # Converts one line while a list is open or being opened.
  #
  # The number of leading stars is the nesting depth of the item. The depth
  # of the previous item is kept as a numeric string on top of state, and the
  # difference decides how many <ul> tags are opened or closed, so the tags
  # emitted over a whole list are always balanced. <li> is never closed here;
  # parse leaves that to Nokogiri.
  #
  # str   - one line of escaped text.
  # state - the stack described in parseline; modified in place.
  #
  # Returns the list markup for str. When str is not a list item, every open
  # level is closed and the line is handed back to parseline.
  def parselist(str, state)
    in_list = !(state.last =~ /\d/).nil?
    depth = in_list ? state.last.to_i : 0

    unless str =~ /^(\*+)\s+/
      # The list is over. Close exactly the levels that are open (stars inside
      # ordinary text must not be counted as a list depth), drop the depth
      # marker and let parseline treat the line as regular text or a header.
      state.pop if in_list
      return ('</ul>' * depth) + parseline(str, state)
    end

    stars = $1.length
    if in_list
      state[-1] = stars.to_s
    else
      state.push(stars.to_s)
    end

    # Open or close as many levels as the depth changed, so that items with
    # the same number of stars always end up at the same level.
    delta = stars - depth
    tags = delta > 0 ? '<ul>' * delta : '</ul>' * (-delta)
    tags + str.gsub(/\*+\s+(.+)\n*/, '<li>\1')
  end

  # Turns URLs into links. Four forms are recognized: a bare URL, <<url|label>>,
  # "label":url and <<url>>. The input is expected to be HTML-escaped already,
  # which is why the patterns look for &lt;, &gt; and &quot;.
  #
  # str - escaped text. It is modified IN PLACE.
  #
  # Returns str.
  def parseurl(str)
    # Basic URL pattern. It contains six capture groups and group 1 is the
    # whole URL; the replacement strings below count on that numbering.
    url = /(h?ttps?:\/\/\[?(:{0,2}[\w\-]+)((:{1,2}|\.)[\w\-]+)*\]?(:\d+)*(\/[^\s\n<]*)*)/

    # The bare form has to come first: it only matches after whitespace, "("
    # or ">", so it cannot see URLs that are part of the other three forms.
    str.gsub!(/(^|[\s\(>])#{url}/, '\1<a href="\2">\2</a>')                       # url
    str.gsub!(/&lt;&lt;\s*#{url}\s*\|\s*(.+?)\s*&gt;&gt;/, '<a href="\1">\7</a>') # <<url|label>>
    str.gsub!(/(^|[\s>])&quot;(.+?)&quot;:#{url}/, '\1<a href="\3">\2</a>')       # "label":url
    str.gsub!(/&lt;&lt;\s*#{url}\s*&gt;&gt;/, '<a href="\1">\1</a>')              # <<url>>
    # The pattern also accepts the "ttp(s)://" spelling; restore the scheme in
    # the href and leave the visible text as typed.
    str.gsub!(/<a href="ttp/, '<a href="http')
    str
  end

  module_function :parse, :parseline, :parseinline, :parselist, :parseurl
end
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00			`require 'cgi'`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00			`require 'nokogiri'`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00
			`module DText`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00			`def parse(str)`
Use string for all the state 2012-05-13 14:57:31 +07:00			`state = ['newline']`
implement Dtext 2012-05-13 08:22:42 +07:00			`result = ""`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00
Add comments 2012-05-22 06:58:09 +07:00			`# Normalize newlines.`
strip!, not strip. It was previously a noop. 2012-11-18 18:57:42 +07:00			`str.strip!`
it's \r and \r\n we need to replace, not \r\n and \n. 2012-11-18 18:35:03 +07:00			`str.gsub!(/(\r\n?)/, "\n")`
implement Dtext 2012-05-13 08:22:42 +07:00			`str.gsub!(/\n{3,}/, "\n\n")`
Move CGI.escapeHTML 2012-05-18 21:28:38 +07:00			`str = CGI.escapeHTML str`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00
Add comments 2012-05-22 06:58:09 +07:00			`# Keep newline, use carriage return for split.`
Stop eating newlines. (also ensure the split results something before counting stars) 2012-05-25 08:59:23 -07:00			`str.gsub!("\n", "\n\r")`
Use carriage return for line split 2012-05-10 12:35:11 +07:00			`data = str.split("\r")`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00
Add comments 2012-05-22 06:58:09 +07:00			`# Parse header and list first, line by line.`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00			`data.each do \|d\|`
implement Dtext 2012-05-13 08:22:42 +07:00			`result << parseline(d, state)`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00			`end`
Add comments 2012-05-22 06:58:09 +07:00
			`# Parse inline tags as a whole.`
Move last <br> substitution to parseinline. 2012-05-19 00:56:04 +07:00			`result = parseinline(result)`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00
Add comments 2012-05-22 06:58:09 +07:00			`# Nokogiri ensures valid html output.`
implement Dtext 2012-05-13 08:22:42 +07:00			`Nokogiri::HTML::DocumentFragment.parse(result).to_html`
			`end`

			`def parseinline(str)`
Add comments 2012-05-22 06:58:09 +07:00			`# Short links subtitution:`
Fixed link to wiki generation. 2012-05-25 10:18:21 -07:00			`str.gsub!(/\[\[(.+?)\]\]/) do # [[title]] or [[title\|label]] ;link to wiki`
			`data = $1.split('\|', 2)`
			`title = data[0]`
			`label = data[1].nil? ? title : data[1]`
			`"<a href=\"/wiki/show?title=#{CGI.escape(CGI.unescapeHTML(title.tr(" ", "_")))}\">#{label}</a>"`
1.9 should have better unicode. --HG-- extra : rebase_source : 7b3597fc62972a0ea1db9ea5d6f179308a280173 2012-03-12 16:02:05 +00:00			`end`
Add some comments for parseinline 2012-05-19 00:44:11 +07:00			`str.gsub!(/\{\{(.+?)\}\}/) do # {{post tags here}} ;search post with tags`
Nuked /index from most (if not all) places. --HG-- branch : 3.2.0-release 2012-09-21 01:39:06 -07:00			`"<a href=\"/post?tags=#{CGI.escape(CGI.unescapeHTML($1))}\">#{$1}</a>"`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00			`end`

Add comments 2012-05-22 06:58:09 +07:00			`# Miscellaneous single line tags subtitution.`
Use lazy regex. Fix consecutive [b] or [i] tags substitution. 2012-06-02 01:17:42 +07:00			`str.gsub! /\[b\](.+?)\[\/b\]/, '<strong>\1</strong>'`
			`str.gsub! /\[i\](.+?)\[\/i\]/, '<em>\1</em>'`
Capture the original case from shortlinks 2012-05-18 23:06:41 +07:00			`str.gsub! /(post #(\d+))/i, '<a href="/post/show/\2">\1</a>'`
			`str.gsub! /(forum #(\d+))/i, '<a href="/forum/show/\2">\1</a>'`
			`str.gsub! /(comment #(\d+))/i, '<a href="/comment/show/\2">\1</a>'`
			`str.gsub! /(pool #(\d+))/i, '<a href="/pool/show/\2">\1</a>'`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00
Add comments 2012-05-22 06:58:09 +07:00			`# Single line spoiler tags.`
Use lazy regex. Fix consecutive [b] or [i] tags substitution. 2012-06-02 01:17:42 +07:00			`str.gsub! /\[spoilers?\](.+?)\[\/spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">spoiler</span></span><span class="spoilertext" style="display: none">\1</span>'`
			`str.gsub! /\[spoilers?=(.+?)\](.+?)\[\/spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">\1</span></span><span class="spoilertext" style="display: none">\2</span>'`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00
Add comments 2012-05-22 06:58:09 +07:00			`# Multi line spoiler tags.`
Use like... 4 versions of spoiler tag 2012-05-18 23:56:05 +07:00			`str.gsub! /\[spoilers?\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">spoiler</span></span><div class="spoilertext" style="display: none">'`
			`str.gsub! /\[spoilers?=(.+?)\]/, '<span class="spoiler" onclick="Comment.spoiler(this); return false;"><span class="spoilerwarning">\1</span></span><div class="spoilertext" style="display: none">'`
Keep newline after </div>. For consistant layout 2012-05-19 00:18:53 +07:00			`str.gsub! /\[\/spoilers?\]/, '</div>'`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00
Add comments 2012-05-22 06:58:09 +07:00			`# Quote.`
Add links and quote parsing 2012-05-14 04:31:12 +07:00			`str.gsub! /\[quote\]/, '<blockquote><div>'`
			`str.gsub! /\[\/quote\]/, '</div></blockquote>'`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00
Do escape link 2012-05-16 05:38:28 +07:00			`str = parseurl(str)`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00
Whitespace cleanup after lists and headers. 2012-05-25 09:40:20 -07:00			`# Extraneous newlines before closing div are unnecessary.`
Move last <br> substitution to parseinline. 2012-05-19 00:56:04 +07:00			`str.gsub! /\n+(<\/div>)/, '\1'`
Nuke the newlines after blockquote. 2012-05-28 20:00:53 -07:00			`# So are after headers, lists, and blockquotes.`
			`str.gsub! /(<\/(ul\|h\d+\|blockquote)>)\n+/, '\1'`
No newlines after opening blockquote. 2012-07-01 10:11:17 -07:00			`# And after opening blockquote.`
			`str.gsub! /(<blockquote><div>)\n+/, '\1'`
Validate ipv6, and lolttp style scheme. 2012-05-18 05:17:00 +07:00			`str.gsub! /\n/, '<br>'`
do parseinline for list item 2012-05-13 09:30:00 +07:00			`str`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00			`end`

implement Dtext 2012-05-13 08:22:42 +07:00			`def parseline(str, state)`
Use string for all the state 2012-05-13 14:57:31 +07:00			`if state.last =~ /\d/ or str =~ /^\*+\s+/`
implement Dtext 2012-05-13 08:22:42 +07:00			`parselist str, state`
			`elsif str =~ /^(h[1-6])\.\s(.+)\n/`
edogawaconan's patch. Less gsub call 2012-05-18 22:38:31 +07:00			`str = "<#{$1}>#{$2}</#{$1}>"`
implement Dtext 2012-05-13 08:22:42 +07:00			`else`
edogawaconan's patch. Less gsub call 2012-05-18 22:38:31 +07:00			`str`
implement Dtext 2012-05-13 08:22:42 +07:00			`end`
Repo init, Dtext re-implementation attempt. 2012-05-09 11:59:21 +07:00			`end`

implement Dtext 2012-05-13 08:22:42 +07:00			`def parselist(str, state)`
			`html = ""`
Fix state check for list 2012-05-13 15:17:17 +07:00			`if not state.last =~ /\d/`
implement Dtext 2012-05-13 08:22:42 +07:00			`state.push "1"`
			`html << "<ul>"`
			`else`
Stop eating newlines. (also ensure the split results something before counting stars) 2012-05-25 08:59:23 -07:00			`n = (str.split()[0] \|\| "").count("*")`
implement Dtext 2012-05-13 08:22:42 +07:00			`if n < state.last.to_i`
Remove <li> for nested <ul> 2012-05-16 01:55:30 +07:00			`html << '</ul>' * (state.last.to_i - n)`
implement Dtext 2012-05-13 08:22:42 +07:00			`state[-1] = n.to_s`
			`elsif n > state.last.to_i`
Remove <li> for nested <ul> 2012-05-16 01:55:30 +07:00			`html << '<ul>'`
implement Dtext 2012-05-13 08:22:42 +07:00			`state[-1] = (state.last.to_i + 1).to_s`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00			`end`
			`end`
Make it work with rails 2012-05-16 01:26:30 +07:00			`if not str =~ /^\*+\s+/`
implement Dtext 2012-05-13 08:22:42 +07:00			`state.pop`
Remove pretty format for test, it fails anyway. Also remove <p> from test file 2012-05-13 16:15:04 +07:00			`html << "</ul>"`
implement Dtext 2012-05-13 08:22:42 +07:00			`return html + parseline(str, state)`
			`end`
No need to close tag. Nokogiri will do that automagically. 2012-11-18 18:49:32 +07:00			`html << str.gsub(/\+\s+(.+)\n/, '<li>\1')`
implement Dtext 2012-05-13 08:22:42 +07:00			`end`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00
implement Dtext 2012-05-13 08:22:42 +07:00			`def parseurl(str)`
Add comments 2012-05-22 06:58:09 +07:00			`# Basic URL pattern`
No more <> spacing evilry in dtext. 2012-06-07 05:18:27 -07:00			`url = /(h?ttps?:\/\/\[?(:{0,2}[\w\-]+)((:{1,2}\|\.)[\w\-]+)\]?(:\d+)(\/[^\s\n<]))/`
Add comments 2012-05-22 06:58:09 +07:00
Cleaned up comments. 2012-06-07 04:02:51 -07:00			`# Substitute url tag in this form:`
No more <> spacing evilry in dtext. 2012-06-07 05:18:27 -07:00			`str.gsub!(/(^\|[\s\(>])#{url}/, '\1<a href="\2">\2</a>') # url`
Cleaned up comments. 2012-06-07 04:02:51 -07:00			`str.gsub!(/<<\s#{url}\s\\|\s(.+?)\s>>/, '<a href="\1">\7</a>') # <<url\|label>>`
>>>>> for dtext parseurl. 2012-06-07 05:23:46 -07:00			`str.gsub!(/(^\|[\s>])"(.+?)":#{url}/, '\1<a href="\3">\2</a>') # "label":url`
Cleaned up comments. 2012-06-07 04:02:51 -07:00			`str.gsub!(/<<\s#{url}\s>>/, '<a href="\1">\1</a>') # <<url>>`
			`str.gsub!(/<a href="ttp/, '<a href="http') # Fix ttp(s) scheme`
Accidentally dtext's parseurl return value. 2012-06-01 14:19:39 -04:00			`return str`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00			`end`
Make it work with rails 2012-05-16 01:26:30 +07:00
			`module_function :parse, :parseline, :parseinline, :parselist, :parseurl`
--HG-- branch : moe extra : convert_revision : svn%3A2d28d66d-8d94-df11-8c86-00306ef368cb/trunk/moe%405 2010-04-20 23:05:11 +00:00			`end`