1 # frozen_string_literal: true
# Upper bound, in characters, used when collecting and truncating description text.
4 DESCRIPTION_MAX_LENGTH = 500
# Minimum length a word-boundary truncation must reach to be preferred over a
# plain truncation at DESCRIPTION_MAX_LENGTH.
5 DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH = 450
# Source of a negated regexp character class. It is interpolated into
# lookbehinds so that shorthand patterns only match at the start of a line or
# right after a character outside the listed set.
6 URL_UNSAFE_CHARS = "[^\\w!#$%&'*+,./:;=?@_~^\\-]"
# Builds the rich-text object matching +format+ for +text+.
# A nil +text+ is replaced by an empty string before it is handed to the class.
# NOTE(review): the line that opens these `when` branches, and whatever happens
# for any other format value, are not visible in this excerpt.
8 def self.new(format, text)
10 when "html" then HTML.new(text || "")
11 when "markdown" then Markdown.new(text || "")
12 when "text" then Text.new(text || "")
# Mix in Action View's text and output-safety helper modules.
# NOTE(review): the enclosing class header is not visible in this excerpt;
# presumably this is the SimpleFormat class instantiated by simple_format.
17 include ActionView::Helpers::TextHelper
18 include ActionView::Helpers::OutputSafetyHelper
# Cleans +text+ with the OSM Sanitize configuration and marks the result as
# HTML-safe. The second argument is accepted but never used.
def sanitize(text, _options = {})
  cleaned = Sanitize.clean(text, Sanitize::Config::OSM)
  cleaned.html_safe
end
# Truncates the rendered HTML once roughly +max_length+ characters of content
# have been seen, counting each <img> as +img_length+ characters.
#
# With a nil +max_length+ the untruncated HTML is returned as-is; otherwise the
# result is a hash with the keys :truncated and :html.
# NOTE(review): the assignments of `html_doc` and `truncated`, and a few
# branch lines, are not visible in this excerpt.
38 def truncate_html(max_length = nil, img_length = 1000)
40 return html_doc if max_length.nil?
# Work on a parsed fragment so whole nodes can be removed.
42 doc = Nokogiri::HTML::DocumentFragment.parse(html_doc)
# Tag names for which an ancestor of the cut-off point is removed (see below);
# ancestors with other names are left in place.
43 keep_or_discards = %w[p h1 h2 h3 h4 h5 h6 pre a table ul ol dl]
44 accumulated_length = 0
# Parent of the node at which the budget ran out; advanced upwards as the
# traversal reaches each ancestor.
45 exceeded_node_parent = nil
48 doc.traverse do |node|
# Budget already spent: decide what happens to every later node.
49 if accumulated_length >= max_length
50 if node == exceeded_node_parent
# Move the marker one level up so the next ancestor is recognised too.
51 exceeded_node_parent = node.parent
52 node.remove if keep_or_discards.include?(node.name)
# Only leaf nodes contribute to the running length.
59 next unless node.children.empty?
# Text contributes its own length (under a condition not visible here) ...
62 accumulated_length += node.text.length
# ... and an image contributes the fixed img_length.
63 elsif node.name == "img"
64 accumulated_length += img_length
# This node used up the budget: remember where the cut happened.
67 if accumulated_length >= max_length
69 exceeded_node_parent = node.parent
# Entries of the result hash; the HTML is marked html_safe.
75 :truncated => truncated,
76 :html => doc.to_html.html_safe
# Formats +text+ through a fresh SimpleFormat instance, passing
# :dir => "auto" as its options argument.
def simple_format(text)
  formatter = SimpleFormat.new
  formatter.simple_format(text, :dir => "auto")
end
# Cleans +text+ with the OSM Sanitize configuration and marks the result as
# HTML-safe.
# NOTE(review): the header of the method this line belongs to is not visible
# in this excerpt.
87 Sanitize.clean(text, Sanitize::Config::OSM).html_safe
# Turns link-like text into anchors.
# The text is HTML-escaped first, then link shorthands and host shorthands are
# expanded, and finally auto_link is run with +mode+ (default :urls).
# NOTE(review): the call chain may continue on a line not visible in this
# excerpt.
90 def linkify(text, mode = :urls)
91 ERB::Util.html_escape(text)
92 .then { |html| expand_link_shorthands(html) }
93 .then { |html| expand_host_shorthands(html) }
94 .then { |html| auto_link(html, mode) }
# Builds [regexp, replacement] pairs from Settings.linkify.detection_rules;
# expand_link_shorthands feeds these pairs to gsub.
# NOTE(review): the receiver of `.wrap` and the iterator lines around the rule
# body are not visible in this excerpt.
100 def gsub_pairs_for_linkify_detection
# `&.` tolerates Settings.linkify being nil.
102 .wrap(Settings.linkify&.detection_rules)
# Keep only rules that have a path template and an array of patterns.
103 .select { |rule| rule.path_template && rule.patterns.is_a?(Array) }
# Replacement text: the rule's own host, or this server's protocol and URL,
# followed by "/" and the path template.
105 expanded_path = "#{rule.host || "#{Settings.server_protocol}://#{Settings.server_url}"}/#{rule.path_template}"
# Non-string patterns are ignored.
107 .select { |pattern| pattern.is_a?(String) }
# Case-insensitive match, allowed only at the start of a line or right after a
# character matched by URL_UNSAFE_CHARS.
108 .map { |pattern| [Regexp.new("(?<=^|#{URL_UNSAFE_CHARS})#{pattern}", Regexp::IGNORECASE), expanded_path] }
# Applies every [pattern, replacement] pair from
# gsub_pairs_for_linkify_detection to +text+, in order, and returns the result.
def expand_link_shorthands(text)
  expanded = text
  gsub_pairs_for_linkify_detection.each do |pattern, replacement|
    expanded = expanded.gsub(pattern, replacement)
  end
  expanded
end
# Replaces configured host shorthands in +text+ with the server protocol
# followed by the first host of the corresponding host list.
# A shorthand is only considered when its replacement is set and its host list
# has at least one truthy entry; it must appear at the start of a line or right
# after a character matched by URL_UNSAFE_CHARS.
def expand_host_shorthands(text)
  shorthand_sets = [
    [Settings.linkify_hosts, Settings.linkify_hosts_replacement],
    [Settings.linkify_wiki_hosts, Settings.linkify_wiki_hosts_replacement]
  ]

  expanded = text
  shorthand_sets.each do |hosts, replacement|
    next unless replacement && hosts&.any?

    shorthand = /(?<=^|#{URL_UNSAFE_CHARS})\b#{Regexp.escape(replacement)}/
    expanded = expanded.gsub(shorthand) { "#{Settings.server_protocol}://#{hosts[0]}" }
  end
  expanded
end
# Runs Rinku's auto-linker over +text+ in the given +mode+, adding fixed
# rel/dir attributes to each link and using format_link_text for the link text.
def auto_link(text, mode)
  Rinku.auto_link(text, mode, 'rel="nofollow noopener noreferrer" dir="auto"') do |url|
    format_link_text(url)
  end
end
# Produces the display text for a linked +url+.
# The URL is passed through shorten_host twice: first with the main host
# settings, then with the wiki host settings. In the wiki pass the first match
# of the optional path prefix setting is removed from the path; an unset
# prefix yields an empty pattern.
def format_link_text(url)
  main_shortened = shorten_host(url, Settings.linkify_hosts, Settings.linkify_hosts_replacement)
  wiki_hosts = Settings.linkify_wiki_hosts
  wiki_replacement = Settings.linkify_wiki_hosts_replacement

  shorten_host(main_shortened, wiki_hosts, wiki_replacement) do |path|
    optional_prefix = Regexp.new(Settings.linkify_wiki_optional_path_prefix || "")
    path.sub(optional_prefix, "")
  end
end
# Rewrites the host part of an http(s) URL for display.
# The URL is split into scheme+host, host, and the remainder; when +hosts+
# includes the host, the remainder is first passed through the optional block.
# NOTE(review): the condition that chooses between the two strings below, and
# the value returned for URLs that do not match, are on lines not visible in
# this excerpt.
142 def shorten_host(url, hosts, hosts_replacement)
143 %r{^(https?://([^/]*))(.*)$}.match(url) do |m|
144 scheme_host, host, path = m.captures
# `&.` tolerates a nil host list.
145 if hosts&.include?(host)
146 path = yield(path) if block_given?
# Replacement prefix followed by the (possibly rewritten) path.
148 "#{hosts_replacement}#{path}"
# Original scheme and host followed by the (possibly rewritten) path.
150 "#{scheme_host}#{path}"
# Formats this object's own content with simple_format, then linkifies the
# result.
# NOTE(review): the header of the enclosing method is not visible in this
# excerpt.
159 linkify(simple_format(self))
# Rich text whose content is parsed as a Kramdown document; subclass of Base.
167 class Markdown < Base
# Renders the parsed document to HTML, sanitizes it, then linkifies it with
# mode :all.
# NOTE(review): the header of the enclosing method is not visible in this
# excerpt.
169 linkify(sanitize(document.to_html), :all)
# Looks up the first image element of the document once; because the guard is
# `defined?`, a nil result is cached too. Yields the element's "src" attribute,
# or nil when no image element was found.
# NOTE(review): the method headers for these fragments are not visible in this
# excerpt.
177 @image_element = first_image_element(document.root) unless defined? @image_element
178 @image_element.attr["src"] if @image_element
# Same cached lookup, yielding the element's "alt" attribute instead.
182 @image_element = first_image_element(document.root) unless defined? @image_element
183 @image_element.attr["alt"] if @image_element
# Memoized with `defined?`, so a nil description is not recomputed.
# NOTE(review): the header of the enclosing method is not visible in this
# excerpt.
187 return @description if defined? @description
# Text found by first_truncated_text_content, starting at the document root.
189 @description = first_truncated_text_content(document.root)
# Reuse the parsed document when it has already been built.
# NOTE(review): the header of the enclosing method and the tail of the first
# lambda are not visible in this excerpt.
195 return @document if @document
# Parse this object's own content with Kramdown.
197 @document = Kramdown::Document.new(self)
# Predicate: should this element receive dir="auto"?
199 should_get_dir_auto = lambda do |el|
200 dir_auto_types = [:p, :header, :codespan, :codeblock, :pre, :ul, :ol, :table, :dl, :math]
201 return true if dir_auto_types.include?(el.type)
# Links whose only child is a text node equal to their own href also qualify.
202 return true if el.type == :a && el.children.length == 1 && el.children[0].type == :text && el.children[0].value == el.attr["href"]
# Recursively sets dir="auto" on qualifying elements, keeping any dir
# attribute that is already set.
207 add_dir = lambda do |element|
208 element.attr["dir"] ||= "auto" if should_get_dir_auto.call(element)
209 element.children.each(&add_dir)
# Start the walk at the document root.
211 add_dir.call(@document.root)
# Depth-first search for the first image element with a non-blank "src"
# attribute, starting at +element+ itself. Returns that element, or nil when
# there is none.
def first_image_element(element)
  return element if image?(element) && element.attr["src"].present?

  element.children.each do |child|
    found = first_image_element(child)
    return found if found
  end

  nil
end
# Returns the truncated text of the first paragraph, searching depth-first
# from +element+, that yields non-nil text.
# A paragraph element returns its own truncated text, which may be nil; for
# any other element the children are searched in order. Returns nil when
# nothing is found.
def first_truncated_text_content(element)
  return truncated_text_content(element) if paragraph?(element)

  element.children.each do |child|
    content = first_truncated_text_content(child)
    return content unless content.nil?
  end

  nil
end
# Collects text below +element+ and returns it shortened to at most
# DESCRIPTION_MAX_LENGTH characters, or nil when the collected text is blank.
# NOTE(review): the initialisation of `text` and the lines that append to it
# are not visible in this excerpt.
236 def truncated_text_content(element)
# Recursive walker: text nodes are handled directly, other nodes through their
# children.
239 append_text = lambda do |child|
240 if child.type == :text
243 child.children.each do |c|
# Stop visiting further siblings once more than enough text has been gathered.
245 break if text.length > DESCRIPTION_MAX_LENGTH
249 append_text.call(element)
251 return nil if text.blank?
# First attempt: cut at the start of a whitespace run so no word is split.
253 text_truncated_to_word_break = text.truncate(DESCRIPTION_MAX_LENGTH, :separator => /(?<!\s)\s+/)
# Keep the word-boundary cut only if it is still long enough; the plain cut at
# DESCRIPTION_MAX_LENGTH below is presumably the fallback branch.
255 if text_truncated_to_word_break.length >= DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH
256 text_truncated_to_word_break
258 text.truncate(DESCRIPTION_MAX_LENGTH)
# True for an :img element, or for an :html_element whose value is "img".
# NOTE(review): the header of the enclosing method is not visible in this
# excerpt; presumably it is the image? predicate called by first_image_element.
263 element.type == :img || (element.type == :html_element && element.value == "img")
# True when +element+ is a paragraph: either a :p element, or an
# :html_element whose value is "p".
def paragraph?(element)
  return true if element.type == :p

  element.type == :html_element && element.value == "p"
end
# HTML-escapes this object's own content, formats it with simple_format, then
# linkifies the result.
# NOTE(review): the headers of the enclosing method and class are not visible
# in this excerpt.
273 linkify(simple_format(ERB::Util.html_escape(self)))