1 # frozen_string_literal: true
# Length limit handed to `truncate` when a description is built, and the
# threshold at which text collection stops early (see truncated_text_content).
4 DESCRIPTION_MAX_LENGTH = 500
# Minimum length a word-break truncation must reach to be preferred over a
# plain truncation at DESCRIPTION_MAX_LENGTH.
5 DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH = 450
# Regexp source (kept as a String so it can be interpolated) for a negated
# character class: any character that is not a word character or one of the
# listed punctuation marks. It is used inside look-behinds so that shorthand
# patterns only match at the start of a line or after such a character.
6 URL_UNSAFE_CHARS = "[^\\w!#$%&'*+,./:;=?@_~^\\-]"
# Factory: picks the wrapper class for a format name.
#
# format - "html", "markdown" or "text" select HTML, Markdown or Text.
# text   - the raw content; nil is replaced by an empty string before wrapping.
#
# NOTE(review): the `case` head and whatever happens for other format values
# are not visible in this excerpt.
8 def self.new(format, text)
10 when "html" then HTML.new(text || "")
11 when "markdown" then Markdown.new(text || "")
12 when "text" then Text.new(text || "")
# Class-level instance variable backing the memoised list below.
17 @gsub_pairs_for_linkify_detection = nil
# Builds (once, via ||=) the [Regexp, replacement] pairs derived from
# Settings.linkify.detection_rules.
#
# Only rules that have a path_template and whose patterns is an Array are used.
# For each rule the replacement is "<host>/<path_template>", where host falls
# back to "<server_protocol>://<server_url>" when the rule has none.
# Each pattern is compiled case-insensitively with a 1 second match timeout and
# prefixed with a look-behind so it matches only at the start of a line or
# after a URL_UNSAFE_CHARS character.
#
# NOTE(review): the head of the call chain, the per-rule iteration and the
# receiver of `.map` are not visible in this excerpt.
20 def self.gsub_pairs_for_linkify_detection
21 @gsub_pairs_for_linkify_detection ||=
23 .wrap(Settings.linkify&.detection_rules)
24 .select { |rule| rule.path_template && rule.patterns.is_a?(Array) }
26 expanded_path = "#{rule.host || "#{Settings.server_protocol}://#{Settings.server_url}"}/#{rule.path_template}"
29 .map { |pattern| [Regexp.new("(?<=^|#{URL_UNSAFE_CHARS})#{pattern}", Regexp::IGNORECASE, :timeout => 1), expanded_path] }
# Mix in the ActionView text and output-safety helper modules.
34 include ActionView::Helpers::TextHelper
35 include ActionView::Helpers::OutputSafetyHelper
# Cleans +text+ with Sanitize using the Sanitize::Config::OSM configuration and
# marks the result as html_safe.
#
# The second parameter is accepted but never read (hence the leading
# underscore).
37 def sanitize(text, _options = {})
38 Sanitize.clean(text, Sanitize::Config::OSM).html_safe
# Cuts rendered HTML down once a running length budget has been reached.
#
# max_length - budget compared against the running total; when nil, html_doc is
#              returned untouched and nothing is parsed.
# img_length - amount added to the running total for every <img> node
#              (defaults to 1000).
#
# The visible tail builds :truncated and :html entries, the latter being the
# re-serialised fragment marked html_safe.
#
# NOTE(review): this excerpt omits the lines that assign html_doc and
# truncated, and some of the branch conditions, so the comments below describe
# only the statements that are shown.
55 def truncate_html(max_length = nil, img_length = 1000)
57 return html_doc if max_length.nil?
59 doc = Nokogiri::HTML::DocumentFragment.parse(html_doc)
# Element names checked before a node is removed in the over-budget branch.
60 keep_or_discards = %w[p h1 h2 h3 h4 h5 h6 pre a table ul ol dl]
# Running total of text lengths plus img_length per image.
61 accumulated_length = 0
# Parent of the node at which the budget was reached; moved upwards as the
# traversal reaches each such parent in turn.
62 exceeded_node_parent = nil
65 doc.traverse do |node|
# Over-budget handling for nodes visited after the limit was hit.
66 if accumulated_length >= max_length
67 if node == exceeded_node_parent
68 exceeded_node_parent = node.parent
69 node.remove if keep_or_discards.include?(node.name)
# Only childless nodes contribute to the running total.
76 next unless node.children.empty?
79 accumulated_length += node.text.length
80 elsif node.name == "img"
81 accumulated_length += img_length
# Remember where the budget was crossed.
84 if accumulated_length >= max_length
86 exceeded_node_parent = node.parent
92 :truncated => truncated,
93 :html => doc.to_html.html_safe
# Delegates to a fresh SimpleFormat instance, always passing :dir => "auto"
# as the HTML options.
99 def simple_format(text)
100 SimpleFormat.new.simple_format(text, :dir => "auto")
# Cleans +text+ with the Sanitize::Config::OSM configuration and marks the
# result html_safe.
# NOTE(review): the def line that owns this statement is not visible in this
# excerpt.
104 Sanitize.clean(text, Sanitize::Config::OSM).html_safe
# Turns URLs (and configured shorthands) in +text+ into HTML links.
#
# text  - content to process; it is passed through ERB::Util.html_escape first.
# mode  - Rinku auto-link mode, :urls by default.
# hosts - when true, host shorthands are expanded before linking and hosts are
#         shortened again in the block given to Rinku.
# paths - when true, link shorthands are expanded before linking and
#         shorten_link is applied in the block given to Rinku.
#
# Every generated link gets the attributes in link_attr.
# NOTE(review): the end of the Rinku block is not visible in this excerpt;
# Rinku documents that the block's result replaces the link text - confirm the
# block returns the shortened url.
107 def linkify(text, mode = :urls, hosts: true, paths: true)
108 link_attr = 'rel="nofollow noopener noreferrer" dir="auto"'
109 html = ERB::Util.html_escape(text)
111 html = expand_link_shorthands(html) if paths
112 html = expand_host_shorthands(html) if hosts
114 Rinku.auto_link(html, mode, link_attr) do |url|
115 url = shorten_hosts(url) if hosts
116 url = shorten_link(url) if paths
# Applies every [pattern, replacement] pair from
# gsub_pairs_for_linkify_detection to +text+ in order, each with a global gsub,
# and yields the fully substituted string.
# NOTE(review): the receiver of .gsub_pairs_for_linkify_detection is on a line
# not visible in this excerpt.
124 def expand_link_shorthands(text)
126 .gsub_pairs_for_linkify_detection
127 .reduce(text) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
# Replaces short host aliases in +text+ with a full "<protocol>://<host>" prefix.
#
# Uses the rules in Settings.linkify.normalisation_rules that have a
# host_replacement and at least one entry in hosts. For each such rule, every
# occurrence of the (regexp-escaped) host_replacement that starts on a word
# boundary, at the start of a line or after a URL_UNSAFE_CHARS character, is
# replaced by Settings.server_protocol plus the rule's first host.
130 def expand_host_shorthands(text)
132 .wrap(Settings.linkify&.normalisation_rules)
133 .select { |rule| rule.host_replacement && rule.hosts&.any? }
134 .reduce(text) do |text, rule|
135 text.gsub(/(?<=^|#{URL_UNSAFE_CHARS})\b#{Regexp.escape(rule.host_replacement)}/) do
136 "#{Settings.server_protocol}://#{rule.hosts[0]}"
# Threads +url+ through shorten_host once for every rule in
# Settings.linkify.normalisation_rules, feeding each result into the next call.
141 def shorten_hosts(url)
143 .wrap(Settings.linkify&.normalisation_rules)
144 .reduce(url) { |url, rule| shorten_host(url, rule) }
# Rewrites +url+ for display using Settings.linkify.display_rules.
#
# Rules lacking a pattern or a replacement are skipped. Each remaining rule is
# applied in order with String#sub, so only the first match of its pattern is
# replaced; the pattern string is compiled with Regexp.new on every call.
147 def shorten_link(url)
148 Array.wrap(Settings.linkify&.display_rules)
149 .select { |rule| rule.pattern && rule.replacement }
150 .reduce(url) { |url, rule| url.sub(Regexp.new(rule.pattern), rule.replacement) }
# Applies one normalisation rule to an http(s) URL.
#
# The URL is split into "scheme://host", the bare host, and the remainder.
# When the host is listed in rule.hosts, the first match of
# rule.optional_path_prefix (an empty pattern when unset) is removed from the
# remainder; the result is then prefixed with rule.host_replacement if the rule
# has one, otherwise with the original "scheme://host".
#
# NOTE(review): what is produced for a host outside rule.hosts, or for input
# that does not match the URL regexp, is on lines not visible in this excerpt.
153 def shorten_host(url, rule)
154 %r{^(https?://([^/]*))(.*)$}.match(url) do |m|
155 scheme_host, host, path = m.captures
156 if rule.hosts&.include?(host)
157 path = path.sub(Regexp.new(rule.optional_path_prefix || ""), "")
158 if rule.host_replacement
159 "#{rule.host_replacement}#{path}"
161 "#{scheme_host}#{path}"
# Runs the receiver through simple_format, then auto-links the result with
# path shorthands disabled (:paths => false); host handling keeps its default.
# NOTE(review): the def line that owns this statement is not visible in this
# excerpt.
170 linkify(simple_format(self), :paths => false)
# Rich-text variant built on Base; its content is parsed with Kramdown
# (see Kramdown::Document.new(self) further down).
178 class Markdown < Base
# Renders the parsed document to HTML, sanitizes it, then auto-links it using
# mode :all with path shorthands disabled.
# NOTE(review): the def line that owns this statement is not visible in this
# excerpt.
180 linkify(sanitize(document.to_html), :all, :paths => false)
# Looks up the first image element under the document root once and caches it.
# `defined?` is used instead of ||= so that a nil result (no image found) is
# cached as well and the search is not repeated.
188 @image_element = first_image_element(document.root) unless defined? @image_element
# The cached element's "src" attribute, or nil when no image was found.
# NOTE(review): the def line that owns these statements is not visible here.
189 @image_element.attr["src"] if @image_element
# Same cached lookup of the first image element under the document root;
# `defined?` keeps a nil result cached too.
193 @image_element = first_image_element(document.root) unless defined? @image_element
# The cached element's "alt" attribute, or nil when no image was found.
# NOTE(review): the def line that owns these statements is not visible here.
194 @image_element.attr["alt"] if @image_element
# Memoised description text: computed once from the document root via
# first_truncated_text_content. `defined?` makes a nil result count as cached.
# NOTE(review): the def line that owns these statements is not visible here.
198 return @description if defined? @description
200 @description = first_truncated_text_content(document.root)
# Parses the receiver with Kramdown once, caches the result in @document, and
# tags selected elements with dir="auto".
# NOTE(review): the def line, the tail of should_get_dir_auto and the final
# expression are not visible in this excerpt.
206 return @document if @document
208 @document = Kramdown::Document.new(self)
# Decides whether an element should receive dir="auto". Because this is a
# lambda, each `return` below leaves only the lambda, not the enclosing method.
210 should_get_dir_auto = lambda do |el|
211 dir_auto_types = [:p, :header, :codespan, :codeblock, :pre, :ul, :ol, :table, :dl, :math]
212 return true if dir_auto_types.include?(el.type)
# Also accept links whose single text child is exactly the link's own href.
213 return true if el.type == :a && el.children.length == 1 && el.children[0].type == :text && el.children[0].value == el.attr["href"]
# Recursively walks the element tree; ||= leaves an existing dir attribute
# untouched.
218 add_dir = lambda do |element|
219 element.attr["dir"] ||= "auto" if should_get_dir_auto.call(element)
220 element.children.each(&add_dir)
222 add_dir.call(@document.root)
# Depth-first search for the first image element with a non-blank "src".
#
# Yields +element+ itself when it qualifies; otherwise recurses into its
# children in order. `break` is used inside `find` so that the nested image
# that was located is the result, rather than the child it was found under.
227 def first_image_element(element)
228 return element if image?(element) && element.attr["src"].present?
230 element.children.find do |child|
231 nested_image = first_image_element(child)
232 break nested_image if nested_image
# Depth-first search for the first paragraph that yields description text.
#
# For a paragraph element the result is truncated_text_content(element);
# otherwise each child is tried in order. `break` inside `find` makes the text
# itself the result, and a nil from a child lets the search move on to the
# next one.
# NOTE(review): the `else` separating the two branches is not visible in this
# excerpt.
236 def first_truncated_text_content(element)
237 if paragraph?(element)
238 truncated_text_content(element)
240 element.children.find do |child|
241 text = first_truncated_text_content(child)
242 break text unless text.nil?
# Collects text found under +element+ and shortens it for use as a description.
#
# Yields nil when the collected text is blank. Otherwise the text is first
# truncated at a whitespace boundary; that version is used if it is at least
# DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH long, and a plain truncation at
# DESCRIPTION_MAX_LENGTH is used if not.
#
# NOTE(review): the initialisation of `text` and the lines that append to it
# are not visible in this excerpt.
247 def truncated_text_content(element)
# Recursive collector: handles :text nodes and walks the children of others.
250 append_text = lambda do |child|
251 if child.type == :text
254 child.children.each do |c|
# Stop visiting further children once more than the maximum has been gathered.
256 break if text.length > DESCRIPTION_MAX_LENGTH
260 append_text.call(element)
262 return nil if text.blank?
# The separator matches a whitespace run that follows a non-whitespace character.
264 text_truncated_to_word_break = text.truncate(DESCRIPTION_MAX_LENGTH, :separator => /(?<!\s)\s+/)
266 if text_truncated_to_word_break.length >= DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH
267 text_truncated_to_word_break
269 text.truncate(DESCRIPTION_MAX_LENGTH)
# True for an :img element, or for an :html_element whose value is "img".
# NOTE(review): the def line that owns this expression is not visible in this
# excerpt.
274 element.type == :img || (element.type == :html_element && element.value == "img")
# True for a :p element, or for an :html_element whose value is "p".
277 def paragraph?(element)
278 element.type == :p || (element.type == :html_element && element.value == "p")
# HTML-escapes the receiver, runs it through simple_format, then auto-links the
# result with linkify's default options.
# NOTE(review): the def line and enclosing class for this statement are not
# visible in this excerpt.
284 linkify(simple_format(ERB::Util.html_escape(self)))