1 # frozen_string_literal: true
# Phrase literals. NOTE(review): presumably the entries of SPAMMY_PHRASES
# (which is counted against document text further down); the opening line of
# that constant is not shown here - confirm.
5 "Business Description:", "Additional Keywords:"
# Length limit passed to String#truncate when building a description.
8 DESCRIPTION_MAX_LENGTH = 500
# A word-boundary truncation is used only when its result is at least this
# long; see the comparison in truncated_text_content.
9 DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH = 450
# Factory that wraps +text+ in the class matching a format string.
#
# The visible branches map "html" to HTML, "markdown" to Markdown and "text"
# to Text. In every branch a nil (or false) +text+ is replaced by "".
#
# NOTE(review): the `case` header these `when` clauses belong to is not shown
# here; presumably it switches on +format+ - confirm, and confirm what is
# returned for an unrecognised format.
11 def self.new(format, text)
13 when "html" then HTML.new(text || "")
14 when "markdown" then Markdown.new(text || "")
15 when "text" then Text.new(text || "")
20 include ActionView::Helpers::TextHelper
21 include ActionView::Helpers::OutputSafetyHelper
# Cleans +text+ with Sanitize.clean using the Sanitize::Config::OSM
# configuration and marks the result as html_safe.
#
# The second parameter is accepted but never used (underscore-prefixed).
# NOTE(review): presumably it exists so this method matches the signature of
# the ActionView sanitize helper it stands in for - confirm.
23 def sanitize(text, _options = {})
24 Sanitize.clean(text, Sanitize::Config::OSM).html_safe
# NOTE(review): the enclosing method header and the initialisation of
# link_size are not shown in this span.
# Parse the rendered HTML so that links and text content can be measured.
33 doc = Nokogiri::HTML(to_html)
# Add up the text length of every <a> element in the document.
38 doc.xpath("//a").each do |link|
40 link_size += link.content.length
# Fraction of the document's text content that sits inside links.
# NOTE(review): if doc.content is empty this float division yields NaN or
# Infinity; verify that a guard exists on the lines not shown.
43 link_proportion = link_size.to_f / doc.content.length
# Number of SPAMMY_PHRASES entries that occur in the document text.
46 spammy_phrases = SPAMMY_PHRASES.count do |phrase|
47 doc.content.include?(phrase)
# First term of a sum: only the part of the link proportion above 0.2
# contributes (clamped at 0.0), scaled by 200. The remaining terms are on
# lines not shown.
50 ([link_proportion - 0.2, 0.0].max * 200) +
# Truncates HTML once an accumulated content length reaches +max_length+.
#
# max_length - length budget; when nil, html_doc is returned as is.
# img_length - amount each <img> leaf adds to the accumulated length
#              (default 1000).
#
# When +max_length+ is given, the visible tail of the method builds a
# structure with :truncated and :html entries, the latter being the
# serialised fragment marked html_safe.
#
# NOTE(review): the assignments of html_doc and truncated, and several
# branch / `end` lines of the traversal, are not shown here.
67 def truncate_html(max_length = nil, img_length = 1000)
69 return html_doc if max_length.nil?
# Parse as a fragment so nodes can be removed in place.
71 doc = Nokogiri::HTML::DocumentFragment.parse(html_doc)
# Tag names that are removed when they are the tracked ancestor of the node
# that crossed the limit (see the check inside the traversal).
72 keep_or_discards = %w[p h1 h2 h3 h4 h5 h6 pre a table ul ol dl]
73 accumulated_length = 0
# Parent of the node at which the budget was exhausted; nil until then.
74 exceeded_node_parent = nil
77 doc.traverse do |node|
# Budget already spent: decide what happens to this node.
78 if accumulated_length >= max_length
# Reaching the tracked ancestor moves the marker one level up, and the
# ancestor itself is dropped only if its tag is in keep_or_discards.
# NOTE(review): handling of all other nodes in this state is not shown.
79 if node == exceeded_node_parent
80 exceeded_node_parent = node.parent
81 node.remove if keep_or_discards.include?(node.name)
# Only leaf nodes contribute to the accumulated length.
88 next unless node.children.empty?
# Text length of the leaf (the opening `if` of this branch is not shown).
91 accumulated_length += node.text.length
92 elsif node.name == "img"
# Images carry no text, so they count as a fixed img_length.
93 accumulated_length += img_length
# This leaf used up the budget: remember its parent for the checks above.
96 if accumulated_length >= max_length
98 exceeded_node_parent = node.parent
104 :truncated => truncated,
105 :html => doc.to_html.html_safe
# Formats +text+ by delegating to a fresh SimpleFormat instance, passing
# :dir => "auto" as the second argument.
111 def simple_format(text)
112 SimpleFormat.new.simple_format(text, :dir => "auto")
# Cleans +text+ with Sanitize.clean under Sanitize::Config::OSM and marks the
# result html_safe. NOTE(review): the enclosing method header is not shown.
116 Sanitize.clean(text, Sanitize::Config::OSM).html_safe
# Turns URLs in +text+ into links using Rinku.auto_link.
#
# text - input string; it is passed through ERB::Util.html_escape first.
# mode - Rinku linking mode, :urls by default.
#
# NOTE(review): per Rinku's API the block's return value is presumably used
# as the displayed link text - confirm against the Rinku documentation.
119 def linkify(text, mode = :urls)
# Attribute string handed to Rinku for every generated link.
120 link_attr = 'rel="nofollow noopener noreferrer" dir="auto"'
121 Rinku.auto_link(ERB::Util.html_escape(text), mode, link_attr) do |url|
# First pass: hosts listed in Settings.linkify_hosts.
122 url = shorten_host(url, Settings.linkify_hosts, Settings.linkify_hosts_replacement)
# Second pass: wiki hosts; the block additionally rewrites the path.
123 shorten_host(url, Settings.linkify_wiki_hosts, Settings.linkify_wiki_hosts_replacement) do |path|
# Remove the first match of the configured prefix pattern from the path; an
# unset setting becomes an empty pattern.
124 path.sub(Regexp.new(Settings.linkify_wiki_optional_path_prefix || ""), "")
# Rewrites an http(s) +url+ whose host appears in +hosts+.
#
# url               - string matched against scheme://host followed by the
#                     rest of the URL.
# hosts             - collection of host names; may be nil, in which case the
#                     host branch is never taken (safe navigation).
# hosts_replacement - prefix that is put in front of the path in place of
#                     scheme://host.
#
# When a block is given it is called with the path and its result replaces
# the path before the URL is reassembled.
#
# NOTE(review): the condition choosing between the two result strings below,
# and the value returned for URLs that do not match or whose host is not
# listed, are on lines not shown here.
131 def shorten_host(url, hosts, hosts_replacement)
132 %r{^(https?://([^/]*))(.*)$}.match(url) do |m|
# Captures: scheme plus host, bare host, and everything after the host.
133 scheme_host, host, path = m.captures
134 if hosts&.include?(host)
135 path = yield(path) if block_given?
137 "#{hosts_replacement}#{path}"
139 "#{scheme_host}#{path}"
# Runs the receiver through simple_format and then auto-links the result.
# NOTE(review): the enclosing class and method headers are not shown.
148 linkify(simple_format(self))
# Rich-text variant for Markdown input; subclass of Base.
156 class Markdown < Base
# Converts the parsed document to HTML, sanitizes it, then auto-links it in
# :all mode. NOTE(review): the enclosing method header is not shown.
158 linkify(sanitize(document.to_html), :all)
# Looks up the first image element of the document once and caches it; the
# defined? check means a nil result is cached too and not searched again.
# NOTE(review): the enclosing method header is not shown.
166 @image_element = first_image_element(document.root) unless defined? @image_element
# "src" attribute of the cached element, or nil when no image was found.
167 @image_element.attr["src"] if @image_element
# Looks up the first image element of the document once and caches it; the
# defined? check means a nil result is cached too and not searched again.
# NOTE(review): the enclosing method header is not shown.
171 @image_element = first_image_element(document.root) unless defined? @image_element
# "alt" attribute of the cached element, or nil when no image was found.
172 @image_element.attr["alt"] if @image_element
# Memoised: once @description has been assigned (even to nil) it is returned
# without recomputation. NOTE(review): the enclosing method header is not
# shown.
176 return @description if defined? @description
# Derive the description from the document tree, starting at its root.
178 @description = first_truncated_text_content(document.root)
# Builds and caches the Kramdown document for this text, then walks its
# element tree adding dir="auto" to selected elements.
# NOTE(review): the enclosing method header and the lambdas' closing lines
# are not shown.
184 return @document if @document
186 @document = Kramdown::Document.new(self)
# Predicate deciding which elements receive dir="auto". The `return`
# statements leave the lambda only, not the surrounding method.
188 should_get_dir_auto = lambda do |el|
189 dir_auto_types = [:p, :header, :codespan, :codeblock, :pre, :ul, :ol, :table, :dl, :math]
190 return true if dir_auto_types.include?(el.type)
# Also true for a link whose only child is text equal to its own href.
191 return true if el.type == :a && el.children.length == 1 && el.children[0].type == :text && el.children[0].value == el.attr["href"]
# Recursive walker: sets dir only when the element has none yet, then
# descends into every child.
196 add_dir = lambda do |element|
197 element.attr["dir"] ||= "auto" if should_get_dir_auto.call(element)
198 element.children.each(&add_dir)
200 add_dir.call(@document.root)
# Searches +element+ and its descendants for the first image element that has
# a non-blank "src" attribute.
#
# Returns +element+ itself when it qualifies; otherwise the children are
# searched in order.
205 def first_image_element(element)
206 return element if image?(element) && element.attr["src"].present?
208 element.children.find do |child|
209 nested_image = first_image_element(child)
# `break` makes the `find` call evaluate to the nested element that was
# found instead of the child currently being inspected.
210 break nested_image if nested_image
# Finds truncated text content for +element+ or one of its descendants.
#
# A paragraph element is handed to truncated_text_content. The child search
# below stops at the first child that yields a non-nil text.
# NOTE(review): the `else` separating the two branches is not shown here.
214 def first_truncated_text_content(element)
215 if paragraph?(element)
216 truncated_text_content(element)
218 element.children.find do |child|
219 text = first_truncated_text_content(child)
# `break` makes the `find` call evaluate to the text rather than the child.
220 break text unless text.nil?
# Collects text from +element+ and returns it truncated to
# DESCRIPTION_MAX_LENGTH, or nil when the collected text is blank.
#
# NOTE(review): the initialisation of `text`, the statements that append to
# it, and the `else` lines of both conditionals are not shown here.
225 def truncated_text_content(element)
# Collector lambda: text nodes are handled directly, other nodes have their
# children visited.
228 append_text = lambda do |child|
229 if child.type == :text
232 child.children.each do |c|
# Stop visiting children once more than the maximum has been gathered.
234 break if text.length > DESCRIPTION_MAX_LENGTH
238 append_text.call(element)
240 return nil if text.blank?
# Truncate at a separator: a run of whitespace that is not itself preceded by
# whitespace.
242 text_truncated_to_word_break = text.truncate(DESCRIPTION_MAX_LENGTH, :separator => /(?<!\s)\s+/)
# Use the word-boundary result only if it reaches the threshold length.
244 if text_truncated_to_word_break.length >= DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH
245 text_truncated_to_word_break
# Plain truncation at the maximum length; presumably the fallback branch.
247 text.truncate(DESCRIPTION_MAX_LENGTH)
# True when +element+ has type :img, or has type :html_element with the value
# "img". NOTE(review): the enclosing method header is not shown; presumably
# this is the body of image? - confirm.
252 element.type == :img || (element.type == :html_element && element.value == "img")
# True when +element+ has type :p, or has type :html_element with the value
# "p".
255 def paragraph?(element)
256 element.type == :p || (element.type == :html_element && element.value == "p")
# HTML-escapes the receiver, runs it through simple_format, then auto-links
# the result. NOTE(review): the enclosing class and method headers are not
# shown.
262 linkify(simple_format(ERB::Util.html_escape(self)))