1 # frozen_string_literal: true
 
   5     "Business Description:", "Additional Keywords:"
 
   8   DESCRIPTION_MAX_LENGTH = 500
 
   9   DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH = 450
 
  11   def self.new(format, text)
         # Factory: chooses the HTML, Markdown or Text wrapper according to
         # +format+. A nil +text+ is replaced by an empty string before it is
         # handed to the wrapper.
         # NOTE(review): the `case` header and any fallback branch are not
         # visible in this excerpt - confirm how unknown formats are handled.
 
  13     when "html" then HTML.new(text || "")
 
  14     when "markdown" then Markdown.new(text || "")
 
  15     when "text" then Text.new(text || "")
 
  20     include ActionView::Helpers::TextHelper
 
  21     include ActionView::Helpers::OutputSafetyHelper
 
  23     def sanitize(text, _options = {})
           # Cleans +text+ with the Sanitize gem using the Sanitize::Config::OSM
           # configuration and marks the result as html_safe.
           # The second argument is accepted but not used in the visible body;
           # presumably it keeps the signature compatible with a helper of the
           # same name from the included ActionView modules - TODO confirm.
 
  24       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
 
  33       doc = Nokogiri::HTML(to_html)
 
  38         doc.xpath("//a").each do |link|
 
  40           link_size += link.content.length
 
  43         link_proportion = link_size.to_f / doc.content.length
 
  46       spammy_phrases = SPAMMY_PHRASES.count do |phrase|
 
  47         doc.content.include?(phrase)
 
  50       ([link_proportion - 0.2, 0.0].max * 200) +
 
  67     def truncate_html(max_length = nil, img_length = 1000)
           # Shortens the rendered HTML once the accumulated content length
           # reaches +max_length+. Text leaf nodes count by their character
           # length and every <img> counts as +img_length+.
           # With +max_length+ nil the method returns html_doc as is.
           # The visible tail builds a hash with :truncated and :html keys.
           # NOTE(review): the assignment of html_doc, the `else` branches and
           # the place where `truncated` is set are elided from this excerpt.
 
  69       return html_doc if max_length.nil?
 
  71       doc = Nokogiri::HTML::DocumentFragment.parse(html_doc)
 
           # Element names that are removed as a whole when they are the tracked
           # ancestor visited after the limit has been reached (see below).
  72       keep_or_discards = %w[p h1 h2 h3 h4 h5 h6 pre a table ul ol dl]
 
  73       accumulated_length = 0
 
           # Parent of the node that pushed the total over the limit; moved one
           # level up each time that ancestor is visited.
  74       exceeded_node_parent = nil
 
           # Relies on traverse yielding a node's descendants before the node
           # itself, so ancestors of the crossing node are seen after it -
           # confirm against the Nokogiri documentation.
  77       doc.traverse do |node|
 
  78         if accumulated_length >= max_length
 
  79           if node == exceeded_node_parent
 
                 # Climb one level and drop this ancestor only if its element
                 # name is listed in keep_or_discards.
  80             exceeded_node_parent = node.parent
 
  81             node.remove if keep_or_discards.include?(node.name)
 
             # Only leaf nodes contribute to the accumulated length.
  88         next unless node.children.empty?
 
  91           accumulated_length += node.text.length
 
  92         elsif node.name == "img"
 
  93           accumulated_length += img_length
 
             # This node crossed the limit: remember its parent so the branch
             # above can recognise the ancestor chain on later iterations.
  96         if accumulated_length >= max_length
 
  98           exceeded_node_parent = node.parent
 
 104         :truncated => truncated,
 
 105         :html => doc.to_html.html_safe
 
 111     def simple_format(text)
           # Delegates to a fresh SimpleFormat instance, passing :dir => "auto"
           # along with the text.
           # NOTE(review): presumably :dir ends up as an HTML attribute on the
           # generated markup - SimpleFormat is defined elsewhere, confirm.
 
 112       SimpleFormat.new.simple_format(text, :dir => "auto")
 
 116       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
 
 119     def linkify(text, mode = :urls)
           # HTML-escapes +text+ and lets Rinku turn the URLs it finds into
           # anchors carrying the attributes in link_attr. +mode+ is passed
           # through to Rinku unchanged (callers in this file use the default
           # and :all).
 
 120       link_attr = 'rel="nofollow noopener noreferrer" dir="auto"'
 
           # Per the Rinku documentation the block receives each matched URL and
           # its return value becomes the visible link text.
 121       Rinku.auto_link(ERB::Util.html_escape(text), mode, link_attr) do |url|
 
             # First pass: shorten hosts listed in Settings.linkify_hosts.
 122         url = shorten_host(url, Settings.linkify_hosts, Settings.linkify_hosts_replacement)
 
             # Second pass: shorten wiki hosts; the block additionally strips the
             # first match of the optional path prefix pattern (an empty pattern
             # is used when the setting is nil, which leaves the path unchanged).
 123         shorten_host(url, Settings.linkify_wiki_hosts, Settings.linkify_wiki_hosts_replacement) do |path|
 
 124           path.sub(Regexp.new(Settings.linkify_wiki_optional_path_prefix || ""), "")
 
 131     def shorten_host(url, hosts, hosts_replacement)
           # Rewrites +url+ when its host is one of +hosts+. If a block is given
           # it is called with the path part and its result replaces the path.
           # +hosts+ may be nil (safe navigation), in which case the host branch
           # is not taken.
           # NOTE(review): the conditions choosing between the two result
           # strings, and the result for non-matching input, are elided from
           # this excerpt - confirm before relying on the exact output.
 
           # Captures: 1 = scheme plus host, 2 = host only, 3 = the remainder.
 132       %r{^(https?://([^/]*))(.*)$}.match(url) do |m|
 
 133         scheme_host, host, path = m.captures
 
 134         if hosts&.include?(host)
 
 135           path = yield(path) if block_given?
 
 137             "#{hosts_replacement}#{path}"
 
 139             "#{scheme_host}#{path}"
 
 148       linkify(simple_format(self))
 
 156   class Markdown < Base
 
 158       linkify(sanitize(document.to_html), :all)
 
 166       @image_element = first_image_element(document.root) unless defined? @image_element
 
 167       @image_element.attr["src"] if @image_element
 
 171       @image_element = first_image_element(document.root) unless defined? @image_element
 
 172       @image_element.attr["alt"] if @image_element
 
 176       return @description if defined? @description
 
 178       @description = first_truncated_text_content(document.root)
 
 184       return @document if @document
 
 186       @document = Kramdown::Document.new(self)
 
 188       should_get_dir_auto = lambda do |el|
 
 189         dir_auto_types = [:p, :header, :codespan, :codeblock, :pre, :ul, :ol, :table, :dl, :math]
 
 190         return true if dir_auto_types.include?(el.type)
 
 191         return true if el.type == :a && el.children.length == 1 && el.children[0].type == :text && el.children[0].value == el.attr["href"]
 
 196       add_dir = lambda do |element|
 
 197         element.attr["dir"] ||= "auto" if should_get_dir_auto.call(element)
 
 198         element.children.each(&add_dir)
 
 200       add_dir.call(@document.root)
 
 205     def first_image_element(element)
           # Depth-first search for the first image element with a present
           # "src" attribute, starting at +element+ itself.
 
 206       return element if image?(element) && element.attr["src"].present?
 
           # `break` with a value makes `find` return the nested image element
           # found by the recursive call rather than the direct child that
           # contains it.
 208       element.children.find do |child|
 
 209         nested_image = first_image_element(child)
 
 210         break nested_image if nested_image
 
 214     def first_truncated_text_content(element)
           # Returns the truncated text of +element+ when it is a paragraph;
           # otherwise searches the children recursively and yields the first
           # non-nil result.
 
 215       if paragraph?(element)
 
 216         truncated_text_content(element)
 
             # `break text` makes `find` return the extracted text itself instead
             # of the child element it came from.
 218         element.children.find do |child|
 
 219           text = first_truncated_text_content(child)
 
 220           break text unless text.nil?
 
 225     def truncated_text_content(element)
           # Gathers text from +element+ and returns it truncated to
           # DESCRIPTION_MAX_LENGTH, or nil when the gathered text is blank.
           # NOTE(review): the initialisation of `text` and the bodies of the
           # lambda's branches are elided from this excerpt.
 
 228       append_text = lambda do |child|
 
 229         if child.type == :text
 
 232           child.children.each do |c|
 
                 # Stop visiting further children once more than the maximum
                 # has been collected.
 234             break if text.length > DESCRIPTION_MAX_LENGTH
 
 238       append_text.call(element)
 
 240       return nil if text.blank?
 
           # Try to cut at the start of a whitespace run (the lookbehind
           # requires that the run is not preceded by whitespace).
 242       text_truncated_to_word_break = text.truncate(DESCRIPTION_MAX_LENGTH, :separator => /(?<!\s)\s+/)
 
           # Keep the word-break result only if it is at least the threshold
           # length; the plain truncation below is presumably the `else`
           # branch (the `else` line itself is elided).
 244       if text_truncated_to_word_break.length >= DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH
 
 245         text_truncated_to_word_break
 
 247         text.truncate(DESCRIPTION_MAX_LENGTH)
 
 252       element.type == :img || (element.type == :html_element && element.value == "img")
 
 255     def paragraph?(element)
           # True when +element+ has type :p, or has type :html_element with the
           # value "p" (presumably a raw HTML <p> embedded in the Markdown).
 
 256       element.type == :p || (element.type == :html_element && element.value == "p")
 
 262       linkify(simple_format(ERB::Util.html_escape(self)))