]> git.openstreetmap.org Git - rails.git/blob - lib/rich_text.rb
Refactor linkify method
[rails.git] / lib / rich_text.rb
1 # frozen_string_literal: true
2
3 module RichText
  # Character limit passed to String#truncate when building a Markdown
  # description; text collection also stops once this length is exceeded.
  DESCRIPTION_MAX_LENGTH = 500
  # A description truncated at a word break is only used when it is at least
  # this long; otherwise the text is truncated without a word separator.
  DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH = 450
6
7   def self.new(format, text)
8     case format
9     when "html" then HTML.new(text || "")
10     when "markdown" then Markdown.new(text || "")
11     when "text" then Text.new(text || "")
12     end
13   end
14
15   class SimpleFormat
16     include ActionView::Helpers::TextHelper
17     include ActionView::Helpers::OutputSafetyHelper
18
19     def sanitize(text, _options = {})
20       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
21     end
22   end
23
24   class Base < String
25     def image
26       nil
27     end
28
29     def image_alt
30       nil
31     end
32
33     def description
34       nil
35     end
36
37     def truncate_html(max_length = nil, img_length = 1000)
38       html_doc = to_html
39       return html_doc if max_length.nil?
40
41       doc = Nokogiri::HTML::DocumentFragment.parse(html_doc)
42       keep_or_discards = %w[p h1 h2 h3 h4 h5 h6 pre a table ul ol dl]
43       accumulated_length = 0
44       exceeded_node_parent = nil
45       truncated = false
46
47       doc.traverse do |node|
48         if accumulated_length >= max_length
49           if node == exceeded_node_parent
50             exceeded_node_parent = node.parent
51             node.remove if keep_or_discards.include?(node.name)
52           else
53             node.remove
54           end
55           next
56         end
57
58         next unless node.children.empty?
59
60         if node.text?
61           accumulated_length += node.text.length
62         elsif node.name == "img"
63           accumulated_length += img_length
64         end
65
66         if accumulated_length >= max_length
67           truncated = true
68           exceeded_node_parent = node.parent
69           node.remove
70         end
71       end
72
73       {
74         :truncated => truncated,
75         :html => doc.to_html.html_safe
76       }
77     end
78
79     protected
80
81     def simple_format(text)
82       SimpleFormat.new.simple_format(text, :dir => "auto")
83     end
84
85     def sanitize(text)
86       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
87     end
88
89     def linkify(text, mode = :urls)
90       ERB::Util.html_escape(text)
91                .then { |html| auto_link(html, mode) }
92                .html_safe
93     end
94
95     private
96
97     def auto_link(text, mode)
98       link_attr = 'rel="nofollow noopener noreferrer" dir="auto"'
99       Rinku.auto_link(text, mode, link_attr) { |url| format_link_text(url) }
100     end
101
102     def format_link_text(url)
103       url = shorten_host(url, Settings.linkify_hosts, Settings.linkify_hosts_replacement)
104       shorten_host(url, Settings.linkify_wiki_hosts, Settings.linkify_wiki_hosts_replacement) do |path|
105         path.sub(Regexp.new(Settings.linkify_wiki_optional_path_prefix || ""), "")
106       end
107     end
108
109     def shorten_host(url, hosts, hosts_replacement)
110       %r{^(https?://([^/]*))(.*)$}.match(url) do |m|
111         scheme_host, host, path = m.captures
112         if hosts&.include?(host)
113           path = yield(path) if block_given?
114           if hosts_replacement
115             "#{hosts_replacement}#{path}"
116           else
117             "#{scheme_host}#{path}"
118           end
119         end || url
120       end || url
121     end
122   end
123
124   class HTML < Base
125     def to_html
126       linkify(simple_format(self))
127     end
128
129     def to_text
130       to_s
131     end
132   end
133
134   class Markdown < Base
135     def to_html
136       linkify(sanitize(document.to_html), :all)
137     end
138
139     def to_text
140       to_s
141     end
142
143     def image
144       @image_element = first_image_element(document.root) unless defined? @image_element
145       @image_element.attr["src"] if @image_element
146     end
147
148     def image_alt
149       @image_element = first_image_element(document.root) unless defined? @image_element
150       @image_element.attr["alt"] if @image_element
151     end
152
153     def description
154       return @description if defined? @description
155
156       @description = first_truncated_text_content(document.root)
157     end
158
159     private
160
161     def document
162       return @document if @document
163
164       @document = Kramdown::Document.new(self)
165
166       should_get_dir_auto = lambda do |el|
167         dir_auto_types = [:p, :header, :codespan, :codeblock, :pre, :ul, :ol, :table, :dl, :math]
168         return true if dir_auto_types.include?(el.type)
169         return true if el.type == :a && el.children.length == 1 && el.children[0].type == :text && el.children[0].value == el.attr["href"]
170
171         false
172       end
173
174       add_dir = lambda do |element|
175         element.attr["dir"] ||= "auto" if should_get_dir_auto.call(element)
176         element.children.each(&add_dir)
177       end
178       add_dir.call(@document.root)
179
180       @document
181     end
182
183     def first_image_element(element)
184       return element if image?(element) && element.attr["src"].present?
185
186       element.children.find do |child|
187         nested_image = first_image_element(child)
188         break nested_image if nested_image
189       end
190     end
191
192     def first_truncated_text_content(element)
193       if paragraph?(element)
194         truncated_text_content(element)
195       else
196         element.children.find do |child|
197           text = first_truncated_text_content(child)
198           break text unless text.nil?
199         end
200       end
201     end
202
203     def truncated_text_content(element)
204       text = +""
205
206       append_text = lambda do |child|
207         if child.type == :text
208           text << child.value
209         else
210           child.children.each do |c|
211             append_text.call(c)
212             break if text.length > DESCRIPTION_MAX_LENGTH
213           end
214         end
215       end
216       append_text.call(element)
217
218       return nil if text.blank?
219
220       text_truncated_to_word_break = text.truncate(DESCRIPTION_MAX_LENGTH, :separator => /(?<!\s)\s+/)
221
222       if text_truncated_to_word_break.length >= DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH
223         text_truncated_to_word_break
224       else
225         text.truncate(DESCRIPTION_MAX_LENGTH)
226       end
227     end
228
229     def image?(element)
230       element.type == :img || (element.type == :html_element && element.value == "img")
231     end
232
233     def paragraph?(element)
234       element.type == :p || (element.type == :html_element && element.value == "p")
235     end
236   end
237
238   class Text < Base
239     def to_html
240       linkify(simple_format(ERB::Util.html_escape(self)))
241     end
242
243     def to_text
244       to_s
245     end
246   end
247 end