git.openstreetmap.org Git - rails.git/blob - lib/rich_text.rb
Extend MapLibre Popup class
[rails.git] / lib / rich_text.rb
1 # frozen_string_literal: true
2
3 module RichText
4   DESCRIPTION_MAX_LENGTH = 500
5   DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH = 450
6   URL_UNSAFE_CHARS = "[^\\w!#$%&'*+,./:;=?@_~^\\-]"
7
8   def self.new(format, text)
9     case format
10     when "html" then HTML.new(text || "")
11     when "markdown" then Markdown.new(text || "")
12     when "text" then Text.new(text || "")
13     end
14   end
15
16   class SimpleFormat
17     include ActionView::Helpers::TextHelper
18     include ActionView::Helpers::OutputSafetyHelper
19
20     def sanitize(text, _options = {})
21       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
22     end
23   end
24
25   class Base < String
    # Image source for this text. The base implementation has none and
    # returns nil; Markdown overrides this.
    def image
      nil
    end
29
    # Alternative text for the image. The base implementation has none and
    # returns nil; Markdown overrides this.
    def image_alt
      nil
    end
33
    # Short description of this text. The base implementation has none and
    # returns nil; Markdown overrides this.
    def description
      nil
    end
37
38     def truncate_html(max_length = nil, img_length = 1000)
39       html_doc = to_html
40       return html_doc if max_length.nil?
41
42       doc = Nokogiri::HTML::DocumentFragment.parse(html_doc)
43       keep_or_discards = %w[p h1 h2 h3 h4 h5 h6 pre a table ul ol dl]
44       accumulated_length = 0
45       exceeded_node_parent = nil
46       truncated = false
47
48       doc.traverse do |node|
49         if accumulated_length >= max_length
50           if node == exceeded_node_parent
51             exceeded_node_parent = node.parent
52             node.remove if keep_or_discards.include?(node.name)
53           else
54             node.remove
55           end
56           next
57         end
58
59         next unless node.children.empty?
60
61         if node.text?
62           accumulated_length += node.text.length
63         elsif node.name == "img"
64           accumulated_length += img_length
65         end
66
67         if accumulated_length >= max_length
68           truncated = true
69           exceeded_node_parent = node.parent
70           node.remove
71         end
72       end
73
74       {
75         :truncated => truncated,
76         :html => doc.to_html.html_safe
77       }
78     end
79
80     protected
81
82     def simple_format(text)
83       SimpleFormat.new.simple_format(text, :dir => "auto")
84     end
85
86     def sanitize(text)
87       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
88     end
89
90     def linkify(text, mode = :urls)
91       ERB::Util.html_escape(text)
92                .then { |html| expand_link_shorthands(html) }
93                .then { |html| expand_host_shorthands(html) }
94                .then { |html| auto_link(html, mode) }
95                .html_safe
96     end
97
98     private
99
    # Pattern/replacement pairs consumed by expand_link_shorthands.
    # The base implementation supplies none; Text overrides this.
    def gsub_pairs_for_linkify_detection
      []
    end
103
104     def expand_link_shorthands(text)
105       gsub_pairs_for_linkify_detection
106         .reduce(text) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
107     end
108
109     def expand_host_shorthands(text)
110       Array
111         .wrap(Settings.linkify&.normalisation_rules)
112         .select { |rule| rule.host_replacement && rule.hosts&.any? }
113         .reduce(text) do |text, rule|
114           text.gsub(/(?<=^|#{URL_UNSAFE_CHARS})\b#{Regexp.escape(rule.host_replacement)}/) do
115             "#{Settings.server_protocol}://#{rule.hosts[0]}"
116           end
117         end
118     end
119
120     def auto_link(text, mode)
121       link_attr = 'rel="nofollow noopener noreferrer" dir="auto"'
122       Rinku.auto_link(text, mode, link_attr) { |url| format_link_text(url) }
123     end
124
125     def format_link_text(url)
126       url = Array
127             .wrap(Settings.linkify&.normalisation_rules)
128             .reduce(url) do |normalised_url, rule|
129               shorten_host(normalised_url, rule.hosts, rule.host_replacement) do |path|
130                 path.sub(Regexp.new(rule.optional_path_prefix || ""), "")
131               end
132       end
133       Array.wrap(Settings.linkify&.display_rules)
134            .select { |rule| rule.pattern && rule.replacement }
135            .reduce(url) { |url, rule| url.sub(Regexp.new(rule.pattern), rule.replacement) }
136     end
137
138     def shorten_host(url, hosts, hosts_replacement)
139       %r{^(https?://([^/]*))(.*)$}.match(url) do |m|
140         scheme_host, host, path = m.captures
141         if hosts&.include?(host)
142           path = yield(path) if block_given?
143           if hosts_replacement
144             "#{hosts_replacement}#{path}"
145           else
146             "#{scheme_host}#{path}"
147           end
148         end || url
149       end || url
150     end
151   end
152
153   class HTML < Base
154     def to_html
155       linkify(simple_format(self))
156     end
157
158     def to_text
159       to_s
160     end
161   end
162
163   class Markdown < Base
164     def to_html
165       linkify(sanitize(document.to_html), :all)
166     end
167
    # Plain-text form: the Markdown source itself, via to_s.
    def to_text
      to_s
    end
171
172     def image
173       @image_element = first_image_element(document.root) unless defined? @image_element
174       @image_element.attr["src"] if @image_element
175     end
176
177     def image_alt
178       @image_element = first_image_element(document.root) unless defined? @image_element
179       @image_element.attr["alt"] if @image_element
180     end
181
182     def description
183       return @description if defined? @description
184
185       @description = first_truncated_text_content(document.root)
186     end
187
188     private
189
190     def document
191       return @document if @document
192
193       @document = Kramdown::Document.new(self)
194
195       should_get_dir_auto = lambda do |el|
196         dir_auto_types = [:p, :header, :codespan, :codeblock, :pre, :ul, :ol, :table, :dl, :math]
197         return true if dir_auto_types.include?(el.type)
198         return true if el.type == :a && el.children.length == 1 && el.children[0].type == :text && el.children[0].value == el.attr["href"]
199
200         false
201       end
202
203       add_dir = lambda do |element|
204         element.attr["dir"] ||= "auto" if should_get_dir_auto.call(element)
205         element.children.each(&add_dir)
206       end
207       add_dir.call(@document.root)
208
209       @document
210     end
211
212     def first_image_element(element)
213       return element if image?(element) && element.attr["src"].present?
214
215       element.children.find do |child|
216         nested_image = first_image_element(child)
217         break nested_image if nested_image
218       end
219     end
220
221     def first_truncated_text_content(element)
222       if paragraph?(element)
223         truncated_text_content(element)
224       else
225         element.children.find do |child|
226           text = first_truncated_text_content(child)
227           break text unless text.nil?
228         end
229       end
230     end
231
232     def truncated_text_content(element)
233       text = +""
234
235       append_text = lambda do |child|
236         if child.type == :text
237           text << child.value
238         else
239           child.children.each do |c|
240             append_text.call(c)
241             break if text.length > DESCRIPTION_MAX_LENGTH
242           end
243         end
244       end
245       append_text.call(element)
246
247       return nil if text.blank?
248
249       text_truncated_to_word_break = text.truncate(DESCRIPTION_MAX_LENGTH, :separator => /(?<!\s)\s+/)
250
251       if text_truncated_to_word_break.length >= DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH
252         text_truncated_to_word_break
253       else
254         text.truncate(DESCRIPTION_MAX_LENGTH)
255       end
256     end
257
258     def image?(element)
259       element.type == :img || (element.type == :html_element && element.value == "img")
260     end
261
262     def paragraph?(element)
263       element.type == :p || (element.type == :html_element && element.value == "p")
264     end
265   end
266
267   class Text < Base
268     def to_html
269       linkify(simple_format(ERB::Util.html_escape(self)))
270     end
271
272     def to_text
273       to_s
274     end
275
276     private
277
278     def gsub_pairs_for_linkify_detection
279       Array
280         .wrap(Settings.linkify&.detection_rules)
281         .select { |rule| rule.path_template && rule.patterns.is_a?(Array) }
282         .flat_map do |rule|
283           expanded_path = "#{rule.host || "#{Settings.server_protocol}://#{Settings.server_url}"}/#{rule.path_template}"
284           rule.patterns
285               .select { |pattern| pattern.is_a?(String) }
286               .map { |pattern| [Regexp.new("(?<=^|#{URL_UNSAFE_CHARS})#{pattern}", Regexp::IGNORECASE), expanded_path] }
287         end
288     end
289   end
290 end