]> git.openstreetmap.org Git - rails.git/blob - lib/rich_text.rb
Merge remote-tracking branch 'upstream/pull/6798'
[rails.git] / lib / rich_text.rb
1 # frozen_string_literal: true
2
3 module RichText
  # Maximum number of characters kept when a description is truncated.
  DESCRIPTION_MAX_LENGTH = 500
  # A truncation at a word break is only preferred over a hard cut when the
  # result is at least this many characters long.
  DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH = 450
  # Regexp character class, kept as a string so it can be interpolated into
  # lookbehinds, matching any character outside the listed URL-safe set.
  URL_UNSAFE_CHARS = "[^\\w!#$%&'*+,./:;=?@_~^\\-]"
7
8   def self.new(format, text)
9     case format
10     when "html" then HTML.new(text || "")
11     when "markdown" then Markdown.new(text || "")
12     when "text" then Text.new(text || "")
13     end
14   end
15
16   class SimpleFormat
17     include ActionView::Helpers::TextHelper
18     include ActionView::Helpers::OutputSafetyHelper
19
20     def sanitize(text, _options = {})
21       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
22     end
23   end
24
25   class Base < String
26     def image
27       nil
28     end
29
30     def image_alt
31       nil
32     end
33
34     def description
35       nil
36     end
37
38     def truncate_html(max_length = nil, img_length = 1000)
39       html_doc = to_html
40       return html_doc if max_length.nil?
41
42       doc = Nokogiri::HTML::DocumentFragment.parse(html_doc)
43       keep_or_discards = %w[p h1 h2 h3 h4 h5 h6 pre a table ul ol dl]
44       accumulated_length = 0
45       exceeded_node_parent = nil
46       truncated = false
47
48       doc.traverse do |node|
49         if accumulated_length >= max_length
50           if node == exceeded_node_parent
51             exceeded_node_parent = node.parent
52             node.remove if keep_or_discards.include?(node.name)
53           else
54             node.remove
55           end
56           next
57         end
58
59         next unless node.children.empty?
60
61         if node.text?
62           accumulated_length += node.text.length
63         elsif node.name == "img"
64           accumulated_length += img_length
65         end
66
67         if accumulated_length >= max_length
68           truncated = true
69           exceeded_node_parent = node.parent
70           node.remove
71         end
72       end
73
74       {
75         :truncated => truncated,
76         :html => doc.to_html.html_safe
77       }
78     end
79
80     protected
81
82     def simple_format(text)
83       SimpleFormat.new.simple_format(text, :dir => "auto")
84     end
85
86     def sanitize(text)
87       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
88     end
89
90     def linkify(text, mode = :urls, hosts: true, paths: true)
91       link_attr = 'rel="nofollow noopener noreferrer" dir="auto"'
92       html = ERB::Util.html_escape(text)
93
94       html = expand_link_shorthands(html) if paths
95       html = expand_host_shorthands(html) if hosts
96
97       Rinku.auto_link(html, mode, link_attr) do |url|
98         url = shorten_hosts(url) if hosts
99         url = shorten_link(url) if paths
100
101         url
102       end.html_safe
103     end
104
105     private
106
107     def gsub_pairs_for_linkify_detection
108       Array
109         .wrap(Settings.linkify&.detection_rules)
110         .select { |rule| rule.path_template && rule.patterns.is_a?(Array) }
111         .flat_map do |rule|
112           expanded_path = "#{rule.host || "#{Settings.server_protocol}://#{Settings.server_url}"}/#{rule.path_template}"
113           rule.patterns
114               .select { |pattern| pattern.is_a?(String) }
115               .map { |pattern| [Regexp.new("(?<=^|#{URL_UNSAFE_CHARS})#{pattern}", Regexp::IGNORECASE, :timeout => 0.01), expanded_path] }
116         end
117     end
118
119     def expand_link_shorthands(text)
120       gsub_pairs_for_linkify_detection
121         .reduce(text) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
122     end
123
124     def expand_host_shorthands(text)
125       Array
126         .wrap(Settings.linkify&.normalisation_rules)
127         .select { |rule| rule.host_replacement && rule.hosts&.any? }
128         .reduce(text) do |text, rule|
129           text.gsub(/(?<=^|#{URL_UNSAFE_CHARS})\b#{Regexp.escape(rule.host_replacement)}/) do
130             "#{Settings.server_protocol}://#{rule.hosts[0]}"
131           end
132         end
133     end
134
135     def shorten_hosts(url)
136       Array
137         .wrap(Settings.linkify&.normalisation_rules)
138         .reduce(url) { |url, rule| shorten_host(url, rule) }
139     end
140
141     def shorten_link(url)
142       Array.wrap(Settings.linkify&.display_rules)
143            .select { |rule| rule.pattern && rule.replacement }
144            .reduce(url) { |url, rule| url.sub(Regexp.new(rule.pattern), rule.replacement) }
145     end
146
147     def shorten_host(url, rule)
148       %r{^(https?://([^/]*))(.*)$}.match(url) do |m|
149         scheme_host, host, path = m.captures
150         if rule.hosts&.include?(host)
151           path = path.sub(Regexp.new(rule.optional_path_prefix || ""), "")
152           if rule.host_replacement
153             "#{rule.host_replacement}#{path}"
154           else
155             "#{scheme_host}#{path}"
156           end
157         end || url
158       end || url
159     end
160   end
161
162   class HTML < Base
163     def to_html
164       linkify(simple_format(self), :paths => false)
165     end
166
167     def to_text
168       to_s
169     end
170   end
171
172   class Markdown < Base
173     def to_html
174       linkify(sanitize(document.to_html), :all, :paths => false)
175     end
176
177     def to_text
178       to_s
179     end
180
181     def image
182       @image_element = first_image_element(document.root) unless defined? @image_element
183       @image_element.attr["src"] if @image_element
184     end
185
186     def image_alt
187       @image_element = first_image_element(document.root) unless defined? @image_element
188       @image_element.attr["alt"] if @image_element
189     end
190
191     def description
192       return @description if defined? @description
193
194       @description = first_truncated_text_content(document.root)
195     end
196
197     private
198
199     def document
200       return @document if @document
201
202       @document = Kramdown::Document.new(self)
203
204       should_get_dir_auto = lambda do |el|
205         dir_auto_types = [:p, :header, :codespan, :codeblock, :pre, :ul, :ol, :table, :dl, :math]
206         return true if dir_auto_types.include?(el.type)
207         return true if el.type == :a && el.children.length == 1 && el.children[0].type == :text && el.children[0].value == el.attr["href"]
208
209         false
210       end
211
212       add_dir = lambda do |element|
213         element.attr["dir"] ||= "auto" if should_get_dir_auto.call(element)
214         element.children.each(&add_dir)
215       end
216       add_dir.call(@document.root)
217
218       @document
219     end
220
221     def first_image_element(element)
222       return element if image?(element) && element.attr["src"].present?
223
224       element.children.find do |child|
225         nested_image = first_image_element(child)
226         break nested_image if nested_image
227       end
228     end
229
230     def first_truncated_text_content(element)
231       if paragraph?(element)
232         truncated_text_content(element)
233       else
234         element.children.find do |child|
235           text = first_truncated_text_content(child)
236           break text unless text.nil?
237         end
238       end
239     end
240
241     def truncated_text_content(element)
242       text = +""
243
244       append_text = lambda do |child|
245         if child.type == :text
246           text << child.value
247         else
248           child.children.each do |c|
249             append_text.call(c)
250             break if text.length > DESCRIPTION_MAX_LENGTH
251           end
252         end
253       end
254       append_text.call(element)
255
256       return nil if text.blank?
257
258       text_truncated_to_word_break = text.truncate(DESCRIPTION_MAX_LENGTH, :separator => /(?<!\s)\s+/)
259
260       if text_truncated_to_word_break.length >= DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH
261         text_truncated_to_word_break
262       else
263         text.truncate(DESCRIPTION_MAX_LENGTH)
264       end
265     end
266
267     def image?(element)
268       element.type == :img || (element.type == :html_element && element.value == "img")
269     end
270
271     def paragraph?(element)
272       element.type == :p || (element.type == :html_element && element.value == "p")
273     end
274   end
275
276   class Text < Base
277     def to_html
278       linkify(simple_format(ERB::Util.html_escape(self)))
279     end
280
281     def to_text
282       to_s
283     end
284   end
285 end