]> git.openstreetmap.org Git - rails.git/blob - lib/rich_text.rb
Merge remote-tracking branch 'upstream/pull/7190'
[rails.git] / lib / rich_text.rb
1 # frozen_string_literal: true
2
3 module RichText
4   DESCRIPTION_MAX_LENGTH = 500
5   DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH = 450
6   URL_UNSAFE_CHARS = "[^\\w!#$%&'*+,./:;=?@_~^\\-]"
7
8   def self.new(format, text)
9     case format
10     when "html" then HTML.new(text || "")
11     when "markdown" then Markdown.new(text || "")
12     when "text" then Text.new(text || "")
13     end
14   end
15
16   def self.reset_state
17     @gsub_pairs_for_linkify_detection = nil
18   end
19
20   def self.gsub_pairs_for_linkify_detection
21     @gsub_pairs_for_linkify_detection ||=
22       Array
23       .wrap(Settings.linkify&.detection_rules)
24       .select { |rule| rule.path_template && rule.patterns.is_a?(Array) }
25       .flat_map do |rule|
26         expanded_path = "#{rule.host || "#{Settings.server_protocol}://#{Settings.server_url}"}/#{rule.path_template}"
27         rule.patterns
28             .grep(String)
29             .map { |pattern| [Regexp.new("(?<=^|#{URL_UNSAFE_CHARS})#{pattern}", Regexp::IGNORECASE, :timeout => 1), expanded_path] }
30       end
31   end
32
33   class SimpleFormat
34     include ActionView::Helpers::TextHelper
35     include ActionView::Helpers::OutputSafetyHelper
36
37     def sanitize(text, _options = {})
38       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
39     end
40   end
41
42   class Base < String
43     def image
44       nil
45     end
46
47     def image_alt
48       nil
49     end
50
51     def description
52       nil
53     end
54
55     def truncate_html(max_length = nil, img_length = 1000)
56       html_doc = to_html
57       return html_doc if max_length.nil?
58
59       doc = Nokogiri::HTML::DocumentFragment.parse(html_doc)
60       keep_or_discards = %w[p h1 h2 h3 h4 h5 h6 pre a table ul ol dl]
61       accumulated_length = 0
62       exceeded_node_parent = nil
63       truncated = false
64
65       doc.traverse do |node|
66         if accumulated_length >= max_length
67           if node == exceeded_node_parent
68             exceeded_node_parent = node.parent
69             node.remove if keep_or_discards.include?(node.name)
70           else
71             node.remove
72           end
73           next
74         end
75
76         next unless node.children.empty?
77
78         if node.text?
79           accumulated_length += node.text.length
80         elsif node.name == "img"
81           accumulated_length += img_length
82         end
83
84         if accumulated_length >= max_length
85           truncated = true
86           exceeded_node_parent = node.parent
87           node.remove
88         end
89       end
90
91       {
92         :truncated => truncated,
93         :html => doc.to_html.html_safe
94       }
95     end
96
97     protected
98
99     def simple_format(text)
100       SimpleFormat.new.simple_format(text, :dir => "auto")
101     end
102
103     def sanitize(text)
104       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
105     end
106
107     def linkify(text, mode = :urls, hosts: true, paths: true)
108       link_attr = 'rel="nofollow noopener noreferrer" dir="auto"'
109       html = ERB::Util.html_escape(text)
110
111       html = expand_link_shorthands(html) if paths
112       html = expand_host_shorthands(html) if hosts
113
114       Rinku.auto_link(html, mode, link_attr) do |url|
115         url = shorten_hosts(url) if hosts
116         url = shorten_link(url) if paths
117
118         url
119       end.html_safe
120     end
121
122     private
123
124     def expand_link_shorthands(text)
125       RichText
126         .gsub_pairs_for_linkify_detection
127         .reduce(text) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
128     end
129
130     def expand_host_shorthands(text)
131       Array
132         .wrap(Settings.linkify&.normalisation_rules)
133         .select { |rule| rule.host_replacement && rule.hosts&.any? }
134         .reduce(text) do |text, rule|
135           text.gsub(/(?<=^|#{URL_UNSAFE_CHARS})\b#{Regexp.escape(rule.host_replacement)}/) do
136             "#{Settings.server_protocol}://#{rule.hosts[0]}"
137           end
138         end
139     end
140
141     def shorten_hosts(url)
142       Array
143         .wrap(Settings.linkify&.normalisation_rules)
144         .reduce(url) { |url, rule| shorten_host(url, rule) }
145     end
146
147     def shorten_link(url)
148       Array.wrap(Settings.linkify&.display_rules)
149            .select { |rule| rule.pattern && rule.replacement }
150            .reduce(url) { |url, rule| url.sub(Regexp.new(rule.pattern), rule.replacement) }
151     end
152
153     def shorten_host(url, rule)
154       %r{^(https?://([^/]*))(.*)$}.match(url) do |m|
155         scheme_host, host, path = m.captures
156         if rule.hosts&.include?(host)
157           path = path.sub(Regexp.new(rule.optional_path_prefix || ""), "")
158           if rule.host_replacement
159             "#{rule.host_replacement}#{path}"
160           else
161             "#{scheme_host}#{path}"
162           end
163         end || url
164       end || url
165     end
166   end
167
168   class HTML < Base
169     def to_html
170       linkify(simple_format(self), :paths => false)
171     end
172
173     def to_text
174       to_s
175     end
176   end
177
178   class Markdown < Base
179     def to_html
180       linkify(sanitize(document.to_html), :all, :paths => false)
181     end
182
183     def to_text
184       to_s
185     end
186
187     def image
188       @image_element = first_image_element(document.root) unless defined? @image_element
189       @image_element.attr["src"] if @image_element
190     end
191
192     def image_alt
193       @image_element = first_image_element(document.root) unless defined? @image_element
194       @image_element.attr["alt"] if @image_element
195     end
196
197     def description
198       return @description if defined? @description
199
200       @description = first_truncated_text_content(document.root)
201     end
202
203     private
204
205     def document
206       return @document if @document
207
208       @document = Kramdown::Document.new(self)
209
210       should_get_dir_auto = lambda do |el|
211         dir_auto_types = [:p, :header, :codespan, :codeblock, :pre, :ul, :ol, :table, :dl, :math]
212         return true if dir_auto_types.include?(el.type)
213         return true if el.type == :a && el.children.length == 1 && el.children[0].type == :text && el.children[0].value == el.attr["href"]
214
215         false
216       end
217
218       add_dir = lambda do |element|
219         element.attr["dir"] ||= "auto" if should_get_dir_auto.call(element)
220         element.children.each(&add_dir)
221       end
222       add_dir.call(@document.root)
223
224       @document
225     end
226
227     def first_image_element(element)
228       return element if image?(element) && element.attr["src"].present?
229
230       element.children.find do |child|
231         nested_image = first_image_element(child)
232         break nested_image if nested_image
233       end
234     end
235
236     def first_truncated_text_content(element)
237       if paragraph?(element)
238         truncated_text_content(element)
239       else
240         element.children.find do |child|
241           text = first_truncated_text_content(child)
242           break text unless text.nil?
243         end
244       end
245     end
246
247     def truncated_text_content(element)
248       text = +""
249
250       append_text = lambda do |child|
251         if child.type == :text
252           text << child.value
253         else
254           child.children.each do |c|
255             append_text.call(c)
256             break if text.length > DESCRIPTION_MAX_LENGTH
257           end
258         end
259       end
260       append_text.call(element)
261
262       return nil if text.blank?
263
264       text_truncated_to_word_break = text.truncate(DESCRIPTION_MAX_LENGTH, :separator => /(?<!\s)\s+/)
265
266       if text_truncated_to_word_break.length >= DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH
267         text_truncated_to_word_break
268       else
269         text.truncate(DESCRIPTION_MAX_LENGTH)
270       end
271     end
272
273     def image?(element)
274       element.type == :img || (element.type == :html_element && element.value == "img")
275     end
276
277     def paragraph?(element)
278       element.type == :p || (element.type == :html_element && element.value == "p")
279     end
280   end
281
282   class Text < Base
283     def to_html
284       linkify(simple_format(ERB::Util.html_escape(self)))
285     end
286
287     def to_text
288       to_s
289     end
290   end
291 end