]> git.openstreetmap.org Git - rails.git/blob - lib/rich_text.rb
Fix copy-pasting breaking links in linkify
[rails.git] / lib / rich_text.rb
1 # frozen_string_literal: true
2
3 module RichText
  # Length limit passed to String#truncate when Markdown#description is built.
  DESCRIPTION_MAX_LENGTH = 500
  # A truncation at a whitespace boundary is only used for the description when
  # it is at least this long; otherwise the text is cut at DESCRIPTION_MAX_LENGTH.
  DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH = 450
  # Regexp character class (kept as a string) matching any character outside
  # the listed set. It is interpolated into the lookbehind used by
  # Base#expand_host_shorthands, so a host shorthand is only expanded at the
  # start of a line or directly after one of these characters.
  URL_UNSAFE_CHARS = "[^\\w!#$%&'*+,./:;=?@_~^\\-]"
7
8   def self.new(format, text)
9     case format
10     when "html" then HTML.new(text || "")
11     when "markdown" then Markdown.new(text || "")
12     when "text" then Text.new(text || "")
13     end
14   end
15
16   class SimpleFormat
17     include ActionView::Helpers::TextHelper
18     include ActionView::Helpers::OutputSafetyHelper
19
20     def sanitize(text, _options = {})
21       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
22     end
23   end
24
25   class Base < String
26     def image
27       nil
28     end
29
30     def image_alt
31       nil
32     end
33
34     def description
35       nil
36     end
37
38     def truncate_html(max_length = nil, img_length = 1000)
39       html_doc = to_html
40       return html_doc if max_length.nil?
41
42       doc = Nokogiri::HTML::DocumentFragment.parse(html_doc)
43       keep_or_discards = %w[p h1 h2 h3 h4 h5 h6 pre a table ul ol dl]
44       accumulated_length = 0
45       exceeded_node_parent = nil
46       truncated = false
47
48       doc.traverse do |node|
49         if accumulated_length >= max_length
50           if node == exceeded_node_parent
51             exceeded_node_parent = node.parent
52             node.remove if keep_or_discards.include?(node.name)
53           else
54             node.remove
55           end
56           next
57         end
58
59         next unless node.children.empty?
60
61         if node.text?
62           accumulated_length += node.text.length
63         elsif node.name == "img"
64           accumulated_length += img_length
65         end
66
67         if accumulated_length >= max_length
68           truncated = true
69           exceeded_node_parent = node.parent
70           node.remove
71         end
72       end
73
74       {
75         :truncated => truncated,
76         :html => doc.to_html.html_safe
77       }
78     end
79
80     protected
81
82     def simple_format(text)
83       SimpleFormat.new.simple_format(text, :dir => "auto")
84     end
85
86     def sanitize(text)
87       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
88     end
89
90     def linkify(text, mode = :urls)
91       ERB::Util.html_escape(text)
92                .then { |html| expand_host_shorthands(html) }
93                .then { |html| auto_link(html, mode) }
94                .html_safe
95     end
96
97     private
98
99     def expand_host_shorthands(text)
100       [
101         [Settings.linkify_hosts, Settings.linkify_hosts_replacement],
102         [Settings.linkify_wiki_hosts, Settings.linkify_wiki_hosts_replacement]
103       ]
104         .select { |hosts, replacement| replacement && hosts&.any? }
105         .reduce(text) do |text, (hosts, replacement)|
106           text.gsub(/(?<=^|#{URL_UNSAFE_CHARS})\b#{Regexp.escape(replacement)}/) do
107             "#{Settings.server_protocol}://#{hosts[0]}"
108           end
109         end
110     end
111
112     def auto_link(text, mode)
113       link_attr = 'rel="nofollow noopener noreferrer" dir="auto"'
114       Rinku.auto_link(text, mode, link_attr) { |url| format_link_text(url) }
115     end
116
117     def format_link_text(url)
118       url = shorten_host(url, Settings.linkify_hosts, Settings.linkify_hosts_replacement)
119       shorten_host(url, Settings.linkify_wiki_hosts, Settings.linkify_wiki_hosts_replacement) do |path|
120         path.sub(Regexp.new(Settings.linkify_wiki_optional_path_prefix || ""), "")
121       end
122     end
123
124     def shorten_host(url, hosts, hosts_replacement)
125       %r{^(https?://([^/]*))(.*)$}.match(url) do |m|
126         scheme_host, host, path = m.captures
127         if hosts&.include?(host)
128           path = yield(path) if block_given?
129           if hosts_replacement
130             "#{hosts_replacement}#{path}"
131           else
132             "#{scheme_host}#{path}"
133           end
134         end || url
135       end || url
136     end
137   end
138
139   class HTML < Base
140     def to_html
141       linkify(simple_format(self))
142     end
143
144     def to_text
145       to_s
146     end
147   end
148
149   class Markdown < Base
150     def to_html
151       linkify(sanitize(document.to_html), :all)
152     end
153
154     def to_text
155       to_s
156     end
157
158     def image
159       @image_element = first_image_element(document.root) unless defined? @image_element
160       @image_element.attr["src"] if @image_element
161     end
162
163     def image_alt
164       @image_element = first_image_element(document.root) unless defined? @image_element
165       @image_element.attr["alt"] if @image_element
166     end
167
168     def description
169       return @description if defined? @description
170
171       @description = first_truncated_text_content(document.root)
172     end
173
174     private
175
176     def document
177       return @document if @document
178
179       @document = Kramdown::Document.new(self)
180
181       should_get_dir_auto = lambda do |el|
182         dir_auto_types = [:p, :header, :codespan, :codeblock, :pre, :ul, :ol, :table, :dl, :math]
183         return true if dir_auto_types.include?(el.type)
184         return true if el.type == :a && el.children.length == 1 && el.children[0].type == :text && el.children[0].value == el.attr["href"]
185
186         false
187       end
188
189       add_dir = lambda do |element|
190         element.attr["dir"] ||= "auto" if should_get_dir_auto.call(element)
191         element.children.each(&add_dir)
192       end
193       add_dir.call(@document.root)
194
195       @document
196     end
197
198     def first_image_element(element)
199       return element if image?(element) && element.attr["src"].present?
200
201       element.children.find do |child|
202         nested_image = first_image_element(child)
203         break nested_image if nested_image
204       end
205     end
206
207     def first_truncated_text_content(element)
208       if paragraph?(element)
209         truncated_text_content(element)
210       else
211         element.children.find do |child|
212           text = first_truncated_text_content(child)
213           break text unless text.nil?
214         end
215       end
216     end
217
218     def truncated_text_content(element)
219       text = +""
220
221       append_text = lambda do |child|
222         if child.type == :text
223           text << child.value
224         else
225           child.children.each do |c|
226             append_text.call(c)
227             break if text.length > DESCRIPTION_MAX_LENGTH
228           end
229         end
230       end
231       append_text.call(element)
232
233       return nil if text.blank?
234
235       text_truncated_to_word_break = text.truncate(DESCRIPTION_MAX_LENGTH, :separator => /(?<!\s)\s+/)
236
237       if text_truncated_to_word_break.length >= DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH
238         text_truncated_to_word_break
239       else
240         text.truncate(DESCRIPTION_MAX_LENGTH)
241       end
242     end
243
244     def image?(element)
245       element.type == :img || (element.type == :html_element && element.value == "img")
246     end
247
248     def paragraph?(element)
249       element.type == :p || (element.type == :html_element && element.value == "p")
250     end
251   end
252
253   class Text < Base
254     def to_html
255       linkify(simple_format(ERB::Util.html_escape(self)))
256     end
257
258     def to_text
259       to_s
260     end
261   end
262 end