]> git.openstreetmap.org Git - rails.git/blob - lib/rich_text.rb
make markdown bidirectional with dir="auto"
[rails.git] / lib / rich_text.rb
1 # frozen_string_literal: true
2
3 module RichText
4   SPAMMY_PHRASES = [
5     "Business Description:", "Additional Keywords:"
6   ].freeze
7
8   MAX_DESCRIPTION_LENGTH = 500
9
10   def self.new(format, text)
11     case format
12     when "html" then HTML.new(text || "")
13     when "markdown" then Markdown.new(text || "")
14     when "text" then Text.new(text || "")
15     end
16   end
17
18   class SimpleFormat
19     include ActionView::Helpers::TextHelper
20     include ActionView::Helpers::OutputSafetyHelper
21
22     def sanitize(text, _options = {})
23       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
24     end
25   end
26
27   class Base < String
28     def spam_score
29       link_count = 0
30       link_size = 0
31
32       doc = Nokogiri::HTML(to_html)
33
34       if doc.content.empty?
35         link_proportion = 0
36       else
37         doc.xpath("//a").each do |link|
38           link_count += 1
39           link_size += link.content.length
40         end
41
42         link_proportion = link_size.to_f / doc.content.length
43       end
44
45       spammy_phrases = SPAMMY_PHRASES.count do |phrase|
46         doc.content.include?(phrase)
47       end
48
49       ([link_proportion - 0.2, 0.0].max * 200) +
50         (link_count * 40) +
51         (spammy_phrases * 40)
52     end
53
54     def image
55       nil
56     end
57
58     def image_alt
59       nil
60     end
61
62     def description
63       nil
64     end
65
66     protected
67
68     def simple_format(text)
69       SimpleFormat.new.simple_format(text, :dir => "auto")
70     end
71
72     def sanitize(text)
73       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
74     end
75
76     def linkify(text, mode = :urls)
77       link_attr = 'rel="nofollow noopener noreferrer"'
78       Rinku.auto_link(ERB::Util.html_escape(text), mode, link_attr) do |url|
79         url = shorten_host(url, Settings.linkify_hosts, Settings.linkify_hosts_replacement)
80         shorten_host(url, Settings.linkify_wiki_hosts, Settings.linkify_wiki_hosts_replacement) do |path|
81           path.sub(Regexp.new(Settings.linkify_wiki_optional_path_prefix || ""), "")
82         end
83       end.html_safe
84     end
85
86     private
87
88     def shorten_host(url, hosts, hosts_replacement)
89       %r{^(https?://([^/]*))(.*)$}.match(url) do |m|
90         scheme_host, host, path = m.captures
91         if hosts&.include?(host)
92           path = yield(path) if block_given?
93           if hosts_replacement
94             "#{hosts_replacement}#{path}"
95           else
96             "#{scheme_host}#{path}"
97           end
98         end || url
99       end || url
100     end
101   end
102
103   class HTML < Base
104     def to_html
105       linkify(simple_format(self))
106     end
107
108     def to_text
109       to_s
110     end
111   end
112
113   class Markdown < Base
114     def to_html
115       linkify(sanitize(document.to_html), :all)
116     end
117
118     def to_text
119       to_s
120     end
121
122     def image
123       @image_element = first_image_element(document.root) unless defined? @image_element
124       @image_element.attr["src"] if @image_element
125     end
126
127     def image_alt
128       @image_element = first_image_element(document.root) unless defined? @image_element
129       @image_element.attr["alt"] if @image_element
130     end
131
132     def description
133       return @description if defined? @description
134
135       @description = first_truncated_text_content(document.root)
136     end
137
138     private
139
140     def document
141       return @document if @document
142
143       @document = Kramdown::Document.new(self)
144
145       should_get_dir_auto = lambda do |el|
146         dir_auto_types = [:p, :header, :codespan, :codeblock, :pre, :ul, :ol, :table, :dl, :math]
147         return true if dir_auto_types.include?(el.type)
148         return true if el.type == :a && el.children.length == 1 && el.children[0].type == :text && el.children[0].value == el.attr["href"]
149
150         false
151       end
152
153       add_dir = lambda do |element|
154         element.attr["dir"] ||= "auto" if should_get_dir_auto.call(element)
155         element.children.each(&add_dir)
156       end
157       add_dir.call(@document.root)
158
159       @document
160     end
161
162     def first_image_element(element)
163       return element if image?(element) && element.attr["src"].present?
164
165       element.children.find do |child|
166         nested_image = first_image_element(child)
167         break nested_image if nested_image
168       end
169     end
170
171     def first_truncated_text_content(element)
172       if paragraph?(element)
173         truncated_text_content(element)
174       else
175         element.children.find do |child|
176           text = first_truncated_text_content(child)
177           break text unless text.nil?
178         end
179       end
180     end
181
182     def truncated_text_content(element)
183       text = +""
184
185       append_text = lambda do |child|
186         if child.type == :text
187           text << child.value
188         else
189           child.children.each do |c|
190             append_text.call(c)
191             break if text.length > MAX_DESCRIPTION_LENGTH
192           end
193         end
194       end
195       append_text.call(element)
196
197       return nil if text.blank?
198
199       text.truncate(MAX_DESCRIPTION_LENGTH)
200     end
201
202     def image?(element)
203       element.type == :img || (element.type == :html_element && element.value == "img")
204     end
205
206     def paragraph?(element)
207       element.type == :p || (element.type == :html_element && element.value == "p")
208     end
209   end
210
211   class Text < Base
212     def to_html
213       linkify(simple_format(ERB::Util.html_escape(self)))
214     end
215
216     def to_text
217       to_s
218     end
219   end
220 end