]> git.openstreetmap.org Git - rails.git/blob - lib/rich_text.rb
Shorten matching wiki urls in linkify
[rails.git] / lib / rich_text.rb
1 # frozen_string_literal: true
2
3 module RichText
4   SPAMMY_PHRASES = [
5     "Business Description:", "Additional Keywords:"
6   ].freeze
7
8   MAX_DESCRIPTION_LENGTH = 500
9
10   def self.new(format, text)
11     case format
12     when "html" then HTML.new(text || "")
13     when "markdown" then Markdown.new(text || "")
14     when "text" then Text.new(text || "")
15     end
16   end
17
18   class SimpleFormat
19     include ActionView::Helpers::TextHelper
20     include ActionView::Helpers::OutputSafetyHelper
21
22     def sanitize(text, _options = {})
23       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
24     end
25   end
26
27   class Base < String
28     include ActionView::Helpers::TagHelper
29
30     def spam_score
31       link_count = 0
32       link_size = 0
33
34       doc = Nokogiri::HTML(to_html)
35
36       if doc.content.empty?
37         link_proportion = 0
38       else
39         doc.xpath("//a").each do |link|
40           link_count += 1
41           link_size += link.content.length
42         end
43
44         link_proportion = link_size.to_f / doc.content.length
45       end
46
47       spammy_phrases = SPAMMY_PHRASES.count do |phrase|
48         doc.content.include?(phrase)
49       end
50
51       ([link_proportion - 0.2, 0.0].max * 200) +
52         (link_count * 40) +
53         (spammy_phrases * 40)
54     end
55
56     def image
57       nil
58     end
59
60     def image_alt
61       nil
62     end
63
64     def description
65       nil
66     end
67
68     protected
69
70     def simple_format(text)
71       SimpleFormat.new.simple_format(text, :dir => "auto")
72     end
73
74     def sanitize(text)
75       Sanitize.clean(text, Sanitize::Config::OSM).html_safe
76     end
77
78     def linkify(text, mode = :urls)
79       link_attr = tag_builder.tag_options(:rel => "nofollow noopener noreferrer")
80       Rinku.auto_link(ERB::Util.html_escape(text), mode, link_attr) do |url|
81         url = shorten_host(url, Settings.linkify_hosts, Settings.linkify_hosts_replacement)
82         shorten_host(url, Settings.linkify_wiki_hosts, Settings.linkify_wiki_hosts_replacement)
83       end.html_safe
84     end
85
86     private
87
88     def shorten_host(url, hosts, hosts_replacement)
89       %r{^https?://([^/]*)(.*)$}.match(url) do |m|
90         "#{hosts_replacement}#{m[2]}" if hosts_replacement && hosts&.include?(m[1])
91       end || url
92     end
93   end
94
95   class HTML < Base
96     def to_html
97       linkify(sanitize(simple_format(self)))
98     end
99
100     def to_text
101       to_s
102     end
103   end
104
105   class Markdown < Base
106     def to_html
107       linkify(sanitize(document.to_html), :all)
108     end
109
110     def to_text
111       to_s
112     end
113
114     def image
115       @image_element = first_image_element(document.root) unless defined? @image_element
116       @image_element.attr["src"] if @image_element
117     end
118
119     def image_alt
120       @image_element = first_image_element(document.root) unless defined? @image_element
121       @image_element.attr["alt"] if @image_element
122     end
123
124     def description
125       return @description if defined? @description
126
127       @description = first_truncated_text_content(document.root)
128     end
129
130     private
131
132     def document
133       @document ||= Kramdown::Document.new(self)
134     end
135
136     def first_image_element(element)
137       return element if image?(element) && element.attr["src"].present?
138
139       element.children.find do |child|
140         nested_image = first_image_element(child)
141         break nested_image if nested_image
142       end
143     end
144
145     def first_truncated_text_content(element)
146       if paragraph?(element)
147         truncated_text_content(element)
148       else
149         element.children.find do |child|
150           text = first_truncated_text_content(child)
151           break text unless text.nil?
152         end
153       end
154     end
155
156     def truncated_text_content(element)
157       text = +""
158
159       append_text = lambda do |child|
160         if child.type == :text
161           text << child.value
162         else
163           child.children.each do |c|
164             append_text.call(c)
165             break if text.length > MAX_DESCRIPTION_LENGTH
166           end
167         end
168       end
169       append_text.call(element)
170
171       return nil if text.blank?
172
173       text.truncate(MAX_DESCRIPTION_LENGTH)
174     end
175
176     def image?(element)
177       element.type == :img || (element.type == :html_element && element.value == "img")
178     end
179
180     def paragraph?(element)
181       element.type == :p || (element.type == :html_element && element.value == "p")
182     end
183   end
184
185   class Text < Base
186     def to_html
187       linkify(simple_format(ERB::Util.html_escape(self)))
188     end
189
190     def to_text
191       to_s
192     end
193   end
194 end