]> git.openstreetmap.org Git - rails.git/blob - lib/spam_scorer/rich_text.rb
Merge remote-tracking branch 'upstream/pull/6549'
[rails.git] / lib / spam_scorer / rich_text.rb
1 # frozen_string_literal: true
2
module SpamScorer
  # Heuristic spam scorer for a piece of rich text.
  #
  # The text is rendered to HTML and scored on three signals: the share of the
  # document's text that sits inside links, the number of links, and the number
  # of known spammy phrases found in the text. Larger scores are more spam-like.
  class RichText
    # @param text [#to_html] the rich text to score; only +to_html+ is called on it
    def initialize(text)
      @text = text
    end

    # Computes the spam score for the text.
    #
    # The score is the sum of:
    # * 200 points per unit of link proportion above 0.2 (link proportion is
    #   the length of all link text divided by the length of all text),
    # * 40 points per link,
    # * 40 points per spammy phrase contained in the text.
    #
    # Links are only counted when the document has some text content; a
    # document whose text content is empty gets no link-based score.
    #
    # @return [Float] the spam score, never negative
    def score
      link_count = 0
      link_size = 0

      doc = Nokogiri::HTML(text.to_html)

      if doc.content.empty?
        # Avoid dividing by zero when there is no text at all.
        link_proportion = 0
      else
        doc.xpath("//a").each do |link|
          link_count += 1
          link_size += link.content.length
        end

        link_proportion = link_size.to_f / doc.content.length
      end

      # Compare text and phrases in the same canonical form so that case and
      # compatibility variants of a phrase still match.
      comparable_content = to_comparable_form(doc.content)
      spammy_phrases = SpammyPhrase.pluck(:phrase).count do |phrase|
        comparable_content.include?(to_comparable_form(phrase))
      end

      ([link_proportion - 0.2, 0.0].max * 200) +
        (link_count * 40) +
        (spammy_phrases * 40)
    end

    private

    attr_reader :text

    # Reduces a string to a canonical form for caseless, compatibility-
    # insensitive comparison.
    #
    # NFKC normalisation runs first so that compatibility characters (for
    # example mathematical bold or double-struck capitals) are turned into
    # plain letters before case folding; folding first would leave those
    # letters in upper case. Case folding does not preserve normalisation
    # form, so the result is normalised once more.
    #
    # @param str [String] a string in a Unicode encoding
    # @return [String] the folded, NFKC-normalised string
    def to_comparable_form(str)
      str.unicode_normalize(:nfkc).downcase(:fold).unicode_normalize(:nfkc)
    end
  end
end