]> git.openstreetmap.org Git - rails.git/blob - lib/spam_scorer/rich_text.rb
Add DB-backed model SpammyPhrases
[rails.git] / lib / spam_scorer / rich_text.rb
1 # frozen_string_literal: true
2
module SpamScorer
  # Computes a numeric spam score for a piece of rich text.
  #
  # The wrapped object only has to respond to +to_html+; the resulting HTML
  # is parsed with Nokogiri and scored on its links and on the phrases listed
  # in SPAMMY_PHRASES (defined outside this class body).
  class RichText
    # @param text [#to_html] the rich text whose rendered HTML is scored
    def initialize(text)
      @text = text
    end

    # Scores the rendered text. The result is the sum of:
    #
    # * 200 points for each whole unit by which the share of text found
    #   inside <a> elements exceeds 0.2 (nothing when at or below 0.2),
    # * 40 points per <a> element,
    # * 40 points per SPAMMY_PHRASES entry that occurs in the text.
    #
    # Links are only examined when the document contains some text; a
    # document with no text content gets no link-based points at all.
    #
    # @return [Float] the combined score; 0.0 when nothing matched
    def score
      doc = Nokogiri::HTML(text.to_html)

      # Extract the text once and reuse it, rather than asking the document
      # for it again on every check and for every phrase.
      content = doc.content

      link_count = 0
      link_proportion = 0

      unless content.empty?
        links = doc.xpath("//a")
        link_count = links.size
        link_size = links.sum { |link| link.content.length }

        # to_f forces float division so the proportion is not truncated.
        link_proportion = link_size.to_f / content.length
      end

      spammy_phrases = SPAMMY_PHRASES.count { |phrase| content.include?(phrase) }

      # The 0.0 floor keeps a low link proportion from reducing the score.
      ([link_proportion - 0.2, 0.0].max * 200) +
        (link_count * 40) +
        (spammy_phrases * 40)
    end

    private

    attr_reader :text
  end
end