From bc0139403d480ff45f0efe8525effba22f305e31 Mon Sep 17 00:00:00 2001 From: Pablo Brasero Date: Thu, 20 Nov 2025 11:37:04 +0000 Subject: [PATCH] Move spam scoring to own class --- app/models/user.rb | 6 +++--- lib/rich_text.rb | 30 -------------------------- lib/spam_scorer.rb | 41 ++++++++++++++++++++++++++++++++++++ test/lib/rich_text_test.rb | 15 ------------- test/lib/spam_scorer_test.rb | 23 ++++++++++++++++++++ 5 files changed, 67 insertions(+), 48 deletions(-) create mode 100644 lib/spam_scorer.rb create mode 100644 test/lib/spam_scorer_test.rb diff --git a/app/models/user.rb b/app/models/user.rb index 1d21c2e37..a4208dc2d 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -398,11 +398,11 @@ class User < ApplicationRecord def spam_score changeset_score = changesets.size * 50 trace_score = traces.size * 50 - diary_entry_score = diary_entries.visible.inject(0) { |acc, elem| acc + elem.body.spam_score } - diary_comment_score = diary_comments.visible.inject(0) { |acc, elem| acc + elem.body.spam_score } + diary_entry_score = diary_entries.visible.inject(0) { |acc, elem| acc + SpamScorer.new(elem.body).score } + diary_comment_score = diary_comments.visible.inject(0) { |acc, elem| acc + SpamScorer.new(elem.body).score } report_score = Report.where(:category => "spam", :issue => issues.with_status("open")).distinct.count(:user_id) * 20 - score = description.spam_score / 4.0 + score = SpamScorer.new(description).score / 4.0 score += diary_entries.visible.where("created_at > ?", 1.day.ago).count * 10 score += diary_entry_score / diary_entries.visible.length unless diary_entries.visible.empty? score += diary_comment_score / diary_comments.visible.length unless diary_comments.visible.empty? diff --git a/lib/rich_text.rb b/lib/rich_text.rb index e05d030dc..197df06df 100644 --- a/lib/rich_text.rb +++ b/lib/rich_text.rb @@ -1,10 +1,6 @@ # frozen_string_literal: true module RichText - SPAMMY_PHRASES = [ - "Business Description:", "Additional Keywords:" - ].freeze - DESCRIPTION_MAX_LENGTH = 500 DESCRIPTION_WORD_BREAK_THRESHOLD_LENGTH = 450 @@ -26,32 +22,6 @@ module RichText end class Base < String - def spam_score - link_count = 0 - link_size = 0 - - doc = Nokogiri::HTML(to_html) - - if doc.content.empty? - link_proportion = 0 - else - doc.xpath("//a").each do |link| - link_count += 1 - link_size += link.content.length - end - - link_proportion = link_size.to_f / doc.content.length - end - - spammy_phrases = SPAMMY_PHRASES.count do |phrase| - doc.content.include?(phrase) - end - - ([link_proportion - 0.2, 0.0].max * 200) + - (link_count * 40) + - (spammy_phrases * 40) - end - def image nil end diff --git a/lib/spam_scorer.rb b/lib/spam_scorer.rb new file mode 100644 index 000000000..61a62ca0f --- /dev/null +++ b/lib/spam_scorer.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +class SpamScorer + SPAMMY_PHRASES = [ + "Business Description:", "Additional Keywords:" + ].freeze + + def initialize(text) + @text = text + end + + def score + link_count = 0 + link_size = 0 + + doc = Nokogiri::HTML(text.to_html) + + if doc.content.empty? + link_proportion = 0 + else + doc.xpath("//a").each do |link| + link_count += 1 + link_size += link.content.length + end + + link_proportion = link_size.to_f / doc.content.length + end + + spammy_phrases = SPAMMY_PHRASES.count do |phrase| + doc.content.include?(phrase) + end + + ([link_proportion - 0.2, 0.0].max * 200) + + (link_count * 40) + + (spammy_phrases * 40) + end + + private + + attr_reader :text +end diff --git a/test/lib/rich_text_test.rb b/test/lib/rich_text_test.rb index ec9793e4f..febe6fcbf 100644 --- a/test/lib/rich_text_test.rb +++ b/test/lib/rich_text_test.rb @@ -80,11 +80,6 @@ class RichTextTest < ActiveSupport::TestCase assert_equal "foo bar baz", r.to_text end - def test_html_spam_score - r = RichText.new("html", "foo bar baz") - assert_equal 55, r.spam_score.round - end - def test_markdown_to_html r = RichText.new("markdown", "foo http://example.com/ bar") assert_html r do @@ -218,11 +213,6 @@ class RichTextTest < ActiveSupport::TestCase assert_equal "foo [bar](http://example.com/) baz", r.to_text end - def test_markdown_spam_score - r = RichText.new("markdown", "foo [bar](http://example.com/) baz") - assert_equal 50, r.spam_score.round - end - def test_text_to_html_linkify with_settings(:linkify_hosts => ["replace-me.example.com"], :linkify_hosts_replacement => "repl.example.com") do r = RichText.new("text", "foo http://example.com/ bar") @@ -342,11 +332,6 @@ class RichTextTest < ActiveSupport::TestCase assert_equal "foo http://example.com/ bar", r.to_text end - def test_text_spam_score - r = RichText.new("text", "foo http://example.com/ bar") - assert_equal 141, r.spam_score.round - end - def test_text_no_opengraph_properties r = RichText.new("text", "foo https://example.com/ bar") assert_nil r.image diff --git a/test/lib/spam_scorer_test.rb b/test/lib/spam_scorer_test.rb new file mode 100644 index 000000000..bb1801a53 --- /dev/null +++ b/test/lib/spam_scorer_test.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require "test_helper" + +class SpamScorerTest < ActiveSupport::TestCase + def test_html_spam_score + r = RichText.new("html", "foo bar baz") + scorer = SpamScorer.new(r) + assert_equal 55, scorer.score.round + end + + def test_markdown_spam_score + r = RichText.new("markdown", "foo [bar](http://example.com/) baz") + scorer = SpamScorer.new(r) + assert_equal 50, scorer.score.round + end + + def test_text_spam_score + r = RichText.new("text", "foo http://example.com/ bar") + scorer = SpamScorer.new(r) + assert_equal 141, scorer.score.round + end +end -- 2.39.5