# Return a spam score for a chunk of text.
#
# The text is parsed as HTML with Nokogiri (which must already be loaded;
# lib/osm.rb requires it alongside its other dependencies) and scored on
# two signals:
#
# * 20 points for every <a> element found, and
# * 200 points per unit by which the share of the document's text that
#   sits inside links exceeds 20%.
#
# text - String of HTML (or plain text) to score.
#
# Returns the score as a Float; 0.0 means no links were found.
def self.spam_score(text)
  doc = Nokogiri::HTML(text)
  links = doc.xpath("//a")

  # Count links even when the document has no text at all, so markup made
  # up purely of linked images is still scored.
  link_count = links.length
  link_size = links.inject(0) { |total, link| total + link.content.length }

  # Only the proportion needs guarding: with no text there is nothing to
  # divide by, so treat the linked share as zero.
  total_size = doc.content.length
  link_proportion = total_size > 0 ? link_size.to_f / total_size : 0.0

  [link_proportion - 0.2, 0.0].max * 200 + link_count * 20
end