X-Git-Url: https://git.openstreetmap.org/rails.git/blobdiff_plain/25c3310be4c0b497d93e92a4d8141a50c12c4cd2..eff06faf46c8f0cc4d92a16f6b2daa22eabb4893:/test/unit/message_test.rb

diff --git a/test/unit/message_test.rb b/test/unit/message_test.rb
index e95c698c9..d40d85f30 100644
--- a/test/unit/message_test.rb
+++ b/test/unit/message_test.rb
@@ -1,7 +1,8 @@
 require File.dirname(__FILE__) + '/../test_helper'
 
-class MessageTest < Test::Unit::TestCase
-  fixtures :messages, :users
+class MessageTest < ActiveSupport::TestCase
+  api_fixtures
+  fixtures :messages
 
   EURO = "\xe2\x82\xac" #euro symbol
 
@@ -51,6 +52,35 @@ class MessageTest < Test::Unit::TestCase
     assert_raise(ActiveRecord::RecordInvalid) { make_message(EURO, 256).save! }
   end
 
+  def test_invalid_utf8
+    # Each invalid UTF-8 title must either be rejected on save or be read back unchanged.
+    # See e.g. http://en.wikipedia.org/wiki/UTF-8 for byte sequences. FIXME: invalid Unicode characters can still be encoded into "valid" UTF-8 byte sequences - maybe check this too?
+    invalid_sequences = ["\xC0",         # always invalid UTF-8
+                         "\xC2\x4a",     # 2-byte multibyte identifier, followed by plain ASCII
+                         "\xC2\xC2",     # 2-byte multibyte identifier, followed by another one
+                         "\x4a\x82",     # plain ASCII, followed by multibyte continuation
+                         "\x82\x82",     # multibyte continuations without multibyte identifier
+                         "\xe1\x82\x4a", # three-byte identifier, continuation and (incorrectly) plain ASCII
+                        ]
+    invalid_sequences.each do |char|
+      begin
+        # Build a message whose title is the invalid sequence (repeated once) and save it.
+        msg = make_message(char, 1)
+        # If the save raises RecordInvalid, that's fine and the test should pass,
+        # as we're only testing invalid sequences anyway.
+        msg.save! 
+
+        # Reload the saved message by id and check that its title is identical, i.e.
+        # it's OK to accept invalid UTF-8 as long as we return it unmodified.
+        db_msg = msg.class.find(msg.id)
+        assert_equal char, db_msg.title, "Database silently truncated message title"
+
+      rescue ActiveRecord::RecordInvalid
+        # Rejecting the record is acceptable: every sequence tested here is invalid.
+      end
+    end
+  end  
+
   def make_message(char, count)
     message = messages(:one)
     message.title = char * count