X-Git-Url: https://git.openstreetmap.org/rails.git/blobdiff_plain/25c3310be4c0b497d93e92a4d8141a50c12c4cd2..eff06faf46c8f0cc4d92a16f6b2daa22eabb4893:/test/unit/message_test.rb diff --git a/test/unit/message_test.rb b/test/unit/message_test.rb index e95c698c9..d40d85f30 100644 --- a/test/unit/message_test.rb +++ b/test/unit/message_test.rb @@ -1,7 +1,8 @@ require File.dirname(__FILE__) + '/../test_helper' -class MessageTest < Test::Unit::TestCase - fixtures :messages, :users +class MessageTest < ActiveSupport::TestCase + api_fixtures + fixtures :messages EURO = "\xe2\x82\xac" #euro symbol @@ -51,6 +52,35 @@ class MessageTest < Test::Unit::TestCase assert_raise(ActiveRecord::RecordInvalid) { make_message(EURO, 256).save! } end + def test_invalid_utf8 + # Checks that a title made of a malformed UTF-8 byte sequence either raises ActiveRecord::RecordInvalid or is read back unmodified. See e.g. http://en.wikipedia.org/wiki/UTF-8 for byte sequences + # FIXME: invalid Unicode characters can still be encoded into "valid" UTF-8 byte sequences - maybe check this too? + invalid_sequences = ["\xC0", # always invalid in UTF-8 + "\xC2\x4a", # 2-byte multibyte identifier, followed by plain ASCII + "\xC2\xC2", # 2-byte multibyte identifier, followed by another one + "\x4a\x82", # plain ASCII, followed by multibyte continuation + "\x82\x82", # multibyte continuations without multibyte identifier + "\xe1\x82\x4a", # three-byte identifier, continuation and (incorrectly) plain ASCII + ] + invalid_sequences.each do |char| + begin + # create a message and save to the database + msg = make_message(char, 1) + # if the save throws, that's fine and the test should pass, as we're + # only testing invalid sequences anyway. + msg.save! + + # get the saved message back and check that it is identical - i.e. + # it's OK to accept invalid UTF-8 as long as we return it unmodified. 
+ db_msg = msg.class.find(msg.id) + assert_equal char, db_msg.title, "Database silently truncated message title" + + rescue ActiveRecord::RecordInvalid + # because we only test invalid sequences, it is OK to barf on them + end + end + end + def make_message(char, count) message = messages(:one) message.title = char * count