From 38f4e17865948228cd5f573d49fe2c0c28ef4cbf Mon Sep 17 00:00:00 2001 From: Andy Allan Date: Tue, 28 Oct 2008 18:34:05 +0000 Subject: [PATCH] message title shouldn't accept invalid utf-8 sequences - test currently fails --- test/unit/message_test.rb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/unit/message_test.rb b/test/unit/message_test.rb index e95c698c9..ea05b1307 100644 --- a/test/unit/message_test.rb +++ b/test/unit/message_test.rb @@ -51,6 +51,21 @@ class MessageTest < Test::Unit::TestCase assert_raise(ActiveRecord::RecordInvalid) { make_message(EURO, 256).save! } end + def test_invalid_utf8 + # See e.g. http://en.wikipedia.org/wiki/UTF-8 for byte sequences + # FIXME - Invalid Unicode characters can still be encoded into "valid" utf-8 byte sequences - maybe check this too? + invalid_sequences = ["\xC0", # always invalid utf8 + "\xC2\x4a", # 2-byte multibyte identifier, followed by plain ASCII + "\xC2\xC2", # 2-byte multibyte identifier, followed by another one + "\x4a\x82", # plain ASCII, followed by multibyte continuation + "\x82\x82", # multibyte continuations without multibyte identifier + "\xe1\x82\x4a", # three-byte identifier, continuation and (incorrectly) plain ASCII + ] + invalid_sequences.each do |char| + assert_raise(ActiveRecord::RecordInvalid) { make_message(char, 1).save! } + end + end + def make_message(char, count) message = messages(:one) message.title = char * count -- 2.43.2