123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227 |
- --TEST--
- Exhaustive test of mUTF-7 (IMAP) encoding verification and conversion
- --EXTENSIONS--
- mbstring
- --FILE--
- <?php
// Pull in the shared helpers this script calls but does not define itself
// (testValidString, testInvalidString, identifyInvalidString,
// convertInvalidString) -- presumably all provided by this include.
include 'encoding_tests.inc';

// Use '%' (0x25) as the marker substituted for unconvertible input.
mb_substitute_character(0x25);
/**
 * Re-encode a UTF-8 string as big-endian UTF-16.
 *
 * @param string $utf8 Text encoded as UTF-8.
 * @return string The same text encoded as UTF-16BE.
 */
function utf16BE($utf8) {
    $sourceEncoding = 'UTF-8';
    $targetEncoding = 'UTF-16BE';

    return mb_convert_encoding($utf8, $targetEncoding, $sourceEncoding);
}
/**
 * Encode raw bytes as the modified Base64 used in this test's '&...-'
 * sections: standard Base64 with every '/' swapped for ',' and all '='
 * padding removed.
 *
 * @param string $str Raw bytes to encode.
 * @return string Modified Base64 text without padding.
 */
function mBase64($str) {
    $standard = base64_encode($str);

    // Neither replacement can produce the other's search character, so a
    // single array-form call is equivalent to two chained replacements.
    return str_replace(['/', '='], [',', ''], $standard);
}
/**
 * Shorthand for testValidString() with the encodings fixed to
 * UTF7-IMAP (source) and UTF-8 (target).
 *
 * @param string $from     Input in UTF7-IMAP.
 * @param string $to       Expected UTF-8 result.
 * @param bool   $bothWays Forwarded unchanged to testValidString();
 *                         presumably enables the reverse-direction check.
 */
function testValid($from, $to, $bothWays = true) {
    $fromEncoding = 'UTF7-IMAP';
    $toEncoding = 'UTF-8';

    testValidString($from, $to, $fromEncoding, $toEncoding, $bothWays);
}
/**
 * Shorthand for testInvalidString() with the encodings fixed to
 * UTF7-IMAP (source) and UTF-8 (target).
 *
 * @param string $from Input that is expected to be invalid UTF7-IMAP.
 * @param string $to   Expected UTF-8 output of converting that input.
 */
function testInvalid($from, $to) {
    $fromEncoding = 'UTF7-IMAP';
    $toEncoding = 'UTF-8';

    testInvalidString($from, $to, $fromEncoding, $toEncoding);
}
// The empty string must be accepted and must convert to itself.
$empty = '';
testValid($empty, $empty);

echo "Identification passes on empty string... good start!\n";
// Every printable ASCII byte (0x20..0x7E) stands for itself. '&' (0x26) is
// left out because it opens an encoded section and is covered separately.
foreach (range(0x20, 0x7E) as $codeUnit) {
    if ($codeUnit !== 0x26) {
        $char = chr($codeUnit);
        testValid($char, $char);
    }
}

echo "Testing all valid single-character ASCII strings... check!\n";
// Bytes below 0x20 and bytes from 0x7F upwards are each expected to be
// invalid on their own and to convert to the '%' marker. The original
// ordering (low range first, then high range) is kept.
$lowBytes = range(0x00, 0x1F);
$highBytes = range(0x7F, 0xFF);

foreach (array_merge($lowBytes, $highBytes) as $byte) {
    testInvalid(chr($byte), "%");
}

echo "Non-ASCII characters convert to illegal char marker... yes!\n";
// '&' written as a Base64 section must decode to '&'. The reverse direction
// is not checked (third argument is false).
$encodedAmpersand = '&' . mBase64(utf16BE('&')) . '-';
testValid($encodedAmpersand, '&', false);

echo "& can be Base64-encoded... yes!\n";
// An '&' that opens a section which is never closed with '-' must be
// rejected, whether the section is empty or holds Base64 data.
$unterminatedInputs = [
    '&',
    'abc&',
    '&' . mBase64(utf16BE('ハムサンドイッチ')),
];

foreach ($unterminatedInputs as $input) {
    identifyInvalidString($input, 'UTF7-IMAP');
}

echo "Testing unterminated & sections... yep!\n";
// A single printable ASCII character (other than '&', 0x26) wrapped in a
// Base64 section is expected to be invalid and to convert to '%'.
foreach (range(0x20, 0x7E) as $codeUnit) {
    if ($codeUnit === 0x26) {
        continue;
    }

    $encodedSection = '&' . mBase64(utf16BE(chr($codeUnit))) . '-';
    testInvalid($encodedSection, "%");
}

echo "Testing ASCII characters which are Base64-encoded... great!\n";
// '&-' is the escape for a literal '&'; check it alone, after plain text,
// and twice within one string. Each pair is [UTF7-IMAP input, UTF-8 output].
$ampersandEscapes = [
    ['&-', '&'],
    ['abc&-', 'abc&'],
    ['&-.&-', '&.&'],
];

foreach ($ampersandEscapes as [$encoded, $decoded]) {
    testValid($encoded, $decoded);
}

echo "Testing valid strings which use '&-' for '&'... good!\n";
// Start from a known-good Base64 payload, then follow its first 11
// characters with each byte value that is neither part of the modified
// Base64 alphabet nor the '-' terminator. Every such string must be rejected.
$validPayload = mBase64(utf16BE("我是打酱油的"));
if (strlen($validPayload) !== 16) {
    die("Erk!!");
}

$sectionStart = '&' . substr($validPayload, 0, 11);

for ($byte = 0; $byte < 256; $byte++) {
    $isDigit = $byte >= 0x30 && $byte <= 0x39;       // '0'..'9'
    $isUpper = $byte >= 0x41 && $byte <= 0x5A;       // 'A'..'Z'
    $isLower = $byte >= 0x61 && $byte <= 0x7A;       // 'a'..'z'
    $isPlusOrComma = $byte === 0x2B || $byte === 0x2C; // '+' and ','
    $isHyphen = $byte === 0x2D;                      // '-' ends the section

    if ($isDigit || $isUpper || $isLower || $isPlusOrComma || $isHyphen) {
        continue;
    }

    identifyInvalidString($sectionStart . chr($byte) . '-', 'UTF7-IMAP');
}

echo "Identification fails when Base64 sections contain non-Base64 bytes... right!\n";
// U+10400 encodes in UTF-16BE as a 4-byte surrogate pair; split it into its
// two 2-byte halves so malformed sequences can be assembled from them.
$surrogatePair = mb_convert_encoding("\x00\x01\x04\x00", 'UTF-16BE', 'UTF-32BE');
if (strlen($surrogatePair) !== 4) {
    die("Ouch!");
}
$firstHalf = substr($surrogatePair, 0, 2);  // high (leading) surrogate
$secondHalf = substr($surrogatePair, 2, 2); // low (trailing) surrogate

// Halves in swapped order.
identifyInvalidString('&' . mBase64($secondHalf . $firstHalf) . '-', 'UTF7-IMAP');

// Trailing half on its own.
identifyInvalidString('&' . mBase64($secondHalf) . '-', 'UTF7-IMAP');

$singleChar = mb_convert_encoding("1", 'UTF-16BE', 'ASCII');
$doubleChar = mb_convert_encoding("\x00\x01\x04\x01", 'UTF-16BE', 'UTF-32BE');
if (strlen($doubleChar) !== 4) {
    die("That was supposed to be a surrogate pair");
}

// Trailing half preceded by valid characters. The varying prefix lengths
// presumably shift where the bad code unit lands within the Base64 groups.
$prefixesBeforeTrailingHalf = [
    $singleChar,
    str_repeat($singleChar, 2),
    str_repeat($singleChar, 3),
    $doubleChar,
    $singleChar . $doubleChar,
    str_repeat($singleChar, 2) . $doubleChar,
];
foreach ($prefixesBeforeTrailingHalf as $prefix) {
    identifyInvalidString('&' . mBase64($prefix . $secondHalf) . '-', 'UTF7-IMAP');
}

// Leading half followed by an ordinary character instead of its partner.
$leadingThenAscii = $firstHalf . mb_convert_encoding("a", 'UTF-16BE', 'ASCII');
$prefixesBeforeBrokenPair = [
    '',
    $singleChar,
    str_repeat($singleChar, 2),
    $doubleChar,
];
foreach ($prefixesBeforeBrokenPair as $prefix) {
    identifyInvalidString('&' . mBase64($prefix . $leadingThenAscii) . '-', 'UTF7-IMAP');
}

// Leading half as the very last code unit of the section.
foreach (['', $singleChar, str_repeat($singleChar, 2)] as $prefix) {
    identifyInvalidString('&' . mBase64($prefix . $firstHalf) . '-', 'UTF7-IMAP');
}

// UTF-16 data with its final byte cut off (odd number of bytes).
$truncatedUtf16 = substr(utf16BE("ドーナツ"), 0, -1);
identifyInvalidString('&' . mBase64($truncatedUtf16) . '-', 'UTF7-IMAP');

// Two 2-byte characters give 6 Base64 characters. Replacing the last one
// with ',' presumably leaves non-zero bits in the unused tail of the final
// Base64 group -- confirm against the decoder.
$twoCharacters = utf16BE("☺⛑");
if (strlen($twoCharacters) !== 4) {
    die("No good");
}
$encoded = mBase64($twoCharacters);
if (strlen($encoded) !== 6) {
    die("Don't like that");
}
identifyInvalidString('&' . substr($encoded, 0, -1) . ',-', 'UTF7-IMAP');

echo "Identification fails when UTF-16 text is invalid... no sweat!\n";
// Plain round trips: each text is wrapped in a single Base64 section and
// must decode back to the same UTF-8 text.
// NOTE(review): "123" is Base64-encoded printable ASCII and is expected to be
// valid here, while the single-character loop earlier in this file expects
// Base64-encoded ASCII to be rejected -- confirm both expectations hold.
foreach (["☺", "饺子", "123", "ᄚᄆᄇᄈ"] as $text) {
    testValid('&' . mBase64(utf16BE($text)) . '-', $text);
}

// U+10401 needs a surrogate pair in UTF-16; keep both of its encodings handy.
$longChar1 = mb_convert_encoding("\x00\x01\x04\x01", 'UTF-16BE', 'UTF-32BE');
$longChar2 = mb_convert_encoding("\x00\x01\x04\x01", 'UTF-8', 'UTF-32BE');

// Each pair is [UTF-16BE payload, expected UTF-8 text]; the surrogate pair
// is placed at different offsets relative to the 2-byte characters.
$surrogateCases = [
    [$longChar1, $longChar2],
    [utf16BE("饼") . $longChar1, "饼" . $longChar2],
    [$longChar1 . utf16BE("饼"), $longChar2 . "饼"],
    [utf16BE("☺") . $longChar1 . utf16BE("饼"), "☺" . $longChar2 . "饼"],
    [utf16BE("西瓜") . $longChar1, "西瓜" . $longChar2],
    [utf16BE("西瓜") . $longChar1 . utf16BE("☺"), "西瓜" . $longChar2 . "☺"],
    [utf16BE("西瓜") . $longChar1 . $longChar1, "西瓜" . $longChar2 . $longChar2],
    [utf16BE("西红柿") . $longChar1, "西红柿" . $longChar2],
];
foreach ($surrogateCases as [$utf16Payload, $expectedUtf8]) {
    testValid('&' . mBase64($utf16Payload) . '-', $expectedUtf8);
}

// Direct ASCII and Base64 sections mixed within one string.
$mixedInput = '123&' . mBase64(utf16BE("123")) . '-abc&' . mBase64(utf16BE("☺")) . '-.';
testValid($mixedInput, "123123abc☺.");

// Encoding direction only (UTF-8 -> UTF7-IMAP): a Base64 section followed
// directly by an escaped '&'. Each pair is [UTF-8 input, UTF7-IMAP output].
$encodeOnlyCases = [
    ["☺&", "&Jjo-&-"],
    ["西瓜&", "&iX903A-&-"],
    ["西红柿&", "&iX9+omf,-&-"],
];
foreach ($encodeOnlyCases as [$utf8Input, $expectedImap]) {
    testValidString($utf8Input, $expectedImap, "UTF-8", "UTF7-IMAP", false);
}

echo "Identification and conversion of valid text is working... perfect!\n";
// 0x00200000 lies above the Unicode maximum (0x10FFFF), so it cannot be
// represented in UTF7-IMAP and is expected to become the '%' marker.
convertInvalidString("\x00\x20\x00\x00", "%", "UCS-4BE", "UTF7-IMAP");

// Switch to the "long" substitution mode for the remaining conversions.
// NOTE(review): the expected outputs below still use '%'; presumably
// convertInvalidString() accounts for the mode change -- confirm in
// encoding_tests.inc.
mb_substitute_character("long");

// Each pair is [invalid UTF7-IMAP input, expected UTF-8 output].
$invalidImapCases = [
    ["\x10", "%"],
    ["\x80", "%"],
    ["abc&", "abc%"],
    ["&**-", "%*-"],
];
foreach ($invalidImapCases as [$input, $expected]) {
    convertInvalidString($input, $expected, "UTF7-IMAP", "UTF-8");
}

echo "Done!\n";
- ?>
- --EXPECT--
- Identification passes on empty string... good start!
- Testing all valid single-character ASCII strings... check!
- Non-ASCII characters convert to illegal char marker... yes!
- & can be Base64-encoded... yes!
- Testing unterminated & sections... yep!
- Testing ASCII characters which are Base64-encoded... great!
- Testing valid strings which use '&-' for '&'... good!
- Identification fails when Base64 sections contain non-Base64 bytes... right!
- Identification fails when UTF-16 text is invalid... no sweat!
- Identification and conversion of valid text is working... perfect!
- Done!
|