/// Conversion between UTF-8 and the modified UTF-7 form ('&' shift character,
/// ',' in place of '/' in the base64 alphabet) described in RFC 3501,
/// section 5.1.3.
module imap.namespace;

/// Modified base64 alphabet: identical to standard base64 except that the
/// final character is ',' rather than '/'.
immutable B64Enc = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";

/// Reverse lookup for `B64Enc`, indexed by byte value. Bytes that are not
/// part of the alphabet map to the sentinel 0xff.
immutable ubyte[256] B64Dec = [
    0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,
    0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,
    0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0x3e, 0x3f,0xff,0xff,0xff,
    0x34,0x35,0x36,0x37, 0x38,0x39,0x3a,0x3b, 0x3c,0x3d,0xff,0xff, 0xff,0xff,0xff,0xff,
    0xff,0x00,0x01,0x02, 0x03,0x04,0x05,0x06, 0x07,0x08,0x09,0x0a, 0x0b,0x0c,0x0d,0x0e,
    0x0f,0x10,0x11,0x12, 0x13,0x14,0x15,0x16, 0x17,0x18,0x19,0xff, 0xff,0xff,0xff,0xff,
    0xff,0x1a,0x1b,0x1c, 0x1d,0x1e,0x1f,0x20, 0x21,0x22,0x23,0x24, 0x25,0x26,0x27,0x28,
    0x29,0x2a,0x2b,0x2c, 0x2d,0x2e,0x2f,0x30, 0x31,0x32,0x33,0xff, 0xff,0xff,0xff,0xff,
    0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,
    0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,
    0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,
    0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,
    0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,
    0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,
    0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,
    0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,
];

/**
 * Encodes a UTF-8 string as modified UTF-7.
 *
 * Printable ASCII (0x20-0x7e) other than '&' is copied verbatim, '&' becomes
 * "&-", and every other UTF-16 code unit of the input is emitted as modified
 * base64 between '&' and '-'.
 *
 * Params:
 *   utf8Src = text to encode; it is transcoded to UTF-16 first, so invalid
 *             UTF-8 makes that conversion throw.
 *
 * Returns: the modified UTF-7 representation, always ending in ASCII mode.
 */
string utf8ToUtf7(string utf8Src) {
    import std.array : Appender;
    import std.conv : to;

    Appender!string utf7Dst;

    bool inAsciiMode = true;

    // As we encode base64 we'll have potentially 0, 2 or 4 bits remaining to be encoded at any
    // one time.  remBits keeps them in its LSBs.
    uint remBits = 0;
    uint numRemBits = 0;

    // Emits any left-over bits (zero padded to a full sextet), closes the base64 run with '-'
    // and returns to ASCII mode.
    void shiftToAscii() {
        if (numRemBits > 0) {
            utf7Dst.put(B64Enc[remBits << (6 - numRemBits)]);
        }
        utf7Dst.put('-');
        remBits = numRemBits = 0;
        inAsciiMode = true;
    }

    foreach (wch; utf8Src.to!wstring) {
        if (0x20 <= wch && wch <= 0x7e) {
            // Only printable ASCII ends a base64 run.  Control characters must stay inside the
            // run: shifting out for them would immediately be followed by shifting back in,
            // producing the "-&" null shift that RFC 3501 forbids.
            if (!inAsciiMode) {
                shiftToAscii();
            }

            if (wch == '&') {
                // Special case the '&'.
                utf7Dst.put("&-");
            } else {
                utf7Dst.put(wch.to!char);
            }
            continue;
        }

        // Input is not printable ASCII.  Switch to BASE64 mode if necessary.
        if (inAsciiMode) {
            utf7Dst.put('&');
            inAsciiMode = false;
        }

        // Add our new character to the remaining bits.
        remBits = (remBits << 16) | wch.to!uint;
        numRemBits += 16;

        // Output base64 encoded chars while there are enough bits.
        while (numRemBits >= 6) {
            numRemBits -= 6;
            utf7Dst.put(B64Enc[remBits >> numRemBits]);
            remBits &= ((1 << numRemBits) - 1);
        }
    }

    // The result must end in ASCII mode.
    if (!inAsciiMode) {
        shiftToAscii();
    }

    return utf7Dst.data;
}

unittest {
    void encodeTest(string input, string expected) {
        import std.stdio : writeln;
        string got = utf8ToUtf7(input);
        if (got != expected) {
            writeln("INPUT:     ", input);
            writeln("EXPECTING: ", expected);
            writeln("GOT:       ", got);
        }
        assert(got == expected);
    }

    // Examples from RFC 2152, in the modified ('&' shifted) form.
    encodeTest("A≢Α.", "A&ImIDkQ-.");
    encodeTest("Hi Mom -☺-!", "Hi Mom -&Jjo--!");
    encodeTest("日本語", "&ZeVnLIqe-");

    // Stolen shamelessly from the Factor runtime tests.
    // https://github.com/factor/factor/blob/master/basis/io/encodings/utf7/utf7-tests.factor
    encodeTest("~/bågø", "~/b&AOU-g&APg-");
    encodeTest("båx", "b&AOU-x");
    encodeTest("bøx", "b&APg-x");
    encodeTest("test", "test");
    encodeTest("Skräppost", "Skr&AOQ-ppost");
    encodeTest("Ting & Såger", "Ting &- S&AOU-ger");
    encodeTest("~/Følder/mailbåx & stuff + more", "~/F&APg-lder/mailb&AOU-x &- stuff + more");
    encodeTest("~peter/mail/日本語/台北", "~peter/mail/&ZeVnLIqe-/&U,BTFw-");

    // Control characters stay inside the surrounding base64 run (no "-&" null shift).
    encodeTest("å\n", "&AOUACg-");
    encodeTest("å\nø", "&AOUACgD4-");
    encodeTest("", "");
}


/**
 * Decodes modified UTF-7 into UTF-8.
 *
 * Outside a base64 run every byte except '&' is copied to the output
 * unchanged.  "&-" yields a literal '&'.  Inside a run, base64 characters are
 * accumulated into UTF-16 code units, which are transcoded to UTF-8.
 *
 * Malformed input is handled leniently rather than rejected: a character
 * inside a run that is neither '-' nor part of the base64 alphabet ends the
 * run implicitly and is kept as a literal, and a '&' directly followed by such
 * a character is kept as a literal '&'.
 *
 * Params:
 *   utf7Src = modified UTF-7 text.
 *
 * Returns: the decoded text.  The UTF-16 to UTF-8 conversion throws if the
 *          decoded code units are not valid UTF-16 (e.g. a lone surrogate).
 */
string utf7ToUtf8(string utf7Src) {
    import std.array : Appender;
    import std.conv : to;

    Appender!string utf8Dst;

    // Decoded UTF-16 code units that have not been transcoded yet.  They are only converted
    // when a literal byte has to be written (or at the end of the input) so that a surrogate
    // pair is always transcoded as a whole.
    Appender!(wchar[]) pendingUtf16;

    bool inAsciiMode = true;
    bool prevWasAmp = false;    // True only for the character directly following a '&'.

    // As we decode base64 we'll buffer up bits until we have enough to output a unicode character.
    uint bufBits = 0;
    uint numBufBits = 0;

    // Transcodes the buffered UTF-16 code units and appends them to the output.
    void flushPending() {
        if (pendingUtf16.data.length > 0) {
            utf8Dst.put(pendingUtf16.data.to!string);
            pendingUtf16.clear();
        }
    }

    foreach (char ch; utf7Src) {
        if (!inAsciiMode) {
            immutable bool afterAmp = prevWasAmp;
            prevWasAmp = false;

            // Is it the end marker?
            if (ch == '-') {
                if (afterAmp) {
                    // Special case for '&-'.
                    flushPending();
                    utf8Dst.put('&');
                }
                bufBits = numBufBits = 0;
                inAsciiMode = true;
                continue;
            }

            immutable ubyte sextet = B64Dec[ch];
            if (sextet != 0xff) {
                // Decode UTF-7 character.
                bufBits = (bufBits << 6) | sextet;
                numBufBits += 6;

                // At most 15 bits are ever left over, so one sextet completes at most one unit.
                if (numBufBits >= 16) {
                    numBufBits -= 16;
                    pendingUtf16.put(((bufBits >> numBufBits) & 0xffff).to!wchar);
                    bufBits &= ((1 << numBufBits) - 1);
                }
                continue;
            }

            // Not part of the base64 alphabet: never merge the 0xff sentinel into the bit
            // buffer.  Treat it as an implicit shift back to ASCII and handle the character as
            // a literal below.
            if (afterAmp) {
                flushPending();
                utf8Dst.put('&');
            }
            bufBits = numBufBits = 0;
            inAsciiMode = true;
        }

        if (ch == '&') {
            inAsciiMode = false;
            prevWasAmp = true;
        } else {
            // Any other byte is literal, including control characters and raw UTF-8 bytes.
            flushPending();
            utf8Dst.put(ch);
        }
    }

    flushPending();
    return utf8Dst.data;
}

unittest {
    void decodeTest(string input) {
        import std.stdio : writeln;
        string utf7 = utf8ToUtf7(input);
        string got = utf7ToUtf8(utf7);
        if (got != input) {
            writeln("INPUT: ", input);
            writeln("UTF-7: ", utf7);
            writeln("GOT:   ", got);
        }
        assert(got == input);
    }

    void decodeLiteralTest(string utf7, string expected) {
        import std.stdio : writeln;
        string got = utf7ToUtf8(utf7);
        if (got != expected) {
            writeln("UTF-7:     ", utf7);
            writeln("EXPECTING: ", expected);
            writeln("GOT:       ", got);
        }
        assert(got == expected);
    }

    // Examples from RFC 2152.
    decodeTest("A≢Α.");
    decodeTest("Hi Mom -☺-!");
    decodeTest("日本語");

    // Stolen shamelessly from the Factor runtime tests.
    // https://github.com/factor/factor/blob/master/basis/io/encodings/utf7/utf7-tests.factor
    decodeTest("~/bågø");
    decodeTest("båx");
    decodeTest("bøx");
    decodeTest("test");
    decodeTest("Skräppost");
    decodeTest("Ting & Såger");
    decodeTest("~/Følder/mailbåx & stuff + more");
    decodeTest("~peter/mail/日本語/台北");

    // Control characters and characters outside the BMP round-trip.
    decodeTest("å\nø");
    decodeTest("\U0001F600");
    decodeTest("");

    // Bytes outside 0x20-0x7e in ASCII mode are literals and must not disturb later runs.
    decodeLiteralTest("a\nb", "a\nb");
    decodeLiteralTest("a\n&AOU-", "a\nå");
    decodeLiteralTest("båx", "båx");

    // Literals and base64 runs keep their relative order.
    decodeLiteralTest("&AOU-x&APg-", "åxø");
}