1 ///
2 module imap.namespace;
3 
/// Modified base64 alphabet: the standard base64 alphabet with ',' in place of '/'.
/// The position of a character in this string is its 6-bit value.
immutable B64Enc = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";

/// Reverse lookup for B64Enc, indexed by character code.  Every character that
/// is not part of the alphabet maps to the sentinel 0xff.  The table is derived
/// from B64Enc at compile time so the two can never disagree.
immutable ubyte[256] B64Dec = () {
    ubyte[256] reverse = 0xff;
    foreach (sextet, symbol; B64Enc) {
        reverse[symbol] = cast(ubyte) sextet;
    }
    return reverse;
}();
23 
/**
 * Encodes a UTF-8 string as modified UTF-7, the encoding used for IMAP
 * mailbox names.
 *
 * Printable ASCII (0x20-0x7e) is copied through unchanged, except that '&' is
 * written as "&-".  Every other character is converted to UTF-16 and emitted
 * as modified base64 (alphabet B64Enc) between a leading '&' and a trailing '-'.
 *
 * Params:
 *     utf8Src = the text to encode.
 *
 * Returns: the modified UTF-7 representation of utf8Src.
 *
 * Throws: std.utf.UTFException (raised by the UTF-8 to UTF-16 conversion) if
 *         utf8Src is not valid UTF-8.
 */
string utf8ToUtf7(string utf8Src) {
    import std.array : Appender;
    import std.conv : to;

    Appender!string utf7Dst;

    bool inAsciiMode = true;

    // While base64 encoding there are 0, 2 or 4 bits left over after each
    // 16-bit unit.  remBits holds them in its least significant bits.
    uint remBits = 0;
    uint numRemBits = 0;

    // Ends the current base64 section: emits the left-over bits padded with
    // zeros to a full sextet, then the closing '-'.
    void leaveBase64() {
        if (numRemBits > 0) {
            utf7Dst.put(B64Enc[remBits << (6 - numRemBits)]);
        }
        utf7Dst.put('-');
        remBits = numRemBits = 0;
        inAsciiMode = true;
    }

    foreach (wchar wch; utf8Src.to!wstring) {
        // Only printable ASCII is written literally.  Control characters
        // (0x00-0x1f) and DEL are base64 encoded, so they must NOT end an open
        // base64 section: closing and immediately reopening it would produce
        // a "-&" null shift, which modified UTF-7 does not permit.
        if (0x20 <= wch && wch <= 0x7e) {
            if (!inAsciiMode) {
                leaveBase64();
            }
            if (wch == '&') {
                // A literal ampersand is escaped as "&-".
                utf7Dst.put("&-");
            } else {
                utf7Dst.put(cast(char) wch);
            }
            continue;
        }

        // Not a literal character.  Open a base64 section if necessary.
        if (inAsciiMode) {
            utf7Dst.put('&');
            inAsciiMode = false;
        }

        // Append the 16-bit code unit to the pending bits.
        remBits = (remBits << 16) | wch;
        numRemBits += 16;

        // Emit base64 characters while a complete sextet is available.
        while (numRemBits >= 6) {
            numRemBits -= 6;
            utf7Dst.put(B64Enc[remBits >> numRemBits]);
            remBits &= (1u << numRemBits) - 1;
        }
    }

    // Input ended inside a base64 section: close it.
    if (!inAsciiMode) {
        leaveBase64();
    }

    return utf7Dst.data;
}
86 
unittest {
    import std.stdio : writeln;

    // Each entry is [UTF-8 input, expected modified UTF-7 output].
    immutable string[2][] vectors = [
        // From RFC 2152.
        ["A≢Α.", "A&ImIDkQ-."],
        ["Hi Mom -☺-!", "Hi Mom -&Jjo--!"],
        ["日本語", "&ZeVnLIqe-"],

        // Borrowed from the Factor runtime tests.
        // https://github.com/factor/factor/blob/master/basis/io/encodings/utf7/utf7-tests.factor
        ["~/bågø", "~/b&AOU-g&APg-"],
        ["båx", "b&AOU-x"],
        ["bøx", "b&APg-x"],
        ["test", "test"],
        ["Skräppost", "Skr&AOQ-ppost"],
        ["Ting & Såger", "Ting &- S&AOU-ger"],
        ["~/Følder/mailbåx & stuff + more", "~/F&APg-lder/mailb&AOU-x &- stuff + more"],
        ["~peter/mail/日本語/台北", "~peter/mail/&ZeVnLIqe-/&U,BTFw-"],
    ];

    foreach (vector; vectors) {
        string source = vector[0];
        string wanted = vector[1];
        string actual = utf8ToUtf7(source);

        // Print the details before the assertion fires so a failure is easy to diagnose.
        if (actual != wanted) {
            writeln("INPUT:     ", source);
            writeln("EXPECTING: ", wanted);
            writeln("GOT:       ", actual);
        }
        assert(actual == wanted);
    }
}
115 
116 
/**
 * Decodes modified UTF-7 (the encoding used for IMAP mailbox names) to UTF-8.
 *
 * Outside a base64 section every byte except '&' is copied to the output
 * unchanged.  '&' opens a base64 section and '-' closes it; the special
 * sequence "&-" decodes to a literal '&'.  Inside a section, characters are
 * looked up in B64Dec and the resulting bits are assembled into 16-bit UTF-16
 * code units, which are then converted to UTF-8.
 *
 * Malformed input is tolerated rather than rejected: a character that is not
 * part of the base64 alphabet ends the current section as if it had been
 * closed and is then treated as a literal, and bits left over at the end of a
 * section (or of the input) are discarded.  Such characters are never fed
 * into the bit buffer.
 *
 * Params:
 *     utf7Src = the modified UTF-7 text to decode.
 *
 * Returns: the decoded text.
 *
 * Throws: std.utf.UTFException (raised by the UTF-16 to UTF-8 conversion) if
 *         the decoded code units are not valid UTF-16, e.g. an unpaired
 *         surrogate.
 */
string utf7ToUtf8(string utf7Src) {
    import std.array : Appender;
    import std.conv : to;

    Appender!string utf8Dst;

    // UTF-16 code units decoded from base64 that have not been converted to
    // UTF-8 yet.  They are only flushed when a literal byte follows or the
    // input ends, so a surrogate pair is always converted as a whole.
    Appender!(wchar[]) pendingUnits;

    bool inAsciiMode = true;
    bool prevWasAmp = false;    // True only for the character right after '&'.

    // Bits decoded from base64 that do not yet add up to a 16-bit code unit.
    uint bufBits = 0;
    uint numBufBits = 0;

    // Converts the buffered UTF-16 code units to UTF-8 and appends them.
    void flushPending() {
        if (pendingUnits.data.length > 0) {
            utf8Dst.put(pendingUnits.data.to!string);
            pendingUnits.clear();
        }
    }

    // Appends one literal byte, keeping output order with any decoded units.
    void putLiteral(char c) {
        flushPending();
        utf8Dst.put(c);
    }

    foreach (char ch; utf7Src) {
        if (!inAsciiMode) {
            immutable bool justShifted = prevWasAmp;
            prevWasAmp = false;

            immutable ubyte sextet = B64Dec[ch];
            if (sextet != 0xff) {
                // Valid base64 character: accumulate its six bits.
                bufBits = (bufBits << 6) | sextet;
                numBufBits += 6;

                if (numBufBits >= 16) {
                    numBufBits -= 16;
                    pendingUnits.put(cast(wchar) ((bufBits >> numBufBits) & 0xffff));
                    bufBits &= (1u << numBufBits) - 1;
                }
                continue;
            }

            // Anything outside the alphabet ends the section; padding bits
            // left in the buffer are dropped.
            bufBits = numBufBits = 0;
            inAsciiMode = true;

            if (ch == '-') {
                // "&-" is the escape for a literal ampersand.
                if (justShifted) {
                    putLiteral('&');
                }
                continue;
            }
            // Malformed input: the section was not closed with '-'.  Fall
            // through and handle the character as a literal.
        }

        if (ch == '&') {
            inAsciiMode = false;
            prevWasAmp = true;
        } else {
            putLiteral(ch);
        }
    }

    flushPending();
    return utf8Dst.data;
}
168 
unittest {
    import std.stdio : writeln;

    // Every sample must survive an encode/decode round trip unchanged.
    immutable string[] samples = [
        // From RFC 2152.
        "A≢Α.",
        "Hi Mom -☺-!",
        "日本語",

        // Borrowed from the Factor runtime tests.
        // https://github.com/factor/factor/blob/master/basis/io/encodings/utf7/utf7-tests.factor
        "~/bågø",
        "båx",
        "bøx",
        "test",
        "Skräppost",
        "Ting & Såger",
        "~/Følder/mailbåx & stuff + more",
        "~peter/mail/日本語/台北",
    ];

    foreach (sample; samples) {
        string encoded = utf8ToUtf7(sample);
        string decoded = utf7ToUtf8(encoded);

        // Print the intermediate form before the assertion fires to ease debugging.
        if (decoded != sample) {
            writeln("INPUT: ", sample);
            writeln("UTF-7: ", encoded);
            writeln("GOT:   ", decoded);
        }
        assert(decoded == sample);
    }
}
199