imap.namespace source code

1 ///
2 module imap.namespace;
3 import std.conv : to;
4 
/// U+FFFD, the Unicode REPLACEMENT CHARACTER code point.
enum UNICODE_REPLACEMENT_CHAR = 0xfffd;

/// Code points greater than or equal to this value need a UTF-16 surrogate pair.
enum UTF16_SURROGATE_BASE = 0x10000;

/// Number of payload bits carried by each half of a surrogate pair.
enum UTF16_SURROGATE_SHIFT = 10;
/// Mask selecting the payload bits of one surrogate.
enum UTF16_SURROGATE_MASK = 0x03ff;
/// First value of the high (leading) surrogate range.
enum UTF16_SURROGATE_HIGH_FIRST = 0xd800;
/// Last value of the high (leading) surrogate range.
enum UTF16_SURROGATE_HIGH_LAST = 0xdbff;
/// Same value as UTF16_SURROGATE_LOW_LAST, i.e. the end of the whole surrogate range.
enum UTF16_SURROGATE_HIGH_MAX = 0xdfff;
/// First value of the low (trailing) surrogate range.
enum UTF16_SURROGATE_LOW_FIRST = 0xdc00;
/// Last value of the low (trailing) surrogate range.
enum UTF16_SURROGATE_LOW_LAST = 0xdfff;
17 
/// Computes the high (leading) UTF-16 surrogate for `chr`: the offset of
/// `chr` above UTF16_SURROGATE_BASE, shifted right by UTF16_SURROGATE_SHIFT
/// and added to UTF16_SURROGATE_HIGH_FIRST.
auto UTF16_SURROGATE_HIGH(T:int)(T chr)
{
	auto offset = chr - UTF16_SURROGATE_BASE;
	auto upperBits = offset >> UTF16_SURROGATE_SHIFT;
	return UTF16_SURROGATE_HIGH_FIRST + upperBits;
}
23 
/// Computes the low (trailing) UTF-16 surrogate for `chr`: the offset of
/// `chr` above UTF16_SURROGATE_BASE, masked with UTF16_SURROGATE_MASK and
/// added to UTF16_SURROGATE_LOW_FIRST.
auto UTF16_SURROGATE_LOW(T:int)(T chr)
{
	auto offset = chr - UTF16_SURROGATE_BASE;
	auto lowerBits = offset & UTF16_SURROGATE_MASK;
	return UTF16_SURROGATE_LOW_FIRST + lowerBits;
}
29 
// NOTE(review): presumably the UTF-8 byte length of UNICODE_REPLACEMENT_CHAR
// (U+FFFD encodes as three bytes) -- confirm. Not referenced by the code
// visible in this file.
enum UTF8_REPLACEMENT_CHAR_LEN = 3;
31 
/**
 * Converts a hexadecimal literal given as a template argument into a `char`.
 *
 * The string is parsed during compilation, so using `Hex!"c0"` in a hot loop
 * costs nothing at run time. A string that is not valid hexadecimal, or whose
 * value does not fit into a single `char`, is rejected at compile time.
 *
 * Params:
 *   s = hexadecimal digits without prefix, e.g. "7f" or "020"
 * Returns: the character whose code is the parsed value
 */
char Hex(string s)()
{
	import std.conv : to;

	// `enum` forces compile-time evaluation of the parse
	enum int value = s.to!int(16);
	static assert(value >= 0 && value <= char.max,
		"cannot convert " ~ s ~ " to a single character");
	return cast(char) value;
}
40 
// The 64-character alphabet used for encoding: 'A'-'Z', 'a'-'z', '0'-'9',
// then '+' and ','. The position of a character in this string is its
// 6-bit value.
immutable Base64EncodeTable = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
// Reverse mapping, written as 16 rows of 16 comma-separated entries (one
// entry per byte value 0..255). `XX` marks a byte that is not part of the
// alphabet; a numeric entry is the position of that byte in
// Base64EncodeTable (e.g. entry 65, 'A', is 0 and entry 44, ',', is 63).
// The literal is turned into the actual table by parseBase64DecodeTable.
immutable Base64DecodeTable = parseBase64DecodeTable(`
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,62, 63,XX,XX,XX,
	52,53,54,55, 56,57,58,59, 60,61,XX,XX, XX,XX,XX,XX,
	XX, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
	15,16,17,18, 19,20,21,22, 23,24,25,XX, XX,XX,XX,XX,
	XX,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
	41,42,43,44, 45,46,47,48, 49,50,51,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX
`);
60 
/**
 * Builds a byte lookup table from its textual, comma-separated form.
 *
 * Every token is stripped of surrounding whitespace. The marker `XX` becomes
 * the byte 0xFF ("not a valid character"); every other token is read as a
 * DECIMAL number (0 .. 255) and stored as the byte with that value. The
 * numbers are decimal because they are 6-bit indices such as 10 or 62;
 * parsing them as hexadecimal would silently corrupt every entry >= 10.
 *
 * Params:
 *   s = comma-separated list of `XX` markers and decimal byte values
 * Returns: a string whose n-th byte is the value of the n-th token
 * Throws: a `std.conv` conversion exception for a token that is neither
 *         `XX` nor a decimal number in 0 .. 255
 */
string parseBase64DecodeTable(string s)
{
	import std.array : split;
	import std.conv : to;
	import std.string : strip;

	auto tokens = s.split(',');
	auto table = new char[tokens.length];
	foreach (idx, tok; tokens)
	{
		auto entry = tok.strip;
		table[idx] = (entry == "XX") ? cast(char) 0xff : cast(char) entry.to!ubyte;
	}
	return table.idup;
}
75 
76 
/**
 * Bounds-checked read of one character from a compile-time selected table.
 *
 * Params:
 *   Table = the table to index (passed by alias)
 *   c     = index into `Table`
 * Returns: `Table[c]` converted to `char`
 * Throws: `Exception` (via `enforce`) when `c` is negative or not below
 *         `Table.length`
 */
private char lookup(alias Table, Number : int)(Number c)
{
	import std.exception : enforce;
	import std.conv : to;

	immutable bool withinTable = c >= 0 && c < Table.length;
	enforce(withinTable, "index error in lookup in " ~ Table.stringof ~ " for char: " ~ c.to!int.to!string);

	auto entry = Table[c];
	return entry.to!char;
}
85 
86 
87 
/**
 * Encodes raw bytes as one modified-base64 section.
 *
 * The result always starts with '&' and ends with '-'. Every three input
 * bytes produce four characters from Base64EncodeTable; one or two trailing
 * bytes produce two or three characters. No '=' padding is written.
 *
 * Params:
 *   src = the bytes to encode (treated as raw bytes, not as text)
 * Returns: the encoded section including the surrounding '&' and '-'
 */
string modifiedBase64Encode(string src)
{
	import std.array : Appender;
	import std.conv : to;

	Appender!string encoded;

	// appends the alphabet character for one 6-bit value
	void emit(int sextet)
	{
		encoded.put(lookup!Base64EncodeTable(sextet));
	}

	encoded.put('&');

	// complete groups of three bytes
	for (; src.length >= 3; src = src[3 .. $])
	{
		immutable b0 = src[0];
		immutable b1 = src[1];
		immutable b2 = src[2];
		emit(b0 >> 2);
		emit(((b0 & 3) << 4) | (b1 >> 4));
		emit(((b1 & Hex!"0f") << 2) | ((b2 & Hex!"c0") >> 6));
		emit(b2 & Hex!"3f");
	}

	// one or two left-over bytes
	switch (src.length)
	{
		case 1:
			emit(src[0] >> 2);
			emit((src[0] & Hex!"03") << 4);
			break;

		case 2:
			emit(src[0] >> 2);
			emit(((src[0] & Hex!"03") << 4) | (src[1] >> 4));
			emit((src[1] & Hex!"0f") << 2);
			break;

		default:
			break;
	}

	encoded.put('-');
	return encoded.data;
}
118 
/**
 * Finds the first byte of `s` that cannot be copied verbatim into a
 * modified UTF-7 mailbox name.
 *
 * A byte needs encoding when it is '&' (the shift character) or lies outside
 * printable US-ASCII, i.e. is below 0x20 or at/above 0x7f. DEL (0x7f) is
 * included so that this scan agrees with the range accepted as literal
 * elsewhere in this module (0x20 .. 0x7e).
 *
 * Params:
 *   s = UTF-8 text to scan
 * Returns: the slice of `s` starting at the first such byte, or `null` when
 *          the whole string can be used unchanged
 */
string imapUtf8FirstEncodeSubstring(string s)
{
	foreach (i, c; s)
	{
		if (c == '&' || c < 0x20 || c >= 0x7f)
			return s[i .. $];
	}
	return null;
}
129 
130 /// Convert a unicode mailbox name to the modified UTF-7 encoding, according to RFC 3501 Section 5.1.3.
/**
 * Convert a unicode mailbox name to the modified UTF-7 encoding, according
 * to RFC 3501 Section 5.1.3.
 *
 * Printable US-ASCII (0x20 .. 0x7e) is copied as is, '&' becomes "&-", and
 * every run of other characters is converted to big-endian UTF-16 and
 * written as one modified-base64 section. For example
 * "~peter/mail/台北/日本語" becomes "~peter/mail/&U,BTFw-/&ZeVnLIqe-".
 *
 * Params:
 *   src = mailbox name in UTF-8
 * Returns: the encoded name; `src` itself when nothing needs encoding;
 *          `null` when `src` contains invalid UTF-8
 */
string utf8ToUtf7(string src)
{
	import std.array : Appender;
	import std.utf : decode, UTFException;

	auto p = imapUtf8FirstEncodeSubstring(src);
	if (p.length == 0) // no characters to be encoded
		return src;

	// at least one encoded character: copy everything in front of it verbatim
	Appender!string ret;
	size_t i = src.length - p.length;
	ret.put(src[0 .. i]);

	while (i < src.length)
	{
		immutable c = src[i];
		if (c == '&')
		{
			ret.put("&-");
			++i;
			continue;
		}
		if (c >= 0x20 && c < 0x7f)
		{
			ret.put(c);
			++i;
			continue;
		}

		// Collect the whole run of characters that need encoding as
		// big-endian UTF-16 bytes, then emit it as a single section.
		Appender!string u;
		while (i < src.length && (src[i] < 0x20 || src[i] >= 0x7f))
		{
			int chr;
			try
				chr = cast(int) decode(src, i); // advances i past the character
			catch (UTFException)
				return null;

			if (chr < UTF16_SURROGATE_BASE)
			{
				u.put(cast(char) (chr >> 8));
				u.put(cast(char) (chr & 0xff));
			}
			else
			{
				auto u16 = UTF16_SURROGATE_HIGH(chr);
				u.put(cast(char) (u16 >> 8));
				u.put(cast(char) (u16 & 0xff));
				u16 = UTF16_SURROGATE_LOW(chr);
				u.put(cast(char) (u16 >> 8));
				u.put(cast(char) (u16 & 0xff));
			}
		}
		ret.put(modifiedBase64Encode(u.data));
	}
	return ret.data;
}
182 
/// Tells whether `c` may appear in a modified UTF-7 string, i.e. whether it
/// lies in the range 0x20 (inclusive) to 0x7f (exclusive).
bool isValidUtf7(dchar c)
{
	immutable bool tooLow = c < 0x20;
	immutable bool tooHigh = c >= 0x7f;
	return !(tooLow || tooHigh);
}
187 
/**
 * Converts the big-endian UTF-16 code units stored in `output` to UTF-8.
 *
 * `output` is read as a circular buffer: reading starts at byte `pos_` and
 * wraps around at `output.length`. It must hold an even number of bytes and
 * at most four, i.e. one or two code units (two BMP characters or one
 * surrogate pair).
 *
 * Params:
 *   output = UTF-16BE bytes (length 0, 2 or 4)
 *   pos_   = index of the first byte to read, taken modulo `output.length`
 * Returns: the UTF-8 text (`""` for an empty buffer), or `null` when
 *          the buffer has an odd length or more than four bytes,
 *          a surrogate is unpaired or out of order, or
 *          a code unit is printable US-ASCII (0x20 .. 0x7e), which must
 *          never be base64-encoded in modified UTF-7
 */
string utf16BufToUtf8(char[] output, uint pos_)
{
	import std.utf : encode;

	enum uint surrogateBase = 0x10000;
	enum uint surrogateMask = 0x03ff;
	enum uint surrogateShift = 10;
	enum uint highFirst = 0xd800;
	enum uint highLast = 0xdbff;
	enum uint lowFirst = 0xdc00;
	enum uint lowLast = 0xdfff;

	immutable size_t len = output.length;
	if (len == 0)
		return "";
	if (len > 4 || len % 2 != 0)
		return null;

	// reads one 16-bit big-endian unit, `offset` bytes after pos_
	uint unitAt(size_t offset)
	{
		immutable uint hi = cast(ubyte) output[(pos_ + offset) % len];
		immutable uint lo = cast(ubyte) output[(pos_ + offset + 1) % len];
		return (hi << 8) | lo;
	}

	string ret = "";
	char[4] buf;
	size_t offset = 0;
	while (offset < len)
	{
		dchar chr;
		immutable uint high = unitAt(offset);
		offset += 2;

		if (high < highFirst || high > lowLast)
		{
			// plain BMP character
			if (high >= 0x20 && high < 0x7f)
				return null; // representable directly, must not be encoded
			chr = high;
		}
		else
		{
			if (high > highLast)
				return null; // low surrogate without a preceding high one
			if (offset >= len)
				return null; // missing the second code unit
			immutable uint low = unitAt(offset);
			offset += 2;
			if (low < lowFirst || low > lowLast)
				return null;
			chr = surrogateBase +
				(((high & surrogateMask) << surrogateShift) | (low & surrogateMask));
		}

		immutable n = encode(buf, chr);
		ret ~= buf[0 .. n];
	}
	return ret;
}
211 
/**
 * Decodes one modified-base64 section into UTF-8.
 *
 * `src` starts right after the '&' shift character. Decoding stops at the
 * first '-' or at the end of `src`, whichever comes first. The base64 data
 * is first turned into bytes, which are then interpreted as big-endian
 * UTF-16 and re-encoded as UTF-8.
 *
 * Params:
 *   src = the section body, optionally followed by '-' and further text
 * Returns: the decoded text (`""` for an empty section), or `null` when
 *          a character is not in the modified-base64 alphabet,
 *          the data does not form a whole number of 16-bit units,
 *          a surrogate is unpaired or out of order, or
 *          a decoded character is printable US-ASCII (0x20 .. 0x7e), which
 *          must never be base64-encoded in modified UTF-7
 */
string modifiedBase64DecodeToUtf8(string src)
{
	import std.utf : encode;

	// Step 1: base64 characters -> raw bytes (UTF-16BE).
	ubyte[] utf16;
	size_t i = 0;
	while (i < src.length && src[i] != '-')
	{
		// gather up to four 6-bit values of one group
		ubyte[4] sextets;
		size_t count = 0;
		for (; count < 4 && i < src.length && src[i] != '-'; ++i)
		{
			immutable value = cast(ubyte) lookup!Base64DecodeTable(src[i]);
			if (value == 0xff)
				return null;
			sextets[count++] = value;
		}
		if (count < 2)
			return null; // a single sextet cannot make up a byte

		utf16 ~= cast(ubyte) ((sextets[0] << 2) | (sextets[1] >> 4));
		if (count >= 3)
			utf16 ~= cast(ubyte) ((sextets[1] << 4) | (sextets[2] >> 2));
		if (count == 4)
			utf16 ~= cast(ubyte) ((sextets[2] << 6) | sextets[3]);
	}

	if (utf16.length % 2 != 0)
		return null;

	// Step 2: UTF-16BE code units -> UTF-8.
	string ret = "";
	char[4] buf;
	size_t k = 0;
	while (k < utf16.length)
	{
		dchar chr;
		immutable uint high = (utf16[k] << 8) | utf16[k + 1];
		k += 2;

		if (high < 0xd800 || high > 0xdfff)
		{
			if (high >= 0x20 && high < 0x7f)
				return null; // representable directly, must not be encoded
			chr = high;
		}
		else
		{
			if (high > 0xdbff)
				return null; // low surrogate without a preceding high one
			if (k + 2 > utf16.length)
				return null; // missing the second code unit
			immutable uint low = (utf16[k] << 8) | utf16[k + 1];
			k += 2;
			if (low < 0xdc00 || low > 0xdfff)
				return null;
			chr = 0x10000 + (((high & 0x03ff) << 10) | (low & 0x03ff));
		}

		immutable n = encode(buf, chr);
		ret ~= buf[0 .. n];
	}
	return ret;
}
287 	
288 /// Convert a mailbox name from the modified UTF-7 encoding, according to RFC 3501 Section 5.1.3.
/**
 * Convert a mailbox name from the modified UTF-7 encoding, according to
 * RFC 3501 Section 5.1.3.
 *
 * Text outside "&...-" sections is copied unchanged, "&-" yields a literal
 * '&', and every other section is decoded with modifiedBase64DecodeToUtf8.
 *
 * Params:
 *   src = mailbox name in modified UTF-7
 * Returns: the decoded name; `src` itself when it contains no '&';
 *          `null` when
 *          `src` contains a character outside 0x20 .. 0x7e,
 *          a section is not terminated by '-',
 *          a section cannot be decoded, or
 *          one encoded section is directly followed by another
 */
string utf7ToUtf8(string src)
{
	import std.algorithm : all;
	import std.array : Appender;
	import std.string : indexOf;

	if (!src.all!(c => c.isValidUtf7))
		return null;

	immutable first = src.indexOf('&');
	if (first == -1) // no encoded characters
		return src;

	// everything in front of the first '&' is copied once, verbatim
	Appender!string ret;
	ret.put(src[0 .. first]);

	size_t i = first;
	while (i < src.length)
	{
		if (src[i] != '&')
		{
			ret.put(src[i++]);
			continue;
		}

		++i; // step over '&'
		if (i >= src.length)
			return null; // '&' at the very end starts nothing

		if (src[i] == '-')
		{
			// "&-" is the escape for a literal ampersand
			ret.put('&');
			++i;
			continue;
		}

		immutable rel = src[i .. $].indexOf('-');
		if (rel == -1)
			return null; // section never shifts back to ASCII
		immutable size_t end = i + rel;

		auto result = modifiedBase64DecodeToUtf8(src[i .. end + 1]);
		if (result is null)
			return null;
		ret.put(result);
		i = end + 1;

		// "&...-&...-": adjacent sections must have been merged into one
		if (i + 1 < src.length && src[i] == '&' && src[i + 1] != '-')
			return null;
	}
	return ret.data;
}
335 	// at least one encoded character
/**
 * Checks whether `src` is a well-formed modified UTF-7 string.
 *
 * Every byte must be printable US-ASCII (0x20 .. 0x7e). From the first '&'
 * onwards the remainder is validated in a single pass by trying to decode it
 * with utf7ToUtf8; there is no need to repeat that for later '&' characters
 * because the decoder already examines the entire tail.
 *
 * Params:
 *   src = candidate modified UTF-7 text
 * Returns: `true` when `src` is valid, `false` otherwise
 */
private bool utf7IsValid(string src)
{
	foreach (i, c; src)
	{
		if (c < 0x20 || c >= 0x7f)
			return false;
		if (c == '&')
		{
			// slow scan, done once for the whole remainder
			return utf7ToUtf8(src[i .. $]) !is null;
		}
	}
	return true;
}
352 
353 
354 
/// A mailbox name in internal (UTF-8, '/'-separated) form together with the
/// namespace prefix and hierarchy delimiter used to build its server form.
struct Mailbox
{
	/// Mailbox name in internal format.
	string mailbox;
	/// Namespace prefix put in front of the name in server format.
	string prefix = "";
	/// Hierarchy delimiter that replaces '/' in server format.
	char delim = '/';

	/// Returns the name in mail server format, see applyNamespace.
	string toString()
	{
		return applyNamespace();
	}

	/// Convert the names of personal mailboxes, using the namespace specified 
	/// by the mail server, from internal to mail server format.
	///
	/// "INBOX" (compared case-insensitively) is returned untouched. Any other
	/// name is converted to modified UTF-7; unless the prefix is empty and
	/// the delimiter is '\0' or '/', the prefix is then prepended and every
	/// '/' is replaced by the delimiter.
	string applyNamespace()
	{
		import std.experimental.logger : infof;
		import std.string : toUpper, replace;
		import std.format : format;

		// INBOX is the one name that is passed through as is; this is the
		// same rule reverseNamespace applies in the other direction.
		if (mailbox.toUpper == "INBOX")
			return mailbox;
		auto mbox = utf8ToUtf7(mailbox);
		if ((prefix.length ==0) && ((delim=='\0') || delim=='/'))
			return mbox;
		auto ret = format!"%s%s"(prefix,mbox).replace("/",[delim]);
		infof("namespace: '%s' -> '%s'\n", mbox, ret);
		return ret;
	}

	/// Convert the names of personal mailboxes, using the namespace specified by
	/// the mail server, from mail server format to internal format.
	static Mailbox fromServerFormat(string mbox, string prefix, char delim)
	{
		Mailbox ret = {	mailbox: mbox.reverseNamespace(prefix,delim),
						prefix: prefix,
						delim : delim,
		};
		return ret;

	}
}
397 
/**
 * Converts a mailbox name from mail server format to internal format.
 *
 * "INBOX" (ASCII case-insensitive) is returned untouched. Otherwise, when
 * `mbox` starts with `prefix` (ASCII case-insensitive) the prefix is removed,
 * every `delim` is replaced by '/', and the result is decoded from modified
 * UTF-7. With an empty prefix and a delimiter of '\0' or '/' only the
 * decoding takes place.
 *
 * Params:
 *   mbox   = mailbox name as sent by the server
 *   prefix = namespace prefix announced by the server (may be empty)
 *   delim  = hierarchy delimiter announced by the server
 * Returns: the internal name, or whatever utf7ToUtf8 returns for malformed
 *          input
 */
string reverseNamespace(string mbox, string prefix, char delim)
{
	import std.array : replace;
	import std.ascii : toUpper;

	// byte-wise, ASCII-only, case-insensitive "starts with"
	static bool startsWithNoCase(string text, string head)
	{
		if (text.length < head.length)
			return false;
		foreach (k, ch; head)
		{
			if (toUpper(text[k]) != toUpper(ch))
				return false;
		}
		return true;
	}

	if (mbox.length == 5 && startsWithNoCase(mbox, "INBOX"))
		return mbox;

	if (prefix.length == 0 && (delim == '\0' || delim == '/'))
		return utf7ToUtf8(mbox);

	// strip the prefix only when the name really carries it
	immutable size_t skip = startsWithNoCase(mbox, prefix) ? prefix.length : 0;

	return mbox[skip .. $]
			.replace([delim], "/")
			.utf7ToUtf8;
}
421 
422 
/// A Unicode code point, or a negative/zero status value.
alias unichar_t = int;

/**
 * Decodes the UTF-8 sequence at the start of `src_`.
 *
 * Only the lead byte decides how many bytes are consumed (see
 * utf8CharBytes); the bytes are read individually, without any automatic
 * decoding of the string.
 *
 * Params:
 *   src_ = text whose first character is to be decoded
 * Returns: the code point, which is 0 for a leading NUL byte;
 *          0 when `src_` is empty, ends before the sequence is complete,
 *          or has a NUL byte where a continuation byte is expected;
 *          -1 for an invalid lead byte, an invalid continuation byte,
 *          an overlong encoding, a surrogate (U+D800 .. U+DFFF) or a
 *          value above U+10FFFF
 */
private int utf8GetChar(string src_) // , char chr_r)
{
	// smallest code point that legitimately needs a sequence of that length
	static immutable unichar_t[7] lowestValidChr =
		[0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000];

	if (src_.length == 0)
		return 0;

	immutable ubyte lead = src_[0];
	if (lead < 0x80)
		return lead;

	// first byte has len highest bits set, followed by zero bit.
	// the rest of the bits are used as the highest bits of the value
	immutable size_t len = utf8CharBytes(lead);
	unichar_t chr;
	switch (len)
	{
		case 2:
			chr = lead & 0x1f;
			break;

		case 3:
			chr = lead & 0x0f;
			break;

		case 4:
			chr = lead & 0x07;
			break;

		case 5:
			chr = lead & 0x03;
			break;

		case 6:
			chr = lead & 0x01;
			break;

		default:
			// a non-ASCII byte that does not start a sequence
			return -1;
	}

	// the following bytes must all be 10xxxxxx
	foreach (i; 1 .. len)
	{
		if (i >= src_.length)
			return 0; // sequence is cut off
		immutable ubyte next = src_[i];
		if ((next & 0xc0) != 0x80)
			return (next == 0) ? 0 : -1;

		chr = (chr << 6) | (next & 0x3f);
	}

	if (chr < lowestValidChr[len])
	{
		/* overlong encoding */
		return -1;
	}
	if ((chr >= 0xd800 && chr <= 0xdfff) || chr > 0x10ffff)
	{
		/* not a Unicode scalar value */
		return -1;
	}

	return chr;
}
508 
/// Returns the number of bytes belonging to this UTF-8 character. The given
/// parameter is the first byte of the UTF-8 sequence. Invalid input is
/// returned with length 1; that includes any value that is not a byte at all
/// (negative or above 0xFF).
private uint utf8CharBytes(int chr)
{
	/* 0x00 .. 0x7f are ASCII. 0x80 .. 0xC1 are invalid. */
	if (chr < (192 + 2) || chr > 0xff)
		return 1;
	return utf8_non1_bytes[chr - (192 + 2)];
}

/// Sequence length for each lead byte 0xC2 .. 0xFF (index = byte - 0xC2):
/// 30 two-byte, 16 three-byte, 8 four-byte, 4 five-byte and 2 six-byte
/// leads, followed by 0xFE and 0xFF which count as length 1.
private immutable ubyte[256-192-2] utf8_non1_bytes = [
		2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
		3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
];
524 
525 
526 /+
527 	Permission is hereby granted, free of charge, to any person obtaining a
528 	copy of this software and associated documentation files (the "Software"),
529 	 to deal in the Software without restriction, including without limitation
530 	 the rights to use, copy, modify, merge, publish, distribute, sublicense,
531 	 and/or sell copies of the Software, and to permit persons to whom the
532 	 Software is furnished to do so, subject to the following conditions:
533 
534 	 The above copyright notice and this permission notice shall be included in
535 	 all copies or substantial portions of the Software.
536 
537 	 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
538 	 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
539 	 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
540 	 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
541 	 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
542 	 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
543 	 DEALINGS IN THE SOFTWARE.
544 
545 	 utf7 code taken from Dovecot by Timo Sirainen <tss@iki.fi>
546 +/