/// Mailbox-name helpers for IMAP: conversion between UTF-8 and the modified
/// UTF-7 encoding of RFC 3501 section 5.1.3, plus translation of mailbox names
/// between the internal form ('/' as hierarchy separator, no prefix) and the
/// form used by the server's personal namespace (prefix + server delimiter).
module imap.namespace;

import std.array : Appender;
import std.conv : to;

/// Code point U+FFFD (REPLACEMENT CHARACTER).
enum UNICODE_REPLACEMENT_CHAR = 0xfffd;

/// Code points >= this value require a UTF-16 surrogate pair.
enum UTF16_SURROGATE_BASE = 0x10000;

/// Number of payload bits carried by each half of a surrogate pair.
enum UTF16_SURROGATE_SHIFT = 10;
/// Mask selecting the payload bits of a surrogate.
enum UTF16_SURROGATE_MASK = 0x03ff;
/// First high (leading) surrogate.
enum UTF16_SURROGATE_HIGH_FIRST = 0xd800;
/// Last high (leading) surrogate.
enum UTF16_SURROGATE_HIGH_LAST = 0xdbff;
/// Last code unit of the whole surrogate range.
enum UTF16_SURROGATE_HIGH_MAX = 0xdfff;
/// First low (trailing) surrogate.
enum UTF16_SURROGATE_LOW_FIRST = 0xdc00;
/// Last low (trailing) surrogate.
enum UTF16_SURROGATE_LOW_LAST = 0xdfff;

/// Returns the high (leading) surrogate for a code point >= UTF16_SURROGATE_BASE.
auto UTF16_SURROGATE_HIGH(T : int)(T chr)
{
    return UTF16_SURROGATE_HIGH_FIRST
        + ((chr - UTF16_SURROGATE_BASE) >> UTF16_SURROGATE_SHIFT);
}

/// Returns the low (trailing) surrogate for a code point >= UTF16_SURROGATE_BASE.
auto UTF16_SURROGATE_LOW(T : int)(T chr)
{
    return UTF16_SURROGATE_LOW_FIRST
        + ((chr - UTF16_SURROGATE_BASE) & UTF16_SURROGATE_MASK);
}

/// Length in bytes of U+FFFD when encoded as UTF-8.
enum UTF8_REPLACEMENT_CHAR_LEN = 3;

/// Converts a hexadecimal literal such as `Hex!"7f"` into the char with that
/// value. The value is computed and range-checked at compile time.
char Hex(string s)()
{
    enum int value = s.to!int(16);
    static assert(value >= 0 && value <= 0xff,
        "cannot convert " ~ s ~ " to a single character");
    return cast(char) value;
}

/// Alphabet of modified base64: standard base64 with ',' in place of '/'.
immutable Base64EncodeTable = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";

/// Maps a byte to its 6-bit modified-base64 value; 0xff marks bytes that are
/// not part of the alphabet.
immutable Base64DecodeTable = parseBase64DecodeTable(`
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,62, 63,XX,XX,XX,
52,53,54,55, 56,57,58,59, 60,61,XX,XX, XX,XX,XX,XX,
XX, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
15,16,17,18, 19,20,21,22, 23,24,25,XX, XX,XX,XX,XX,
XX,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
41,42,43,44, 45,46,47,48, 49,50,51,XX, XX,XX,XX,XX,
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX
`);

static assert(Base64DecodeTable.length == 256,
    "the base64 decode table must cover every byte value");

/// Builds a decode table from a comma-separated list of entries.
///
/// Each entry is either `XX` (stored as 0xff, "not in the alphabet") or a
/// DECIMAL 6-bit value in the range 0 .. 63. Whitespace around entries is
/// ignored. The result has one char per entry.
string parseBase64DecodeTable(string s)
{
    import std.array : split;
    import std.string : strip;

    string table;
    foreach (tok; s.split(','))
    {
        const entry = tok.strip;
        if (entry == "XX")
        {
            table ~= cast(char) 0xff;
            continue;
        }
        // The entries are decimal; reading them as hex would turn e.g. "62"
        // into 98 and corrupt every decoded character.
        const value = entry.to!int;
        assert(value >= 0 && value < 64, "base64 value out of range: " ~ entry);
        table ~= cast(char) value;
    }
    return table;
}

/// Bounds-checked table access; throws if `c` is not a valid index of `Table`.
private char lookup(alias Table, Number : int)(Number c)
{
    import std.exception : enforce;

    enforce(c >= 0 && c < Table.length,
        "index error in lookup in " ~ Table.stringof ~ " for char: " ~ c.to!int.to!string);
    return Table[c];
}

/// Encodes raw bytes (here: big-endian UTF-16 code units) as one modified
/// base64 run, including the leading '&' and the trailing '-'. No '=' padding
/// is produced.
string modifiedBase64Encode(string src)
{
    Appender!string ret;
    ret.put('&');
    while (src.length >= 3)
    {
        ret.put(lookup!Base64EncodeTable(src[0] >> 2));
        ret.put(lookup!Base64EncodeTable(((src[0] & 0x03) << 4) | (src[1] >> 4)));
        ret.put(lookup!Base64EncodeTable(((src[1] & 0x0f) << 2) | (src[2] >> 6)));
        ret.put(lookup!Base64EncodeTable(src[2] & 0x3f));
        src = src[3 .. $];
    }
    if (src.length > 0)
    {
        ret.put(lookup!Base64EncodeTable(src[0] >> 2));
        if (src.length == 1)
        {
            ret.put(lookup!Base64EncodeTable((src[0] & 0x03) << 4));
        }
        else
        {
            ret.put(lookup!Base64EncodeTable(((src[0] & 0x03) << 4) | (src[1] >> 4)));
            ret.put(lookup!Base64EncodeTable((src[1] & 0x0f) << 2));
        }
    }
    ret.put('-');
    return ret.data;
}

/// Returns the tail of `s` starting at the first byte that cannot be copied
/// verbatim into modified UTF-7 ('&', or anything outside 0x20 .. 0x7e), or
/// null when the whole string can be used as is.
string imapUtf8FirstEncodeSubstring(string s)
{
    foreach (i, char c; s)
    {
        // Must agree with the literal range used by utf8ToUtf7 (0x20 .. 0x7e).
        if (c == '&' || c < 0x20 || c >= 0x7f)
            return s[i .. $];
    }
    return null;
}

/// Convert a unicode mailbox name to the modified UTF-7 encoding, according to RFC 3501 Section 5.1.3.
///
/// Bytes 0x20 .. 0x7e are copied, '&' becomes "&-", and every run of other
/// characters is emitted as "&<modified base64 of UTF-16BE>-".
///
/// Returns: the encoded name; `src` itself when nothing needs encoding; null
/// when `src` is not valid UTF-8 (truncated or overlong sequence, surrogate,
/// or a code point above U+10FFFF).
string utf8ToUtf7(string src)
{
    auto p = imapUtf8FirstEncodeSubstring(src);
    if (p.length == 0) // no characters to be encoded
        return src;

    // at least one encoded character: copy the untouched prefix, then walk the rest
    Appender!string ret;
    size_t i = src.length - p.length;
    ret.put(src[0 .. i]);

    while (i < src.length)
    {
        const char c = src[i];
        if (c == '&')
        {
            ret.put("&-");
            ++i;
            continue;
        }
        if (c >= 0x20 && c < 0x7f)
        {
            ret.put(c);
            ++i;
            continue;
        }

        // Collect the whole run of characters needing encoding as UTF-16BE bytes.
        Appender!string utf16;
        void putUnit(int unit)
        {
            utf16.put(cast(char)((unit >> 8) & 0xff));
            utf16.put(cast(char)(unit & 0xff));
        }

        while (i < src.length && (src[i] < 0x20 || src[i] >= 0x7f))
        {
            const chr = utf8GetChar(src[i .. $]);
            const isSurrogate = chr >= UTF16_SURROGATE_HIGH_FIRST
                && chr <= UTF16_SURROGATE_LOW_LAST;
            if (chr < 0 || chr > 0x10ffff || isSurrogate)
                return null; // not representable in UTF-16

            if (chr < UTF16_SURROGATE_BASE)
            {
                putUnit(chr);
            }
            else
            {
                putUnit(UTF16_SURROGATE_HIGH(chr));
                putUnit(UTF16_SURROGATE_LOW(chr));
            }
            i += utf8CharBytes(src[i]);
        }
        ret.put(modifiedBase64Encode(utf16.data));
    }
    return ret.data;
}

/// True for the characters that may appear in a modified UTF-7 string
/// (printable US-ASCII, 0x20 .. 0x7e).
bool isValidUtf7(dchar c)
{
    return c >= 0x20 && c < 0x7f;
}

/// Converts the oldest pending UTF-16 code point held in a 4-byte ring buffer
/// to UTF-8 and appends it to `dest`.
///
/// `start` is the ring index of the oldest byte and `len` the number of
/// pending bytes. After a BMP character `start` advances by two; after a
/// surrogate pair all four bytes are consumed and `start` stays where it is.
/// Returns false on an odd byte count, a lone or misordered surrogate, or a
/// printable ASCII character (which must never be base64-encoded).
private bool utf16RingToUtf8(ref Appender!string dest, const ref char[4] ring,
    ref uint start, uint len)
{
    if (len == 0 || len % 2 != 0)
        return false;

    const int high = (ring[start % 4] << 8) | ring[(start + 1) % 4];
    if (high < UTF16_SURROGATE_HIGH_FIRST || high > UTF16_SURROGATE_HIGH_MAX)
    {
        // characters that can be represented in ASCII must not be encoded
        if (high >= 0x20 && high < 0x7f)
            return false;
        dest.put(cast(dchar) high); // Appender encodes the dchar as UTF-8
        start = (start + 2) % 4;
        return true;
    }

    if (high > UTF16_SURROGATE_HIGH_LAST)
        return false; // starts with a low surrogate
    if (len != 4)
        return false; // missing the second half of the pair

    const int low = (ring[(start + 2) % 4] << 8) | ring[(start + 3) % 4];
    if (low < UTF16_SURROGATE_LOW_FIRST || low > UTF16_SURROGATE_LOW_LAST)
        return false;

    const int chr = UTF16_SURROGATE_BASE
        + (((high & UTF16_SURROGATE_MASK) << UTF16_SURROGATE_SHIFT)
            | (low & UTF16_SURROGATE_MASK));
    dest.put(cast(dchar) chr);
    return true;
}

/// Decodes the first UTF-16 code point stored big-endian in `output` and
/// returns it as UTF-8.
///
/// `output` must hold exactly 2 bytes (one code unit) or 4 bytes (two code
/// units); it is read circularly starting at index `pos_`.
/// Returns: the UTF-8 text, or null when the input has another length, is a
/// malformed surrogate sequence, or encodes a printable ASCII character.
string utf16BufToUtf8(char[] output, uint pos_)
{
    if (output.length != 2 && output.length != 4)
        return null;

    // Linearise the caller's buffer so the ring helper can start at index 0.
    char[4] ring = 0;
    foreach (k; 0 .. output.length)
        ring[k] = output[(pos_ + k) % output.length];

    Appender!string dest;
    uint start = 0;
    if (!utf16RingToUtf8(dest, ring, start, cast(uint) output.length))
        return null;
    return dest.data;
}

/// Decodes one modified-base64 run of `src` beginning at `pos` (the character
/// after '&') and appends the UTF-8 result to `dest`.
///
/// On success `pos` is moved just past the terminating '-'. On failure false
/// is returned, `pos` is unchanged and `dest` may contain partial output that
/// the caller should discard.
private bool decodeModifiedBase64(ref Appender!string dest, string src, ref size_t pos)
{
    char[4] ring = 0;
    uint start = 0;  // ring index of the oldest pending byte
    uint filled = 0; // total number of bytes written so far

    // Reading past the end yields NUL, which is not in the alphabet, so a
    // missing terminator is reported as an error instead of a range violation.
    char at(size_t k)
    {
        return k < src.length ? src[k] : '\0';
    }

    // Stores one decoded byte; once four bytes are pending the oldest code
    // point is converted (a full buffer is needed to pair surrogates).
    bool push(int value)
    {
        ring[filled % 4] = cast(char)(value & 0xff);
        if (++filled % 4 == start)
            return utf16RingToUtf8(dest, ring, start, 4);
        return true;
    }

    size_t i = pos;
    while (at(i) != '-')
    {
        const int a = lookup!Base64DecodeTable(at(i));
        const int b = lookup!Base64DecodeTable(at(i + 1));
        if (a == 0xff || b == 0xff)
            return false;
        if (!push((a << 2) | (b >> 4)))
            return false;

        const int c = lookup!Base64DecodeTable(at(i + 2));
        if (c == 0xff)
        {
            if (at(i + 2) != '-')
                return false;
            i += 2;
            break;
        }
        if (!push((b << 4) | (c >> 2)))
            return false;

        const int d = lookup!Base64DecodeTable(at(i + 3));
        if (d == 0xff)
        {
            if (at(i + 3) != '-')
                return false;
            i += 3;
            break;
        }
        if (!push((c << 6) | d))
            return false;
        i += 4;
    }

    // Flush what is still pending (at most one code unit is valid here).
    if (start != filled % 4)
    {
        const uint pending = (4 + filled - start) % 4;
        if (!utf16RingToUtf8(dest, ring, start, pending))
            return false;
    }

    pos = i + 1; // skip the terminating '-'
    return true;
}

/// Decodes a single modified-base64 run to UTF-8.
///
/// `src` starts right after the '&' and must contain the terminating '-';
/// anything after the '-' is ignored.
/// Returns: the decoded text, or null when the run is malformed or unterminated.
string modifiedBase64DecodeToUtf8(string src)
{
    Appender!string ret;
    size_t pos = 0;
    if (!decodeModifiedBase64(ret, src, pos))
        return null;
    return ret.data is null ? "" : ret.data;
}

/// Convert a mailbox name from the modified UTF-7 encoding, according to RFC 3501 Section 5.1.3.
///
/// Returns: the UTF-8 name; `src` itself when it contains no '&'; null when
/// `src` is not valid modified UTF-7 (byte outside 0x20 .. 0x7e, malformed or
/// unterminated base64 run, encoded printable ASCII, or two adjacent runs).
string utf7ToUtf8(string src)
{
    size_t first = src.length;
    foreach (k, char c; src)
    {
        if (!isValidUtf7(c))
            return null;
        if (c == '&' && first == src.length)
            first = k;
    }
    if (first == src.length) // no encoded characters
        return src;

    Appender!string ret;
    ret.put(src[0 .. first]);

    size_t i = first;
    while (i < src.length)
    {
        if (src[i] != '&')
        {
            ret.put(src[i++]);
            continue;
        }

        ++i; // step over '&'
        if (i < src.length && src[i] == '-')
        {
            ret.put('&');
            ++i;
            continue;
        }

        if (!decodeModifiedBase64(ret, src, i))
            return null;
        // "&...-&...-" must have been written as a single run
        if (i < src.length && src[i] == '&'
            && (i + 1 >= src.length || src[i + 1] != '-'))
            return null;
    }
    return ret.data;
}

/// Returns true when `src` is a well-formed modified UTF-7 string.
private bool utf7IsValid(string src)
{
    if (src.length == 0)
        return true;
    return utf7ToUtf8(src) !is null;
}

/// ASCII case-insensitive test for the special mailbox name "INBOX".
private bool isInbox(string name)
{
    import std.ascii : toUpper;

    enum inbox = "INBOX";
    if (name.length != inbox.length)
        return false;
    foreach (k, char ch; name)
    {
        if (toUpper(ch) != inbox[k])
            return false;
    }
    return true;
}

/// ASCII case-insensitive test whether `s` begins with `prefix`.
private bool hasPrefixIgnoreCase(string s, string prefix)
{
    import std.ascii : toUpper;

    if (s.length < prefix.length)
        return false;
    foreach (k, char ch; prefix)
    {
        if (toUpper(ch) != toUpper(s[k]))
            return false;
    }
    return true;
}

/// Returns `s` with every byte equal to `from` replaced by `to_`.
private string replaceByte(string s, char from, char to_)
{
    import std.exception : assumeUnique;

    if (from == to_)
        return s;
    auto buf = s.dup;
    foreach (ref ch; buf)
    {
        if (ch == from)
            ch = to_;
    }
    return assumeUnique(buf);
}

///
struct Mailbox
{
    /// Mailbox name in internal format (UTF-8, '/' as hierarchy separator).
    string mailbox;
    /// Prefix of the server's personal namespace.
    string prefix = "";
    /// Hierarchy delimiter used by the server.
    char delim = '/';

    ///
    string toString()
    {
        return applyNamespace();
    }

    /// Convert the names of personal mailboxes, using the namespace specified
    /// by the mail server, from internal to mail server format.
    ///
    /// "INBOX" (any ASCII case) is returned unchanged. Every other name is
    /// converted to modified UTF-7, prefixed with `prefix`, and has '/'
    /// replaced by `delim`.
    /// Throws: Exception when `mailbox` is not valid UTF-8.
    string applyNamespace()
    {
        import std.exception : enforce;
        import std.experimental.logger : infof;

        if (isInbox(mailbox))
            return mailbox;

        auto mbox = utf8ToUtf7(mailbox);
        enforce(mbox !is null || mailbox.length == 0,
            "mailbox name is not valid UTF-8");

        if (prefix.length == 0 && (delim == '\0' || delim == '/'))
            return mbox;

        string ret = prefix ~ mbox;
        // '\0' means the server reported no delimiter; never write NULs into the name.
        if (delim != '\0')
            ret = replaceByte(ret, '/', delim);
        infof("namespace: '%s' -> '%s'\n", mbox, ret);
        return ret;
    }

    /// Convert the names of personal mailboxes, using the namespace specified by
    /// the mail server, from mail server format to internal format.
    static Mailbox fromServerFormat(string mbox, string prefix, char delim)
    {
        return Mailbox(mbox.reverseNamespace(prefix, delim), prefix, delim);
    }
}

/// Converts a mailbox name from mail server format to internal format: strips
/// `prefix` when the name starts with it (ASCII case-insensitive), replaces
/// `delim` by '/', and decodes modified UTF-7.
///
/// "INBOX" (any ASCII case) is returned unchanged.
/// Returns: the internal name, or null when the name is not valid modified UTF-7.
string reverseNamespace(string mbox, string prefix, char delim)
{
    if (isInbox(mbox))
        return mbox;

    if (prefix.length == 0 && (delim == '\0' || delim == '/'))
        return utf7ToUtf8(mbox);

    string name = hasPrefixIgnoreCase(mbox, prefix) ? mbox[prefix.length .. $] : mbox;
    if (delim != '\0')
        name = replaceByte(name, delim, '/');
    return utf7ToUtf8(name);
}

/// Integer type used for Unicode code points.
alias unichar_t = int;

/// Decodes the UTF-8 sequence at the start of `src_`.
///
/// Sequences of up to six bytes (the historical UTF-8 definition) are
/// accepted; callers decide which code points they can represent.
/// Returns: the code point, or -1 when the input is empty, starts with an
/// invalid lead byte, is truncated, has a bad continuation byte, or is an
/// overlong encoding.
private int utf8GetChar(string src_)
{
    // Smallest code point that legitimately needs a sequence of the given length.
    static immutable unichar_t[7] lowestValid =
        [0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000];

    if (src_.length == 0)
        return -1;

    // Index the raw byte: `front` would auto-decode the whole code point.
    const int first = src_[0];
    if (first < 0x80)
        return first;

    // The lead byte has `len` high bits set followed by a zero bit; the
    // remaining bits are the highest bits of the value.
    const size_t len = utf8CharBytes(first);
    unichar_t chr;
    switch (len)
    {
    case 2:
        chr = first & 0x1f;
        break;
    case 3:
        chr = first & 0x0f;
        break;
    case 4:
        chr = first & 0x07;
        break;
    case 5:
        chr = first & 0x03;
        break;
    case 6:
        chr = first & 0x01;
        break;
    default:
        // only 7-bit characters have length 1; anything else is an invalid lead byte
        return -1;
    }

    if (len > src_.length)
        return -1; // truncated sequence

    // the following bytes must all be 10xxxxxx
    foreach (k; 1 .. len)
    {
        const int b = src_[k];
        if ((b & 0xc0) != 0x80)
            return -1;
        chr = (chr << 6) | (b & 0x3f);
    }

    if (chr < lowestValid[len])
        return -1; // overlong encoding

    return chr;
}

/// Returns the number of bytes belonging to this UTF-8 character. The given
/// parameter is the first byte of the UTF-8 sequence. Invalid input is
/// returned with length 1
private uint utf8CharBytes(int chr)
{
    /* 0x00 .. 0x7f are ASCII. 0x80 .. 0xC1 are invalid. */
    if (chr < (192 + 2) || chr > 255)
        return 1;
    return utf8_non1_bytes[chr - (192 + 2)];
}

/// Sequence length for each lead byte 0xC2 .. 0xFF.
private immutable ubyte[256 - 192 - 2] utf8_non1_bytes = [
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
];


/+
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

utf7 code taken from Dovecot by Timo Sirainen <tss@iki.fi>
+/