/*
 * This file is part of gtkD.
 *
 * gtkD is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version, with
 * some exceptions, please read the COPYING file.
 *
 * gtkD is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with gtkD; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 */

// generated automatically - do not change
// find conversion definition on APILookup.txt
// implement new conversion functionalities on the wrap.utils package


module glib.CharacterSet;

private import glib.ErrorG;
private import glib.GException;
private import glib.Str;
private import glib.c.functions;
public  import glib.c.types;


/**
 * Static wrappers around GLib's character set conversion and
 * locale/filename charset query functions.
 */
public struct CharacterSet
{

	/**
	 * Raises a GException wrapping `error` when a GLib call reported a failure.
	 *
	 * Params:
	 *     error = error slot filled in by a GLib call; `null` means success.
	 *
	 * Throws: GException if `error` is not `null`.
	 */
	private static void throwIfFailed(GError* error)
	{
		if (error is null)
			return;

		throw new GException(new ErrorG(error));
	}

	/**
	 * Converts a string from one character set to another by calling
	 * g_convert() with the full byte length of `str`.
	 *
	 * g_convert() is not generally suitable for streaming conversions, even
	 * though `bytesRead` can report partial characters: any internal state
	 * held by the underlying converter is not preserved across successive
	 * calls. Use g_iconv() for streaming instead.
	 *
	 * Extensions such as "//TRANSLIT" may not work (or may not work well)
	 * on many platforms; consider g_str_to_ascii() instead.
	 *
	 * Params:
	 *     str = the string to convert.
	 *     toCodeset = name of the character set to convert `str` into.
	 *     fromCodeset = character set of `str`.
	 *     bytesRead = receives the number of input bytes that were
	 *         successfully converted. Even on success this may be less than
	 *         the input length if there were partial characters at the end
	 *         of the input. If G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, it
	 *         is the byte offset after the last valid input sequence.
	 *
	 * Returns: the converted text, built with Str.toString from the buffer
	 *     and byte count reported by GLib. The C buffer itself is released
	 *     with Str.freeString before this function returns.
	 *
	 * Throws: GException on failure.
	 */
	public static string convert(string str, string toCodeset, string fromCodeset, out size_t bytesRead)
	{
		GError* failure;
		size_t outputLength;

		auto converted = g_convert(
			Str.toStringz(str),
			cast(ptrdiff_t) str.length,
			Str.toStringz(toCodeset),
			Str.toStringz(fromCodeset),
			&bytesRead,
			&outputLength,
			&failure);

		throwIfFailed(failure);

		try
		{
			return Str.toString(converted, outputLength);
		}
		finally
		{
			// The GLib-allocated buffer is released on every exit path.
			Str.freeString(converted);
		}
	}

	/**
	 * Returns the value of g_convert_error_quark().
	 */
	public static GQuark convertErrorQuark()
	{
		GQuark domain = g_convert_error_quark();
		return domain;
	}

	/**
	 * Converts a string from one character set to another by calling
	 * g_convert_with_fallback(), which may substitute fallback sequences
	 * for characters that are not representable in the output.
	 *
	 * It is not guaranteed that the fallback specification is honored:
	 * some systems do an approximate conversion inside iconv(), in which
	 * case GLib simply returns that approximate conversion.
	 *
	 * As with g_convert(), this is not generally suitable for streaming
	 * conversions because converter state is not preserved across calls;
	 * use g_iconv() for that purpose.
	 *
	 * Params:
	 *     str = the string to convert.
	 *     toCodeset = name of the character set to convert `str` into.
	 *     fromCodeset = character set of `str`.
	 *     fallback = UTF-8 string to use in place of characters not present
	 *         in the target encoding (it must itself be representable in
	 *         the target encoding). When GLib receives no fallback,
	 *         unrepresentable characters are written as Unicode escapes
	 *         \uxxxx or \Uxxxxyyyy.
	 *     bytesRead = receives the number of input bytes that were
	 *         successfully converted. Even on success this may be less than
	 *         the input length if there were partial characters at the end
	 *         of the input.
	 *
	 * Returns: the converted text, built with Str.toString from the buffer
	 *     and byte count reported by GLib. The C buffer itself is released
	 *     with Str.freeString before this function returns.
	 *
	 * Throws: GException on failure.
	 */
	public static string convertWithFallback(string str, string toCodeset, string fromCodeset, string fallback, out size_t bytesRead)
	{
		GError* failure;
		size_t outputLength;

		auto converted = g_convert_with_fallback(
			Str.toStringz(str),
			cast(ptrdiff_t) str.length,
			Str.toStringz(toCodeset),
			Str.toStringz(fromCodeset),
			Str.toStringz(fallback),
			&bytesRead,
			&outputLength,
			&failure);

		throwIfFailed(failure);

		try
		{
			return Str.toString(converted, outputLength);
		}
		finally
		{
			// The GLib-allocated buffer is released on every exit path.
			Str.freeString(converted);
		}
	}

	/**
	 * Converts a string using an already opened conversion descriptor by
	 * calling g_convert_with_iconv() with the full byte length of `str`.
	 *
	 * This is not generally suitable for streaming conversions because
	 * converter state is not preserved across calls; use g_iconv() for
	 * that purpose.
	 *
	 * Characters that are valid in the input character set but have no
	 * representation in the output character set result in a
	 * G_CONVERT_ERROR_ILLEGAL_SEQUENCE error, the same code that is
	 * reported for an invalid input byte sequence. To get defined
	 * behaviour for unrepresentable characters, use
	 * g_convert_with_fallback().
	 *
	 * Params:
	 *     str = the string to convert.
	 *     converter = conversion descriptor from g_iconv_open().
	 *     bytesRead = receives the number of input bytes that were
	 *         successfully converted. Even on success this may be less than
	 *         the input length if there were partial characters at the end
	 *         of the input. If G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, it
	 *         is the byte offset after the last valid input sequence.
	 *
	 * Returns: the converted text, built with Str.toString from the buffer
	 *     and byte count reported by GLib. The C buffer itself is released
	 *     with Str.freeString before this function returns.
	 *
	 * Throws: GException on failure.
	 */
	public static string convertWithIconv(string str, GIConv converter, out size_t bytesRead)
	{
		GError* failure;
		size_t outputLength;

		auto converted = g_convert_with_iconv(
			Str.toStringz(str),
			cast(ptrdiff_t) str.length,
			converter,
			&bytesRead,
			&outputLength,
			&failure);

		throwIfFailed(failure);

		try
		{
			return Str.toString(converted, outputLength);
		}
		finally
		{
			// The GLib-allocated buffer is released on every exit path.
			Str.freeString(converted);
		}
	}

	/**
	 * Returns the display basename for a filename, as produced by
	 * g_filename_display_basename(); GLib guarantees valid UTF-8.
	 *
	 * The display name might not be identical to the filename: there may
	 * be problems converting it to UTF-8, and some files can be translated
	 * in the display. If GLib cannot make sense of the encoding of
	 * `filename`, it replaces unknown characters with U+FFFD (UTF-8
	 * encoding "\357\277\275" in octal notation), which can be searched
	 * for to detect an invalid encoding.
	 *
	 * Pass the whole absolute pathname so that well known locations can be
	 * translated. When the whole path is known, this function is preferred
	 * over g_filename_display_name().
	 *
	 * Params:
	 *     filename = an absolute pathname in the GLib file name encoding.
	 *
	 * Returns: a rendition of the basename of the filename in valid UTF-8.
	 *     The C string is released with Str.freeString before returning.
	 *
	 * Since: 2.6
	 */
	public static string filenameDisplayBasename(string filename)
	{
		auto rendered = g_filename_display_basename(Str.toStringz(filename));

		try
		{
			return Str.toString(rendered);
		}
		finally
		{
			Str.freeString(rendered);
		}
	}

	/**
	 * Converts a filename into a valid UTF-8 string for display purposes
	 * by calling g_filename_display_name().
	 *
	 * The conversion is not necessarily reversible, so keep the original
	 * filename around. Unlike g_filename_to_utf8(), GLib guarantees a
	 * non-NULL result even if the filename is not in the GLib file name
	 * encoding; unknown characters are replaced with U+FFFD (UTF-8
	 * encoding "\357\277\275" in octal notation).
	 *
	 * If the whole pathname of the file is known, prefer
	 * g_filename_display_basename(), which allows location-based
	 * translation of filenames.
	 *
	 * Params:
	 *     filename = a pathname hopefully in the GLib file name encoding.
	 *
	 * Returns: a rendition of the filename in valid UTF-8. The C string is
	 *     released with Str.freeString before returning.
	 *
	 * Since: 2.6
	 */
	public static string filenameDisplayName(string filename)
	{
		auto rendered = g_filename_display_name(Str.toStringz(filename));

		try
		{
			return Str.toString(rendered);
		}
		finally
		{
			Str.freeString(rendered);
		}
	}

	/**
	 * Converts a string from UTF-8 to the encoding GLib uses for filenames
	 * by calling g_filename_from_utf8().
	 *
	 * On Windows GLib uses UTF-8 for filenames; on other platforms the
	 * result indirectly depends on the current locale.
	 *
	 * The input must not contain nul characters even if `len` is positive:
	 * a nul inside the string results in G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
	 * If the filename encoding is not UTF-8 and the conversion output
	 * contains a nul character, G_CONVERT_ERROR_EMBEDDED_NUL is reported.
	 *
	 * Params:
	 *     utf8string = a UTF-8 encoded string.
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated. It is forwarded to GLib unchanged.
	 *     bytesRead = receives the number of input bytes that were
	 *         successfully converted. Even on success this may be less than
	 *         `len` if there were partial characters at the end of the
	 *         input. If G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, it is the
	 *         byte offset after the last valid input sequence.
	 *     bytesWritten = receives the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: the converted string. The C string is released with
	 *     Str.freeString before returning.
	 *
	 * Throws: GException on failure.
	 */
	public static string filenameFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* failure;

		auto converted = g_filename_from_utf8(
			Str.toStringz(utf8string),
			len,
			&bytesRead,
			&bytesWritten,
			&failure);

		throwIfFailed(failure);

		try
		{
			return Str.toString(converted);
		}
		finally
		{
			Str.freeString(converted);
		}
	}

	/**
	 * Converts a string in the encoding GLib uses for filenames into a
	 * UTF-8 string by calling g_filename_to_utf8().
	 *
	 * On Windows GLib uses UTF-8 for filenames; on other platforms the
	 * result indirectly depends on the current locale.
	 *
	 * The input must not contain nul characters even if `len` is positive:
	 * a nul inside the string results in G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
	 * If the source encoding is not UTF-8 and the conversion output
	 * contains a nul character, G_CONVERT_ERROR_EMBEDDED_NUL is reported.
	 * Use g_convert() to produce output that may contain embedded nul
	 * characters.
	 *
	 * Params:
	 *     opsysstring = a string in the encoding for filenames.
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated. Some encodings allow nul bytes inside
	 *         strings; using -1 is unsafe in that case. It is forwarded to
	 *         GLib unchanged.
	 *     bytesRead = receives the number of input bytes that were
	 *         successfully converted. Even on success this may be less than
	 *         `len` if there were partial characters at the end of the
	 *         input. If G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, it is the
	 *         byte offset after the last valid input sequence.
	 *     bytesWritten = receives the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: the converted string. The C string is released with
	 *     Str.freeString before returning.
	 *
	 * Throws: GException on failure.
	 */
	public static string filenameToUtf8(string opsysstring, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* failure;

		auto converted = g_filename_to_utf8(
			Str.toStringz(opsysstring),
			len,
			&bytesRead,
			&bytesWritten,
			&failure);

		throwIfFailed(failure);

		try
		{
			return Str.toString(converted);
		}
		finally
		{
			Str.freeString(converted);
		}
	}

	/**
	 * Obtains the character set for the current locale via g_get_charset().
	 *
	 * The name can be used as an argument to g_convert() to convert from
	 * the current locale's encoding to some other encoding (g_locale_to_utf8()
	 * and g_locale_from_utf8() are frequently convenient shortcuts).
	 *
	 * On Windows the character set reported is the system default ANSI
	 * code-page, which may differ from the one used by the C library's
	 * current locale. On Linux it is found by consulting nl_langinfo() if
	 * available; otherwise the environment variables `LC_ALL`, `LC_CTYPE`,
	 * `LANG` and `CHARSET` are queried in order.
	 *
	 * The C string reported by GLib is not allocated for the caller, so
	 * this wrapper does not free it.
	 *
	 * Params:
	 *     charset = receives the character set name.
	 *
	 * Returns: `true` if the returned charset is UTF-8, in which case
	 *     calling g_convert() can perhaps be avoided.
	 */
	public static bool getCharset(out string charset)
	{
		char* rawName;

		const bool isUtf8 = g_get_charset(&rawName) != 0;
		charset = Str.toString(rawName);

		return isUtf8;
	}

	/**
	 * Gets the character set for the current locale via g_get_codeset().
	 *
	 * Returns: the name of the character set. The C string allocated by
	 *     GLib is released with Str.freeString before returning.
	 */
	public static string getCodeset()
	{
		auto codeset = g_get_codeset();

		try
		{
			return Str.toString(codeset);
		}
		finally
		{
			Str.freeString(codeset);
		}
	}

	/**
	 * Determines the preferred character sets used for filenames via
	 * g_get_filename_charsets().
	 *
	 * The first entry is the filename encoding; the subsequent entries are
	 * used when trying to generate a displayable representation of a
	 * filename, see g_filename_display_name().
	 *
	 * On Unix the character sets are determined from the environment
	 * variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`. On Windows
	 * the character set used in the GLib API is always UTF-8 and those
	 * variables have no effect.
	 *
	 * `G_FILENAME_ENCODING` may be a comma-separated list of character set
	 * names; the special token "\@locale" means the character set of the
	 * current locale. If it is not set but `G_BROKEN_FILENAMES` is, the
	 * character set of the current locale is the filename encoding. If
	 * neither is set, UTF-8 is the filename encoding, and the character
	 * set of the current locale is also put in the list.
	 *
	 * The list reported by GLib belongs to GLib, so this wrapper does not
	 * free it.
	 *
	 * Note that on Unix, regardless of the locale character set or
	 * `G_FILENAME_ENCODING` value, the actual file names present on a
	 * system might be in any random encoding or just gibberish.
	 *
	 * Params:
	 *     filenameCharsets = receives the list of encoding names.
	 *
	 * Returns: `true` if the filename encoding is UTF-8.
	 *
	 * Since: 2.6
	 */
	public static bool getFilenameCharsets(out string[] filenameCharsets)
	{
		char** rawList;

		const bool isUtf8 = g_get_filename_charsets(&rawList) != 0;
		filenameCharsets = Str.toStringArray(rawList);

		return isUtf8;
	}

	/**
	 * Converts a string from UTF-8 to the encoding used for strings by the
	 * C runtime in the current locale by calling g_locale_from_utf8().
	 * On Windows this means the system codepage.
	 *
	 * The input must not contain nul characters even if `len` is positive:
	 * a nul inside the string results in G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
	 * Use g_convert() to convert input that may contain embedded nul
	 * characters.
	 *
	 * Params:
	 *     utf8string = a UTF-8 encoded string.
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated. It is forwarded to GLib unchanged.
	 *     bytesRead = receives the number of input bytes that were
	 *         successfully converted. Even on success this may be less than
	 *         `len` if there were partial characters at the end of the
	 *         input. If G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, it is the
	 *         byte offset after the last valid input sequence.
	 *
	 * Returns: the converted text, built with Str.toString from the buffer
	 *     and byte count reported by GLib. The C buffer itself is released
	 *     with Str.freeString before this function returns.
	 *
	 * Throws: GException on failure.
	 */
	public static string localeFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead)
	{
		GError* failure;
		size_t outputLength;

		auto converted = g_locale_from_utf8(
			Str.toStringz(utf8string),
			len,
			&bytesRead,
			&outputLength,
			&failure);

		throwIfFailed(failure);

		try
		{
			return Str.toString(converted, outputLength);
		}
		finally
		{
			// The GLib-allocated buffer is released on every exit path.
			Str.freeString(converted);
		}
	}

	/**
	 * Converts a string in the encoding used for strings by the C runtime
	 * in the current locale into a UTF-8 string by calling
	 * g_locale_to_utf8() with the full byte length of `opsysstring`.
	 *
	 * If the source encoding is not UTF-8 and the conversion output
	 * contains a nul character, G_CONVERT_ERROR_EMBEDDED_NUL is reported.
	 * If the source encoding is UTF-8, an embedded nul character is
	 * reported as G_CONVERT_ERROR_ILLEGAL_SEQUENCE for backward
	 * compatibility. Use g_convert() to produce output that may contain
	 * embedded nul characters.
	 *
	 * Params:
	 *     opsysstring = a string in the encoding of the current locale. On
	 *         Windows this means the system codepage.
	 *     bytesRead = receives the number of input bytes that were
	 *         successfully converted. Even on success this may be less than
	 *         the input length if there were partial characters at the end
	 *         of the input. If G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, it
	 *         is the byte offset after the last valid input sequence.
	 *     bytesWritten = receives the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: the converted string. The C string is released with
	 *     Str.freeString before returning.
	 *
	 * Throws: GException on failure.
	 */
	public static string localeToUtf8(string opsysstring, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* failure;

		auto converted = g_locale_to_utf8(
			Str.toStringz(opsysstring),
			cast(ptrdiff_t) opsysstring.length,
			&bytesRead,
			&bytesWritten,
			&failure);

		throwIfFailed(failure);

		try
		{
			return Str.toString(converted);
		}
		finally
		{
			Str.freeString(converted);
		}
	}

	/**
	 * Computes a list of applicable locale names for a locale category via
	 * g_get_language_names_with_category().
	 *
	 * The list can be used to construct fallback locale-dependent
	 * filenames or search paths. GLib sorts it from most desirable to
	 * least desirable and always includes the default locale "C". GLib
	 * consults the environment variables `LANGUAGE`, `LC_ALL`, the
	 * variable named by `categoryName`, and `LANG`.
	 *
	 * g_get_language_names() returns
	 * g_get_language_names_with_category("LC_MESSAGES").
	 *
	 * The C array is owned by the calling thread on the GLib side, so this
	 * wrapper does not free it.
	 *
	 * Params:
	 *     categoryName = a locale category name.
	 *
	 * Returns: the locale names, converted with Str.toStringArray.
	 *
	 * Since: 2.58
	 */
	public static string[] getLanguageNamesWithCategory(string categoryName)
	{
		auto rawNames = g_get_language_names_with_category(Str.toStringz(categoryName));

		return Str.toStringArray(rawNames);
	}

	/**
	 * Obtains the character set used by the console attached to the
	 * process, via g_get_console_charset(); it is suitable for printing
	 * output to the terminal.
	 *
	 * Usually this matches the result of g_get_charset(), but where the
	 * locale's character set does not match the console encoding GLib
	 * tries to guess a more suitable value.
	 *
	 * On Windows the character set reported is the output code page of the
	 * console associated with the calling process; if it cannot be
	 * determined (for example because no console is attached) UTF-8 is
	 * assumed.
	 *
	 * The C string reported by GLib is not allocated for the caller, so
	 * this wrapper does not free it.
	 *
	 * Params:
	 *     charset = receives the character set name.
	 *
	 * Returns: `true` if the returned charset is UTF-8, in which case
	 *     calling g_convert() can perhaps be avoided.
	 *
	 * Since: 2.62
	 */
	public static bool getConsoleCharset(out string charset)
	{
		char* rawName;

		const bool isUtf8 = g_get_console_charset(&rawName) != 0;
		charset = Str.toString(rawName);

		return isUtf8;
	}
}