/*
 * This file is part of gtkD.
 *
 * gtkD is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version, with
 * some exceptions, please read the COPYING file.
 *
 * gtkD is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with gtkD; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 */

// generated automatically - do not change
// find conversion definition on APILookup.txt
// implement new conversion functionalities on the wrap.utils package


module glib.CharacterSet;

private import glib.ErrorG;
private import glib.GException;
private import glib.Str;
private import glib.c.functions;
public  import glib.c.types;
public  import gtkc.glibtypes;


/**
 * Static wrappers around GLib's character set conversion functions
 * (g_convert() and related calls). Every member is `static`; the struct
 * carries no state of its own.
 */
public struct CharacterSet
{

	/**
	 * Converts a string from one character set to another.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytes_read can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Using extensions such as "//TRANSLIT" may not work (or may not work
	 * well) on many platforms.  Consider using g_str_to_ascii() instead.
	 *
	 * Params:
	 *     str = the string to convert. Its full length (`str.length` bytes)
	 *         is passed to g_convert().
	 *     toCodeset = name of character set into which to convert @str
	 *     fromCodeset = character set of @str.
	 *     bytesRead = receives the number of bytes in
	 *         the input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than the length of @str if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *
	 * Returns: the converted text, built from the number of bytes GLib
	 *     reports as written. The buffer allocated by GLib is released with
	 *     Str.freeString() before this function returns.
	 *
	 * Throws: GException on failure.
	 */
	public static string convert(string str, string toCodeset, string fromCodeset, out size_t bytesRead)
	{
		size_t bytesWritten;
		GError* err = null;

		auto retStr = g_convert(Str.toStringz(str), cast(ptrdiff_t)str.length, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Returns the value of g_convert_error_quark(), i.e. the quark GLib
	 * uses for its conversion error domain.
	 */
	public static GQuark convertErrorQuark()
	{
		return g_convert_error_quark();
	}

	/**
	 * Converts a string from one character set to another, possibly
	 * including fallback sequences for characters not representable
	 * in the output. Note that it is not guaranteed that the specification
	 * for the fallback sequences in @fallback will be honored. Some
	 * systems may do an approximate conversion from @from_codeset
	 * to @to_codeset in their iconv() functions,
	 * in which case GLib will simply return that approximate conversion.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytes_read can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Params:
	 *     str = the string to convert. Its full length (`str.length` bytes)
	 *         is passed to g_convert_with_fallback().
	 *     toCodeset = name of character set into which to convert @str
	 *     fromCodeset = character set of @str.
	 *     fallback = UTF-8 string to use in place of characters not
	 *         present in the target encoding. (The string must be
	 *         representable in the target encoding).
	 *         If %NULL, characters not in the target encoding will
	 *         be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
	 *     bytesRead = receives the number of bytes in
	 *         the input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than the length of @str if there were partial characters
	 *         at the end of the input.
	 *
	 * Returns: the converted text, built from the number of bytes GLib
	 *     reports as written. The buffer allocated by GLib is released with
	 *     Str.freeString() before this function returns.
	 *
	 * Throws: GException on failure.
	 */
	public static string convertWithFallback(string str, string toCodeset, string fromCodeset, string fallback, out size_t bytesRead)
	{
		size_t bytesWritten;
		GError* err = null;

		auto retStr = g_convert_with_fallback(Str.toStringz(str), cast(ptrdiff_t)str.length, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), Str.toStringz(fallback), &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Converts a string from one character set to another.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytes_read can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Characters which are valid in the input character set, but which have no
	 * representation in the output character set will result in a
	 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE error. This is in contrast to the iconv()
	 * specification, which leaves this behaviour implementation defined. Note that
	 * this is the same error code as is returned for an invalid byte sequence in
	 * the input character set. To get defined behaviour for conversion of
	 * unrepresentable characters, use g_convert_with_fallback().
	 *
	 * Params:
	 *     str = the string to convert. Its full length (`str.length` bytes)
	 *         is passed to g_convert_with_iconv().
	 *     converter = conversion descriptor from g_iconv_open()
	 *     bytesRead = receives the number of bytes in
	 *         the input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than the length of @str if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *
	 * Returns: the converted text, built from the number of bytes GLib
	 *     reports as written. The buffer allocated by GLib is released with
	 *     Str.freeString() before this function returns.
	 *
	 * Throws: GException on failure.
	 */
	public static string convertWithIconv(string str, GIConv converter, out size_t bytesRead)
	{
		size_t bytesWritten;
		GError* err = null;

		auto retStr = g_convert_with_iconv(Str.toStringz(str), cast(ptrdiff_t)str.length, converter, &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Returns the display basename for the particular filename, guaranteed
	 * to be valid UTF-8. The display name might not be identical to the filename,
	 * for instance there might be problems converting it to UTF-8, and some files
	 * can be translated in the display.
	 *
	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
	 * You can search the result for the UTF-8 encoding of this character (which is
	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
	 * encoding.
	 *
	 * You must pass the whole absolute pathname to this function so that
	 * translation of well known locations can be done.
	 *
	 * This function is preferred over g_filename_display_name() if you know the
	 * whole path, as it allows translation.
	 *
	 * Params:
	 *     filename = an absolute pathname in the
	 *         GLib file name encoding
	 *
	 * Returns: a string containing a rendition of the basename of the
	 *     filename in valid UTF-8. The buffer allocated by GLib is released
	 *     with Str.freeString() before this function returns.
	 *
	 * Since: 2.6
	 */
	public static string filenameDisplayBasename(string filename)
	{
		auto retStr = g_filename_display_basename(Str.toStringz(filename));

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Converts a filename into a valid UTF-8 string. The conversion is
	 * not necessarily reversible, so you should keep the original around
	 * and use the return value of this function only for display purposes.
	 * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL
	 * even if the filename actually isn't in the GLib file name encoding.
	 *
	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
	 * You can search the result for the UTF-8 encoding of this character (which is
	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
	 * encoding.
	 *
	 * If you know the whole pathname of the file you should use
	 * g_filename_display_basename(), since that allows location-based
	 * translation of filenames.
	 *
	 * Params:
	 *     filename = a pathname hopefully in the
	 *         GLib file name encoding
	 *
	 * Returns: a string containing a rendition of the filename in valid
	 *     UTF-8. The buffer allocated by GLib is released with
	 *     Str.freeString() before this function returns.
	 *
	 * Since: 2.6
	 */
	public static string filenameDisplayName(string filename)
	{
		auto retStr = g_filename_display_name(Str.toStringz(filename));

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Converts a string from UTF-8 to the encoding GLib uses for
	 * filenames. Note that on Windows GLib uses UTF-8 for filenames;
	 * on other platforms, this function indirectly depends on the
	 * [current locale][setlocale].
	 *
	 * The input string shall not contain nul characters even if the @len
	 * argument is positive. A nul character found inside the string will result
	 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. If the filename encoding is
	 * not UTF-8 and the conversion output contains a nul character, the error
	 * %G_CONVERT_ERROR_EMBEDDED_NUL is set and the function returns %NULL.
	 *
	 * Params:
	 *     utf8string = a UTF-8 encoded string.
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated. The value is forwarded unchanged to
	 *         g_filename_from_utf8(); it is not checked against
	 *         `utf8string.length` here.
	 *     bytesRead = receives the number of bytes in
	 *         the input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = the number of bytes stored in
	 *         the output buffer (not including the terminating nul).
	 *
	 * Returns: The converted string.
	 *
	 * Throws: GException on failure.
	 */
	public static string filenameFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_filename_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Converts a string which is in the encoding used by GLib for
	 * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
	 * for filenames; on other platforms, this function indirectly depends on
	 * the [current locale][setlocale].
	 *
	 * The input string shall not contain nul characters even if the @len
	 * argument is positive. A nul character found inside the string will result
	 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
	 * If the source encoding is not UTF-8 and the conversion output contains a
	 * nul character, the error %G_CONVERT_ERROR_EMBEDDED_NUL is set and the
	 * function returns %NULL. Use g_convert() to produce output that
	 * may contain embedded nul characters.
	 *
	 * Params:
	 *     opsysstring = a string in the encoding for filenames
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe). The value is forwarded
	 *         unchanged to g_filename_to_utf8(); it is not checked against
	 *         `opsysstring.length` here.
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: The converted string.
	 *
	 * Throws: GException on failure.
	 */
	public static string filenameToUtf8(string opsysstring, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_filename_to_utf8(Str.toStringz(opsysstring), len, &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Obtains the character set for the [current locale][setlocale]; you
	 * might use this character set as an argument to g_convert(), to convert
	 * from the current locale's encoding to some other encoding. (Frequently
	 * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.)
	 *
	 * On Windows the character set returned by this function is the
	 * so-called system default ANSI code-page. That is the character set
	 * used by the "narrow" versions of C library and Win32 functions that
	 * handle file names. It might be different from the character set
	 * used by the C library's current locale.
	 *
	 * On Linux, the character set is found by consulting nl_langinfo() if
	 * available. If not, the environment variables `LC_ALL`, `LC_CTYPE`, `LANG`
	 * and `CHARSET` are queried in order.
	 *
	 * The return value is %TRUE if the locale's encoding is UTF-8, in that
	 * case you can perhaps avoid calling g_convert().
	 *
	 * The C string GLib reports is not freed by this wrapper; it is only
	 * converted with Str.toString().
	 *
	 * Params:
	 *     charset = receives the character set name.
	 *
	 * Returns: %TRUE if the returned charset is UTF-8
	 */
	public static bool getCharset(out string charset)
	{
		char* outcharset = null;

		auto p = g_get_charset(&outcharset) != 0;

		charset = Str.toString(outcharset);

		return p;
	}

	/**
	 * Gets the character set for the current locale.
	 *
	 * Returns: a string containing the name of the character set. The
	 *     buffer allocated by GLib is released with Str.freeString() before
	 *     this function returns.
	 */
	public static string getCodeset()
	{
		auto retStr = g_get_codeset();

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Determines the preferred character sets used for filenames.
	 * The first character set from the @charsets is the filename encoding, the
	 * subsequent character sets are used when trying to generate a displayable
	 * representation of a filename, see g_filename_display_name().
	 *
	 * On Unix, the character sets are determined by consulting the
	 * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
	 * On Windows, the character set used in the GLib API is always UTF-8
	 * and said environment variables have no effect.
	 *
	 * `G_FILENAME_ENCODING` may be set to a comma-separated list of
	 * character set names. The special token "\@locale" is taken
	 * to mean the character set for the [current locale][setlocale].
	 * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
	 * the character set of the current locale is taken as the filename
	 * encoding. If neither environment variable is set, UTF-8 is taken
	 * as the filename encoding, but the character set of the current locale
	 * is also put in the list of encodings.
	 *
	 * Note that on Unix, regardless of the locale character set or
	 * `G_FILENAME_ENCODING` value, the actual file names present
	 * on a system might be in any random encoding or just gibberish.
	 *
	 * Params:
	 *     charsets = receives the list of character set names. The array
	 *         GLib reports is converted with Str.toStringArray() and is not
	 *         freed by this wrapper (it belongs to GLib).
	 *
	 * Returns: %TRUE if the filename encoding is UTF-8.
	 *
	 * Since: 2.6
	 */
	public static bool getFilenameCharsets(out string[] charsets)
	{
		// GLib stores a pointer to its own NULL-terminated array here.
		char** outcharsets = null;

		auto p = g_get_filename_charsets(&outcharsets) != 0;

		charsets = Str.toStringArray(outcharsets);

		return p;
	}

	/**
	 * Legacy overload kept so that existing callers keep compiling.
	 *
	 * Because @charsets is taken by value, this overload cannot hand the
	 * character set list back to the caller; only the boolean result is
	 * meaningful. Use the overload taking `out string[]` to obtain the list.
	 *
	 * Params:
	 *     charsets = ignored.
	 *
	 * Returns: %TRUE if the filename encoding is UTF-8.
	 *
	 * Since: 2.6
	 */
	public static bool getFilenameCharsets(string[][] charsets)
	{
		// Delegate instead of marshalling `charsets` to C: the marshalled
		// array was never read back and never released.
		string[] unused;
		return getFilenameCharsets(unused);
	}

	/**
	 * Converts a string from UTF-8 to the encoding used for strings by
	 * the C runtime (usually the same as that used by the operating
	 * system) in the [current locale][setlocale]. On Windows this means
	 * the system codepage.
	 *
	 * The input string shall not contain nul characters even if the @len
	 * argument is positive. A nul character found inside the string will result
	 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. Use g_convert() to convert
	 * input that may contain embedded nul characters.
	 *
	 * Params:
	 *     utf8string = a UTF-8 encoded string
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated. The value is forwarded unchanged to
	 *         g_locale_from_utf8(); it is not checked against
	 *         `utf8string.length` here.
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *
	 * Returns: the converted text, built from the number of bytes GLib
	 *     reports as written. The buffer allocated by GLib is released with
	 *     Str.freeString() before this function returns.
	 *
	 * Throws: GException on failure.
	 */
	public static string localeFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead)
	{
		size_t bytesWritten;
		GError* err = null;

		auto retStr = g_locale_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Converts a string which is in the encoding used for strings by
	 * the C runtime (usually the same as that used by the operating
	 * system) in the [current locale][setlocale] into a UTF-8 string.
	 *
	 * If the source encoding is not UTF-8 and the conversion output contains a
	 * nul character, the error %G_CONVERT_ERROR_EMBEDDED_NUL is set and the
	 * function returns %NULL.
	 * If the source encoding is UTF-8, an embedded nul character is treated with
	 * the %G_CONVERT_ERROR_ILLEGAL_SEQUENCE error for backward compatibility with
	 * earlier versions of this library. Use g_convert() to produce output that
	 * may contain embedded nul characters.
	 *
	 * Params:
	 *     opsysstring = a string in the
	 *         encoding of the current locale. On Windows
	 *         this means the system codepage. Its full length
	 *         (`opsysstring.length` bytes) is passed to g_locale_to_utf8().
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than the length of @opsysstring if there were partial
	 *         characters at the end of the input. If the error
	 *         %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: The converted string.
	 *
	 * Throws: GException on failure.
	 */
	public static string localeToUtf8(string opsysstring, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_locale_to_utf8(Str.toStringz(opsysstring), cast(ptrdiff_t)opsysstring.length, &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}
}