/*
 * This file is part of gtkD.
 *
 * gtkD is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version, with
 * some exceptions, please read the COPYING file.
 *
 * gtkD is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with gtkD; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 */

// generated automatically - do not change
// find conversion definition on APILookup.txt
// implement new conversion functionalities on the wrap.utils package


module glib.CharacterSet;

private import glib.ErrorG;
private import glib.GException;
private import glib.Str;
private import gtkc.glib;
public import gtkc.glibtypes;


/**
 * Static wrappers around the GLib character set conversion functions
 * (g_convert() and friends). Results are handed back as D strings and
 * GLib errors are reported by throwing a GException.
 */
public struct CharacterSet
{

	/**
	 * Converts a string from one character set to another.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that bytesRead can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Using extensions such as "//TRANSLIT" may not work (or may not work
	 * well) on many platforms. Consider using g_str_to_ascii() instead.
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the len parameter is unsafe)
	 *     toCodeset = name of character set into which to convert str
	 *     fromCodeset = character set of str.
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = receives the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: the converted text as a D string. The buffer allocated by
	 *     GLib is released with Str.freeString() before this function
	 *     returns, so the caller has nothing to free.
	 *
	 * Throws: GException on failure.
	 */
	public static string convert(string str, ptrdiff_t len, string toCodeset, string fromCodeset, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_convert(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		// Pass the reported length so that output encodings containing nul
		// bytes (UTF-16, UTF-32, ...) are not cut at the first nul.
		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Returns the error domain quark reported by g_convert_error_quark().
	 */
	public static GQuark convertErrorQuark()
	{
		return g_convert_error_quark();
	}

	/**
	 * Converts a string from one character set to another, possibly
	 * including fallback sequences for characters not representable
	 * in the output. Note that it is not guaranteed that the specification
	 * for the fallback sequences in fallback will be honored. Some
	 * systems may do an approximate conversion from fromCodeset
	 * to toCodeset in their iconv() functions,
	 * in which case GLib will simply return that approximate conversion.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that bytesRead can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the len parameter is unsafe)
	 *     toCodeset = name of character set into which to convert str
	 *     fromCodeset = character set of str.
	 *     fallback = UTF-8 string to use in place of character not
	 *         present in the target encoding. (The string must be
	 *         representable in the target encoding).
	 *         If null, characters not in the target encoding will
	 *         be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or null.
	 *         Even if the conversion was successful, this may be
	 *         less than len if there were partial characters
	 *         at the end of the input.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or null.
	 *
	 * Returns: the converted text as a D string. The buffer allocated by
	 *     GLib is released with Str.freeString() before this function
	 *     returns, so the caller has nothing to free.
	 *
	 * Throws: GException on failure.
	 */
	public static string convertWithFallback(string str, ptrdiff_t len, string toCodeset, string fromCodeset, string fallback, size_t* bytesRead, size_t* bytesWritten)
	{
		GError* err = null;

		// The output length is needed to copy the result even when the
		// caller is not interested in it, so fall back to a local counter.
		size_t written;
		size_t* writtenPtr = (bytesWritten is null) ? &written : bytesWritten;

		auto retStr = g_convert_with_fallback(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), Str.toStringz(fallback), bytesRead, writtenPtr, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		// Length-aware copy: do not stop at nul bytes inside the output.
		return Str.toString(retStr, *writtenPtr);
	}

	/**
	 * Converts a string from one character set to another using an
	 * already opened conversion descriptor.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that bytesRead can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the len parameter is unsafe)
	 *     converter = conversion descriptor from g_iconv_open()
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or null.
	 *         Even if the conversion was successful, this may be
	 *         less than len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or null.
	 *
	 * Returns: the converted text as a D string. The buffer allocated by
	 *     GLib is released with Str.freeString() before this function
	 *     returns, so the caller has nothing to free.
	 *
	 * Throws: GException on failure.
	 */
	public static string convertWithIconv(string str, ptrdiff_t len, GIConv converter, size_t* bytesRead, size_t* bytesWritten)
	{
		GError* err = null;

		// The output length is needed to copy the result even when the
		// caller is not interested in it, so fall back to a local counter.
		size_t written;
		size_t* writtenPtr = (bytesWritten is null) ? &written : bytesWritten;

		auto retStr = g_convert_with_iconv(Str.toStringz(str), len, converter, bytesRead, writtenPtr, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		// Length-aware copy: do not stop at nul bytes inside the output.
		return Str.toString(retStr, *writtenPtr);
	}

	/**
	 * Returns the display basename for the particular filename, guaranteed
	 * to be valid UTF-8. The display name might not be identical to the filename,
	 * for instance there might be problems converting it to UTF-8, and some files
	 * can be translated in the display.
	 *
	 * If GLib cannot make sense of the encoding of filename, as a last resort it
	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
	 * You can search the result for the UTF-8 encoding of this character (which is
	 * "\357\277\275" in octal notation) to find out if filename was in an invalid
	 * encoding.
	 *
	 * You must pass the whole absolute pathname to this function so that
	 * translation of well known locations can be done.
	 *
	 * This function is preferred over g_filename_display_name() if you know the
	 * whole path, as it allows translation.
	 *
	 * Params:
	 *     filename = an absolute pathname in the
	 *         GLib file name encoding
	 *
	 * Returns: a rendition of the basename of the filename in valid UTF-8.
	 *     The buffer allocated by GLib is released by this wrapper.
	 *
	 * Since: 2.6
	 */
	public static string filenameDisplayBasename(string filename)
	{
		auto retStr = g_filename_display_basename(Str.toStringz(filename));

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Converts a filename into a valid UTF-8 string. The conversion is
	 * not necessarily reversible, so you should keep the original around
	 * and use the return value of this function only for display purposes.
	 * Unlike g_filename_to_utf8(), the result is guaranteed to be non-null
	 * even if the filename actually isn't in the GLib file name encoding.
	 *
	 * If GLib cannot make sense of the encoding of filename, as a last resort it
	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
	 * You can search the result for the UTF-8 encoding of this character (which is
	 * "\357\277\275" in octal notation) to find out if filename was in an invalid
	 * encoding.
	 *
	 * If you know the whole pathname of the file you should use
	 * g_filename_display_basename(), since that allows location-based
	 * translation of filenames.
	 *
	 * Params:
	 *     filename = a pathname hopefully in the
	 *         GLib file name encoding
	 *
	 * Returns: a rendition of the filename in valid UTF-8.
	 *     The buffer allocated by GLib is released by this wrapper.
	 *
	 * Since: 2.6
	 */
	public static string filenameDisplayName(string filename)
	{
		auto retStr = g_filename_display_name(Str.toStringz(filename));

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Converts a string from UTF-8 to the encoding GLib uses for
	 * filenames. Note that on Windows GLib uses UTF-8 for filenames;
	 * on other platforms, this function indirectly depends on the
	 * [current locale][setlocale].
	 *
	 * Params:
	 *     utf8string = a UTF-8 encoded string.
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated.
	 *     bytesRead = receives the number of bytes in
	 *         the input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *
	 * Returns: the converted string, copied using the byte count
	 *     reported by GLib. The buffer allocated by GLib is released
	 *     by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string filenameFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead)
	{
		size_t bytesWritten;
		GError* err = null;

		auto retStr = g_filename_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Converts a string which is in the encoding used by GLib for
	 * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
	 * for filenames; on other platforms, this function indirectly depends on
	 * the [current locale][setlocale].
	 *
	 * Params:
	 *     opsysstring = a string in the encoding for filenames
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the len parameter is unsafe)
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = receives the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: the converted string. The buffer allocated by GLib is
	 *     released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string filenameToUtf8(string opsysstring, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_filename_to_utf8(Str.toStringz(opsysstring), len, &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Obtains the character set for the [current locale][setlocale]; you
	 * might use this character set as an argument to g_convert(), to convert
	 * from the current locale's encoding to some other encoding. (Frequently
	 * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.)
	 *
	 * On Windows the character set returned by this function is the
	 * so-called system default ANSI code-page. That is the character set
	 * used by the "narrow" versions of C library and Win32 functions that
	 * handle file names. It might be different from the character set
	 * used by the C library's current locale.
	 *
	 * The return value is true if the locale's encoding is UTF-8, in that
	 * case you can perhaps avoid calling g_convert().
	 *
	 * The C string reported by GLib is not allocated for the caller, so
	 * this wrapper does not free it.
	 *
	 * Params:
	 *     charset = receives the character set name.
	 *
	 * Returns: true if the returned charset is UTF-8
	 */
	public static bool getCharset(out string charset)
	{
		char* outcharset = null;

		auto p = g_get_charset(&outcharset) != 0;

		charset = Str.toString(outcharset);

		return p;
	}

	/**
	 * Gets the character set for the current locale.
	 *
	 * Returns: the name of the character set. The buffer allocated by
	 *     GLib is released by this wrapper.
	 */
	public static string getCodeset()
	{
		auto retStr = g_get_codeset();

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Determines the preferred character sets used for filenames.
	 *
	 * NOTE: this overload takes charsets by value and hands a temporary
	 * C array to GLib, so the list reported by GLib never reaches the
	 * caller; only the boolean result is usable. It is kept unchanged for
	 * source compatibility. Use the overload taking `out string[]` to
	 * obtain the character set names.
	 *
	 * Params:
	 *     charsets = not updated by this overload (see note above)
	 *
	 * Returns: true if the filename encoding is UTF-8.
	 *
	 * Since: 2.6
	 */
	public static bool getFilenameCharsets(string[][] charsets)
	{
		return g_get_filename_charsets(Str.toStringzArray(charsets)) != 0;
	}

	/**
	 * Determines the preferred character sets used for filenames.
	 * The first character set from the charsets is the filename encoding, the
	 * subsequent character sets are used when trying to generate a displayable
	 * representation of a filename, see g_filename_display_name().
	 *
	 * On Unix, the character sets are determined by consulting the
	 * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
	 * On Windows, the character set used in the GLib API is always UTF-8
	 * and said environment variables have no effect.
	 *
	 * `G_FILENAME_ENCODING` may be set to a comma-separated list of
	 * character set names. The special token "\@locale" is taken
	 * to mean the character set for the [current locale][setlocale].
	 * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
	 * the character set of the current locale is taken as the filename
	 * encoding. If neither environment variable is set, UTF-8 is taken
	 * as the filename encoding, but the character set of the current locale
	 * is also put in the list of encodings.
	 *
	 * The list reported by GLib belongs to GLib and is not freed here;
	 * each name is converted with Str.toString() into the result array.
	 *
	 * Note that on Unix, regardless of the locale character set or
	 * `G_FILENAME_ENCODING` value, the actual file names present
	 * on a system might be in any random encoding or just gibberish.
	 *
	 * Params:
	 *     charsets = receives the list of encoding names
	 *
	 * Returns: true if the filename encoding is UTF-8.
	 *
	 * Since: 2.6
	 */
	public static bool getFilenameCharsets(out string[] charsets)
	{
		char** outcharsets = null;

		auto p = g_get_filename_charsets(&outcharsets) != 0;

		// Walk the null-terminated C array owned by GLib.
		if (outcharsets !is null)
		{
			for (size_t i = 0; outcharsets[i] !is null; i++)
			{
				charsets ~= Str.toString(outcharsets[i]);
			}
		}

		return p;
	}

	/**
	 * Converts a string from UTF-8 to the encoding used for strings by
	 * the C runtime (usually the same as that used by the operating
	 * system) in the [current locale][setlocale]. On Windows this means
	 * the system codepage.
	 *
	 * Params:
	 *     utf8string = a UTF-8 encoded string
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the len parameter is unsafe)
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = receives the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: the converted string, copied using the byte count
	 *     reported by GLib. The buffer allocated by GLib is released
	 *     by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string localeFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_locale_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		// Same length-aware copy as filenameFromUtf8: the target encoding
		// is not UTF-8 and may contain nul bytes.
		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Converts a string which is in the encoding used for strings by
	 * the C runtime (usually the same as that used by the operating
	 * system) in the [current locale][setlocale] into a UTF-8 string.
	 *
	 * Params:
	 *     opsysstring = a string in the encoding of the current locale. On Windows
	 *         this means the system codepage.
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the len parameter is unsafe)
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = receives the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: the converted string. The buffer allocated by GLib is
	 *     released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string localeToUtf8(string opsysstring, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_locale_to_utf8(Str.toStringz(opsysstring), len, &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}
}