/*
 * This file is part of gtkD.
 *
 * gtkD is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version, with
 * some exceptions, please read the COPYING file.
 *
 * gtkD is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with gtkD; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 */

// generated automatically - do not change
// find conversion definition on APILookup.txt
// implement new conversion functionalities on the wrap.utils package


module glib.CharacterSet;

private import glib.ErrorG;
private import glib.GException;
private import glib.Str;
private import glib.c.functions;
public  import glib.c.types;
public  import gtkc.glibtypes;


/**
 * Static wrappers around GLib's character set conversion functions
 * (g_convert() and friends, filename and locale conversions).
 */
public struct CharacterSet
{

	/**
	 * Converts a string from one character set to another.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytes_read can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Using extensions such as "//TRANSLIT" may not work (or may not work
	 * well) on many platforms.  Consider using g_str_to_ascii() instead.
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     toCodeset = name of character set into which to convert @str
	 *     fromCodeset = character set of @str.
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = the number of bytes stored in the output buffer (not
	 *         including the terminating nul).
	 *
	 * Returns: If the conversion was successful, the converted string
	 *     (bytesWritten bytes long). The buffer allocated by GLib is
	 *     released by this wrapper, so the caller must not free anything.
	 *
	 * Throws: GException on failure.
	 */
	public static string convert(string str, ptrdiff_t len, string toCodeset, string fromCodeset, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_convert(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		// Pass the reported byte count so that output containing nul bytes
		// (e.g. UTF-16 or UTF-32) is not cut off at the first nul.
		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Returns the error domain quark reported by g_convert_error_quark().
	 */
	public static GQuark convertErrorQuark()
	{
		return g_convert_error_quark();
	}

	/**
	 * Converts a string from one character set to another, possibly
	 * including fallback sequences for characters not representable
	 * in the output. Note that it is not guaranteed that the specification
	 * for the fallback sequences in @fallback will be honored. Some
	 * systems may do an approximate conversion from @from_codeset
	 * to @to_codeset in their iconv() functions,
	 * in which case GLib will simply return that approximate conversion.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytes_read can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     toCodeset = name of character set into which to convert @str
	 *     fromCodeset = character set of @str.
	 *     fallback = UTF-8 string to use in place of character not
	 *         present in the target encoding. (The string must be
	 *         representable in the target encoding).
	 *         If null, characters not in the target encoding will
	 *         be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or null.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or null.
	 *
	 * Returns: If the conversion was successful, the converted string.
	 *     The buffer allocated by GLib is released by this wrapper, so
	 *     the caller must not free anything.
	 *
	 * Throws: GException on failure.
	 */
	public static string convertWithFallback(string str, ptrdiff_t len, string toCodeset, string fromCodeset, string fallback, size_t* bytesRead, size_t* bytesWritten)
	{
		GError* err = null;

		// The caller may pass null for bytesWritten; use a local in that
		// case so the output length is always known to this wrapper.
		size_t localWritten;
		size_t* written = (bytesWritten !is null) ? bytesWritten : &localWritten;

		auto retStr = g_convert_with_fallback(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), Str.toStringz(fallback), bytesRead, written, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		// Use the reported byte count so embedded nul bytes do not
		// truncate the result.
		return Str.toString(retStr, *written);
	}

	/**
	 * Converts a string from one character set to another.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytes_read can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     converter = conversion descriptor from g_iconv_open()
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or null.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or null.
	 *
	 * Returns: If the conversion was successful, the converted string.
	 *     The buffer allocated by GLib is released by this wrapper, so
	 *     the caller must not free anything.
	 *
	 * Throws: GException on failure.
	 */
	public static string convertWithIconv(string str, ptrdiff_t len, GIConv converter, size_t* bytesRead, size_t* bytesWritten)
	{
		GError* err = null;

		// The caller may pass null for bytesWritten; use a local in that
		// case so the output length is always known to this wrapper.
		size_t localWritten;
		size_t* written = (bytesWritten !is null) ? bytesWritten : &localWritten;

		auto retStr = g_convert_with_iconv(Str.toStringz(str), len, converter, bytesRead, written, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		// Use the reported byte count so embedded nul bytes do not
		// truncate the result.
		return Str.toString(retStr, *written);
	}

	/**
	 * Returns the display basename for the particular filename, guaranteed
	 * to be valid UTF-8. The display name might not be identical to the filename,
	 * for instance there might be problems converting it to UTF-8, and some files
	 * can be translated in the display.
	 *
	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
	 * You can search the result for the UTF-8 encoding of this character (which is
	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
	 * encoding.
	 *
	 * You must pass the whole absolute pathname to this function so that
	 * translation of well known locations can be done.
	 *
	 * This function is preferred over g_filename_display_name() if you know the
	 * whole path, as it allows translation.
	 *
	 * Params:
	 *     filename = an absolute pathname in the
	 *         GLib file name encoding
	 *
	 * Returns: a string containing a rendition of the basename of the
	 *     filename in valid UTF-8. The buffer allocated by GLib is released
	 *     by this wrapper.
	 *
	 * Since: 2.6
	 */
	public static string filenameDisplayBasename(string filename)
	{
		auto retStr = g_filename_display_basename(Str.toStringz(filename));

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Converts a filename into a valid UTF-8 string. The conversion is
	 * not necessarily reversible, so you should keep the original around
	 * and use the return value of this function only for display purposes.
	 * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL
	 * even if the filename actually isn't in the GLib file name encoding.
	 *
	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
	 * You can search the result for the UTF-8 encoding of this character (which is
	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
	 * encoding.
	 *
	 * If you know the whole pathname of the file you should use
	 * g_filename_display_basename(), since that allows location-based
	 * translation of filenames.
	 *
	 * Params:
	 *     filename = a pathname hopefully in the
	 *         GLib file name encoding
	 *
	 * Returns: a string containing a rendition of the filename in valid
	 *     UTF-8. The buffer allocated by GLib is released by this wrapper.
	 *
	 * Since: 2.6
	 */
	public static string filenameDisplayName(string filename)
	{
		auto retStr = g_filename_display_name(Str.toStringz(filename));

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Converts a string from UTF-8 to the encoding GLib uses for
	 * filenames. Note that on Windows GLib uses UTF-8 for filenames;
	 * on other platforms, this function indirectly depends on the
	 * [current locale][setlocale].
	 *
	 * Params:
	 *     utf8string = a UTF-8 encoded string.
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated.
	 *     bytesRead = receives the number of bytes in
	 *         the input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *
	 * Returns: The converted string. The buffer allocated by GLib is
	 *     released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string filenameFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead)
	{
		size_t bytesWritten;
		GError* err = null;

		auto retStr = g_filename_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Converts a string which is in the encoding used by GLib for
	 * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
	 * for filenames; on other platforms, this function indirectly depends on
	 * the [current locale][setlocale].
	 *
	 * Params:
	 *     opsysstring = a string in the encoding for filenames
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: The converted string. The buffer allocated by GLib is
	 *     released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string filenameToUtf8(string opsysstring, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_filename_to_utf8(Str.toStringz(opsysstring), len, &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Obtains the character set for the [current locale][setlocale]; you
	 * might use this character set as an argument to g_convert(), to convert
	 * from the current locale's encoding to some other encoding. (Frequently
	 * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.)
	 *
	 * On Windows the character set returned by this function is the
	 * so-called system default ANSI code-page. That is the character set
	 * used by the "narrow" versions of C library and Win32 functions that
	 * handle file names. It might be different from the character set
	 * used by the C library's current locale.
	 *
	 * The return value is %TRUE if the locale's encoding is UTF-8, in that
	 * case you can perhaps avoid calling g_convert().
	 *
	 * The C string GLib stores for @charset is not allocated for the
	 * caller, which is why this wrapper does not free it.
	 *
	 * Params:
	 *     charset = receives the character set name
	 *
	 * Returns: %TRUE if the returned charset is UTF-8
	 */
	public static bool getCharset(out string charset)
	{
		char* outcharset = null;

		auto p = g_get_charset(&outcharset) != 0;

		charset = Str.toString(outcharset);

		return p;
	}

	/**
	 * Gets the character set for the current locale.
	 *
	 * Returns: a string containing the name of the character set.
	 *     The buffer allocated by GLib is released by this wrapper.
	 */
	public static string getCodeset()
	{
		auto retStr = g_get_codeset();

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Determines the preferred character sets used for filenames.
	 * The first character set from the @charsets is the filename encoding, the
	 * subsequent character sets are used when trying to generate a displayable
	 * representation of a filename, see g_filename_display_name().
	 *
	 * On Unix, the character sets are determined by consulting the
	 * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
	 * On Windows, the character set used in the GLib API is always UTF-8
	 * and said environment variables have no effect.
	 *
	 * `G_FILENAME_ENCODING` may be set to a comma-separated list of
	 * character set names. The special token "\@locale" is taken
	 * to mean the character set for the [current locale][setlocale].
	 * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
	 * the character set of the current locale is taken as the filename
	 * encoding. If neither environment variable is set, UTF-8 is taken
	 * as the filename encoding, but the character set of the current locale
	 * is also put in the list of encodings.
	 *
	 * Note that on Unix, regardless of the locale character set or
	 * `G_FILENAME_ENCODING` value, the actual file names present
	 * on a system might be in any random encoding or just gibberish.
	 *
	 * NOTE: this overload hands GLib a temporary C array built from
	 * @charsets and never copies anything back, so the caller's array is
	 * not updated. It is kept for source compatibility; use the
	 * `out string[]` overload to actually obtain the list.
	 *
	 * Params:
	 *     charsets = array converted with Str.toStringzArray and passed to
	 *         g_get_filename_charsets(); not modified by this call
	 *
	 * Returns: %TRUE if the filename encoding is UTF-8.
	 *
	 * Since: 2.6
	 */
	public static bool getFilenameCharsets(string[][] charsets)
	{
		return g_get_filename_charsets(Str.toStringzArray(charsets)) != 0;
	}

	/**
	 * Determines the preferred character sets used for filenames and
	 * returns them to the caller.
	 *
	 * The first entry of @charsets is the filename encoding, the subsequent
	 * entries are used when trying to generate a displayable representation
	 * of a filename, see g_filename_display_name(). See the other overload
	 * of this function for how `G_FILENAME_ENCODING` and
	 * `G_BROKEN_FILENAMES` influence the result.
	 *
	 * The list returned by GLib belongs to GLib and must not be freed, so
	 * this wrapper only converts it to D strings.
	 *
	 * Params:
	 *     charsets = receives the list of encoding names
	 *
	 * Returns: %TRUE if the filename encoding is UTF-8.
	 *
	 * Since: 2.6
	 */
	public static bool getFilenameCharsets(out string[] charsets)
	{
		char** outcharsets = null;

		auto p = g_get_filename_charsets(&outcharsets) != 0;

		// GLib owns the NULL-terminated list: convert it, do not free it.
		charsets = Str.toStringArray(outcharsets);

		return p;
	}

	/**
	 * Converts a string from UTF-8 to the encoding used for strings by
	 * the C runtime (usually the same as that used by the operating
	 * system) in the [current locale][setlocale]. On Windows this means
	 * the system codepage.
	 *
	 * Params:
	 *     utf8string = a UTF-8 encoded string
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: The converted string (bytesWritten bytes long). The buffer
	 *     allocated by GLib is released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string localeFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_locale_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		// Same length-aware conversion as filenameFromUtf8, so a target
		// encoding that contains nul bytes is not truncated.
		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Converts a string which is in the encoding used for strings by
	 * the C runtime (usually the same as that used by the operating
	 * system) in the [current locale][setlocale] into a UTF-8 string.
	 *
	 * Params:
	 *     opsysstring = a string in the encoding of the current locale. On Windows
	 *         this means the system codepage.
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: The converted string. The buffer allocated by GLib is
	 *     released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string localeToUtf8(string opsysstring, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_locale_to_utf8(Str.toStringz(opsysstring), len, &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}
}