/*
 * This file is part of gtkD.
 *
 * gtkD is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version, with
 * some exceptions, please read the COPYING file.
 *
 * gtkD is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with gtkD; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 */

// generated automatically - do not change
// find conversion definition on APILookup.txt
// implement new conversion functionalities on the wrap.utils package


module glib.CharacterSet;

private import glib.ErrorG;
private import glib.GException;
private import glib.Str;
private import gtkc.glib;
public  import gtkc.glibtypes;


/**
 * Static wrappers around the GLib character set conversion functions.
 *
 * Every method forwards to the matching g_* function. Functions that
 * report failure through a GError throw a GException instead, and
 * string results allocated by GLib are released by the wrapper once
 * they have been turned into a D string.
 */
public struct CharacterSet
{

	/**
	 * Converts a string from one character set to another.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytes_read can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Using extensions such as "//TRANSLIT" may not work (or may not work
	 * well) on many platforms.  Consider using g_str_to_ascii() instead.
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     toCodeset = name of character set into which to convert @str
	 *     fromCodeset = character set of @str.
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = receives the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Return: the converted text. The result is built from exactly
	 *     bytesWritten bytes of the GLib output buffer, so output that
	 *     contains nul bytes (for example UTF-16) is not cut short.
	 *     The GLib buffer itself is released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string convert(string str, ptrdiff_t len, string toCodeset, string fromCodeset, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_convert(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		// Pass the reported length: scanning for the first nul byte would
		// truncate output in encodings that embed zero bytes.
		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Returns the error domain quark reported by g_convert_error_quark().
	 */
	public static GQuark convertErrorQuark()
	{
		return g_convert_error_quark();
	}

	/**
	 * Converts a string from one character set to another, possibly
	 * including fallback sequences for characters not representable
	 * in the output. Note that it is not guaranteed that the specification
	 * for the fallback sequences in @fallback will be honored. Some
	 * systems may do an approximate conversion from @from_codeset
	 * to @to_codeset in their iconv() functions,
	 * in which case GLib will simply return that approximate conversion.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytes_read can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     toCodeset = name of character set into which to convert @str
	 *     fromCodeset = character set of @str.
	 *     fallback = UTF-8 string to use in place of character not
	 *         present in the target encoding. (The string must be
	 *         representable in the target encoding).
	 *         If %NULL, characters not in the target encoding will
	 *         be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or %NULL.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or %NULL.
	 *
	 * Return: the converted text, built from the number of bytes GLib
	 *     reports as written so that embedded nul bytes are kept.
	 *     The GLib buffer itself is released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string convertWithFallback(string str, ptrdiff_t len, string toCodeset, string fromCodeset, string fallback, size_t* bytesRead, size_t* bytesWritten)
	{
		// The byte count is always needed to build the result, so fall back
		// to a local counter when the caller did not ask for it.
		size_t localWritten;
		size_t* written = (bytesWritten is null) ? &localWritten : bytesWritten;
		GError* err = null;

		auto retStr = g_convert_with_fallback(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), Str.toStringz(fallback), bytesRead, written, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);

		if (retStr is null)
		{
			// No output buffer: keep the original null handling and do not
			// trust the counter.
			return Str.toString(retStr);
		}

		return Str.toString(retStr, *written);
	}

	/**
	 * Converts a string from one character set to another.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytes_read can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     converter = conversion descriptor from g_iconv_open()
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or %NULL.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or %NULL.
	 *
	 * Return: the converted text, built from the number of bytes GLib
	 *     reports as written so that embedded nul bytes are kept.
	 *     The GLib buffer itself is released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string convertWithIconv(string str, ptrdiff_t len, GIConv converter, size_t* bytesRead, size_t* bytesWritten)
	{
		// The byte count is always needed to build the result, so fall back
		// to a local counter when the caller did not ask for it.
		size_t localWritten;
		size_t* written = (bytesWritten is null) ? &localWritten : bytesWritten;
		GError* err = null;

		auto retStr = g_convert_with_iconv(Str.toStringz(str), len, converter, bytesRead, written, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);

		if (retStr is null)
		{
			// No output buffer: keep the original null handling and do not
			// trust the counter.
			return Str.toString(retStr);
		}

		return Str.toString(retStr, *written);
	}

	/**
	 * Returns the display basename for the particular filename, guaranteed
	 * to be valid UTF-8. The display name might not be identical to the filename,
	 * for instance there might be problems converting it to UTF-8, and some files
	 * can be translated in the display.
	 *
	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
	 * You can search the result for the UTF-8 encoding of this character (which is
	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
	 * encoding.
	 *
	 * You must pass the whole absolute pathname to this function so that
	 * translation of well known locations can be done.
	 *
	 * This function is preferred over g_filename_display_name() if you know the
	 * whole path, as it allows translation.
	 *
	 * Params:
	 *     filename = an absolute pathname in the GLib file name encoding
	 *
	 * Return: a rendition of the basename of the filename in valid UTF-8.
	 *     The GLib buffer is released by this wrapper.
	 *
	 * Since: 2.6
	 */
	public static string filenameDisplayBasename(string filename)
	{
		auto retStr = g_filename_display_basename(Str.toStringz(filename));

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Converts a filename into a valid UTF-8 string. The conversion is
	 * not necessarily reversible, so you should keep the original around
	 * and use the return value of this function only for display purposes.
	 * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL
	 * even if the filename actually isn't in the GLib file name encoding.
	 *
	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
	 * You can search the result for the UTF-8 encoding of this character (which is
	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
	 * encoding.
	 *
	 * If you know the whole pathname of the file you should use
	 * g_filename_display_basename(), since that allows location-based
	 * translation of filenames.
	 *
	 * Params:
	 *     filename = a pathname hopefully in the GLib file name encoding
	 *
	 * Return: a rendition of the filename in valid UTF-8.
	 *     The GLib buffer is released by this wrapper.
	 *
	 * Since: 2.6
	 */
	public static string filenameDisplayName(string filename)
	{
		auto retStr = g_filename_display_name(Str.toStringz(filename));

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Converts a string from UTF-8 to the encoding GLib uses for
	 * filenames. Note that on Windows GLib uses UTF-8 for filenames;
	 * on other platforms, this function indirectly depends on the
	 * [current locale][setlocale].
	 *
	 * Params:
	 *     utf8string = a UTF-8 encoded string.
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated.
	 *     bytesRead = receives the number of bytes in
	 *         the input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *
	 * Return: the converted string, built from the number of bytes GLib
	 *     reports as written. The GLib buffer is released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string filenameFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead)
	{
		size_t bytesWritten;
		GError* err = null;

		auto retStr = g_filename_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Converts a string which is in the encoding used by GLib for
	 * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
	 * for filenames; on other platforms, this function indirectly depends on
	 * the [current locale][setlocale].
	 *
	 * Params:
	 *     opsysstring = a string in the encoding for filenames
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or %NULL.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or %NULL.
	 *
	 * Return: the converted string. The GLib buffer is released by
	 *     this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string filenameToUtf8(string opsysstring, ptrdiff_t len, size_t* bytesRead, size_t* bytesWritten)
	{
		GError* err = null;

		auto retStr = g_filename_to_utf8(Str.toStringz(opsysstring), len, bytesRead, bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Obtains the character set for the [current locale][setlocale]; you
	 * might use this character set as an argument to g_convert(), to convert
	 * from the current locale's encoding to some other encoding. (Frequently
	 * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.)
	 *
	 * On Windows the character set returned by this function is the
	 * so-called system default ANSI code-page. That is the character set
	 * used by the "narrow" versions of C library and Win32 functions that
	 * handle file names. It might be different from the character set
	 * used by the C library's current locale.
	 *
	 * The return value is %TRUE if the locale's encoding is UTF-8, in that
	 * case you can perhaps avoid calling g_convert().
	 *
	 * The C string GLib reports is not allocated for the caller, so this
	 * wrapper does not free it.
	 *
	 * Params:
	 *     charset = receives the character set name
	 *
	 * Return: %TRUE if the returned charset is UTF-8
	 */
	public static bool getCharset(out string charset)
	{
		char* outcharset = null;

		auto p = g_get_charset(&outcharset) != 0;

		charset = Str.toString(outcharset);

		return p;
	}

	/**
	 * Gets the character set for the current locale.
	 *
	 * Return: the name of the character set. The GLib buffer is
	 *     released by this wrapper.
	 */
	public static string getCodeset()
	{
		auto retStr = g_get_codeset();

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Determines the preferred character sets used for filenames.
	 * The first character set from the @charsets is the filename encoding, the
	 * subsequent character sets are used when trying to generate a displayable
	 * representation of a filename, see g_filename_display_name().
	 *
	 * On Unix, the character sets are determined by consulting the
	 * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
	 * On Windows, the character set used in the GLib API is always UTF-8
	 * and said environment variables have no effect.
	 *
	 * `G_FILENAME_ENCODING` may be set to a comma-separated list of
	 * character set names. The special token "@locale" is taken
	 * to mean the character set for the [current locale][setlocale].
	 * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
	 * the character set of the current locale is taken as the filename
	 * encoding. If neither environment variable is set, UTF-8 is taken
	 * as the filename encoding, but the character set of the current locale
	 * is also put in the list of encodings.
	 *
	 * Note that on Unix, regardless of the locale character set or
	 * `G_FILENAME_ENCODING` value, the actual file names present
	 * on a system might be in any random encoding or just gibberish.
	 *
	 * NOTE: this overload is kept for source compatibility. It hands GLib
	 * a temporary produced by Str.toStringzArray() and never assigns to
	 * @charsets, so the list reported by GLib does not reach the caller;
	 * only the boolean result is usable. Use the overload taking
	 * `out string[]` to obtain the names.
	 *
	 * Params:
	 *     charsets = not modified by this overload (see the note above)
	 *
	 * Return: %TRUE if the filename encoding is UTF-8.
	 *
	 * Since: 2.6
	 */
	public static bool getFilenameCharsets(string[][] charsets)
	{
		return g_get_filename_charsets(Str.toStringzArray(charsets)) != 0;
	}

	/**
	 * Determines the preferred character sets used for filenames and
	 * returns their names.
	 *
	 * The first entry is the filename encoding; the subsequent entries
	 * are used when trying to generate a displayable representation of
	 * a filename, see g_filename_display_name().
	 *
	 * The %NULL-terminated list reported by GLib belongs to GLib and must
	 * not be freed, so this wrapper only reads it and frees nothing.
	 *
	 * Params:
	 *     charsets = receives the list of encoding names
	 *
	 * Return: %TRUE if the filename encoding is UTF-8.
	 *
	 * Since: 2.6
	 */
	public static bool getFilenameCharsets(out string[] charsets)
	{
		char** outcharsets = null;

		auto p = g_get_filename_charsets(&outcharsets) != 0;

		if (outcharsets !is null)
		{
			// Walk the list up to its terminating null entry.
			for (size_t i = 0; outcharsets[i] !is null; i++)
			{
				charsets ~= Str.toString(outcharsets[i]);
			}
		}

		return p;
	}

	/**
	 * Converts a string from UTF-8 to the encoding used for strings by
	 * the C runtime (usually the same as that used by the operating
	 * system) in the [current locale][setlocale]. On Windows this means
	 * the system codepage.
	 *
	 * Params:
	 *     utf8string = a UTF-8 encoded string
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or %NULL.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or %NULL.
	 *
	 * Return: the converted string. The GLib buffer is released by
	 *     this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string localeFromUtf8(string utf8string, ptrdiff_t len, size_t* bytesRead, size_t* bytesWritten)
	{
		GError* err = null;

		auto retStr = g_locale_from_utf8(Str.toStringz(utf8string), len, bytesRead, bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Converts a string which is in the encoding used for strings by
	 * the C runtime (usually the same as that used by the operating
	 * system) in the [current locale][setlocale] into a UTF-8 string.
	 *
	 * Params:
	 *     opsysstring = a string in the encoding of the current locale. On Windows
	 *         this means the system codepage.
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or %NULL.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or %NULL.
	 *
	 * Return: the converted string. The GLib buffer is released by
	 *     this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string localeToUtf8(string opsysstring, ptrdiff_t len, size_t* bytesRead, size_t* bytesWritten)
	{
		GError* err = null;

		auto retStr = g_locale_to_utf8(Str.toStringz(opsysstring), len, bytesRead, bytesWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}
}