/*
 * This file is part of gtkD.
 *
 * gtkD is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version, with
 * some exceptions, please read the COPYING file.
 *
 * gtkD is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with gtkD; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 */

// generated automatically - do not change
// find conversion definition on APILookup.txt
// implement new conversion functionalities on the wrap.utils package


module glib.CharacterSet;

private import glib.ErrorG;
private import glib.GException;
private import glib.Str;
private import gtkc.glib;
public  import gtkc.glibtypes;


public struct CharacterSet
{
	/**
	 */

	/**
	 * Converts a string from one character set to another.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytes_read can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Using extensions such as "//TRANSLIT" may not work (or may not work
	 * well) on many platforms.  Consider using g_str_to_ascii() instead.
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     toCodeset = name of character set into which to convert @str
	 *     fromCodeset = character set of @str.
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = receives the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Return: the converted text as a D string of @bytesWritten bytes.
	 *     The buffer allocated by GLib is copied and released before
	 *     returning, so the caller has nothing to free.
	 *
	 * Throws: GException on failure.
	 */
	public static string convert(string str, ptrdiff_t len, string toCodeset, string fromCodeset, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto p = g_convert(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), &bytesRead, &bytesWritten, &err);
		// The C buffer is owned by us; release it once it has been copied
		// (g_free accepts null, which is what GLib returns on error).
		scope(exit) g_free(p);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		// Pass the byte count so output containing embedded nul bytes
		// (e.g. UTF-16) is not cut short at the first nul.
		return Str.toString(p, bytesWritten);
	}

	/**
	 * Returns the error quark of the conversion error domain, as reported
	 * by g_convert_error_quark().
	 */
	public static GQuark convertErrorQuark()
	{
		return g_convert_error_quark();
	}

	/**
	 * Converts a string from one character set to another, possibly
	 * including fallback sequences for characters not representable
	 * in the output. Note that it is not guaranteed that the specification
	 * for the fallback sequences in @fallback will be honored. Some
	 * systems may do an approximate conversion from @from_codeset
	 * to @to_codeset in their iconv() functions,
	 * in which case GLib will simply return that approximate conversion.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytes_read can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     toCodeset = name of character set into which to convert @str
	 *     fromCodeset = character set of @str.
	 *     fallback = UTF-8 string to use in place of character not
	 *         present in the target encoding. (The string must be
	 *         representable in the target encoding).
	 *         If %NULL, characters not in the target encoding will
	 *         be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or %NULL.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or %NULL.
	 *
	 * Return: the converted text as a D string, read up to the first nul
	 *     byte of the C result. The buffer allocated by GLib is copied and
	 *     released before returning, so the caller has nothing to free.
	 *
	 * Throws: GException on failure.
	 */
	public static string convertWithFallback(string str, ptrdiff_t len, string toCodeset, string fromCodeset, string fallback, size_t* bytesRead, size_t* bytesWritten)
	{
		GError* err = null;

		auto p = g_convert_with_fallback(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), Str.toStringz(fallback), bytesRead, bytesWritten, &err);
		// Release the GLib-allocated buffer once it has been copied.
		scope(exit) g_free(p);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		return Str.toString(p);
	}

	/**
	 * Converts a string from one character set to another.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytes_read can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     converter = conversion descriptor from g_iconv_open()
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or %NULL.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or %NULL.
	 *
	 * Return: the converted text as a D string, read up to the first nul
	 *     byte of the C result. The buffer allocated by GLib is copied and
	 *     released before returning, so the caller has nothing to free.
	 *
	 * Throws: GException on failure.
	 */
	public static string convertWithIconv(string str, ptrdiff_t len, GIConv converter, size_t* bytesRead, size_t* bytesWritten)
	{
		GError* err = null;

		auto p = g_convert_with_iconv(Str.toStringz(str), len, converter, bytesRead, bytesWritten, &err);
		// Release the GLib-allocated buffer once it has been copied.
		scope(exit) g_free(p);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		return Str.toString(p);
	}

	/**
	 * Returns the display basename for the particular filename, guaranteed
	 * to be valid UTF-8. The display name might not be identical to the filename,
	 * for instance there might be problems converting it to UTF-8, and some files
	 * can be translated in the display.
	 *
	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
	 * You can search the result for the UTF-8 encoding of this character (which is
	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
	 * encoding.
	 *
	 * You must pass the whole absolute pathname to this function so that
	 * translation of well known locations can be done.
	 *
	 * This function is preferred over g_filename_display_name() if you know the
	 * whole path, as it allows translation.
	 *
	 * Params:
	 *     filename = an absolute pathname in the GLib file name encoding
	 *
	 * Return: a D string containing a rendition of the basename of the
	 *     filename in valid UTF-8. The buffer allocated by GLib is copied
	 *     and released before returning.
	 *
	 * Since: 2.6
	 */
	public static string filenameDisplayBasename(string filename)
	{
		auto p = g_filename_display_basename(Str.toStringz(filename));
		// Release the GLib-allocated buffer once it has been copied.
		scope(exit) g_free(p);

		return Str.toString(p);
	}

	/**
	 * Converts a filename into a valid UTF-8 string. The conversion is
	 * not necessarily reversible, so you should keep the original around
	 * and use the return value of this function only for display purposes.
	 * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL
	 * even if the filename actually isn't in the GLib file name encoding.
	 *
	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
	 * You can search the result for the UTF-8 encoding of this character (which is
	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
	 * encoding.
	 *
	 * If you know the whole pathname of the file you should use
	 * g_filename_display_basename(), since that allows location-based
	 * translation of filenames.
	 *
	 * Params:
	 *     filename = a pathname hopefully in the GLib file name encoding
	 *
	 * Return: a D string containing a rendition of the filename in valid
	 *     UTF-8. The buffer allocated by GLib is copied and released
	 *     before returning.
	 *
	 * Since: 2.6
	 */
	public static string filenameDisplayName(string filename)
	{
		auto p = g_filename_display_name(Str.toStringz(filename));
		// Release the GLib-allocated buffer once it has been copied.
		scope(exit) g_free(p);

		return Str.toString(p);
	}

	/**
	 * Converts a string from UTF-8 to the encoding GLib uses for
	 * filenames. Note that on Windows GLib uses UTF-8 for filenames;
	 * on other platforms, this function indirectly depends on the
	 * [current locale][setlocale].
	 *
	 * Params:
	 *     utf8string = a UTF-8 encoded string.
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated.
	 *     bytesRead = receives the number of bytes in
	 *         the input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *
	 * Return: the converted string. The buffer allocated by GLib is copied
	 *     and released before returning.
	 *
	 * Throws: GException on failure.
	 */
	public static string filenameFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead)
	{
		size_t bytesWritten;
		GError* err = null;

		auto p = g_filename_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);
		// Release the GLib-allocated buffer once it has been copied.
		scope(exit) g_free(p);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		return Str.toString(p, bytesWritten);
	}

	/**
	 * Converts a string which is in the encoding used by GLib for
	 * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
	 * for filenames; on other platforms, this function indirectly depends on
	 * the [current locale][setlocale].
	 *
	 * Params:
	 *     opsysstring = a string in the encoding for filenames
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or %NULL.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or %NULL.
	 *
	 * Return: the converted string. The buffer allocated by GLib is copied
	 *     and released before returning.
	 *
	 * Throws: GException on failure.
	 */
	public static string filenameToUtf8(string opsysstring, ptrdiff_t len, size_t* bytesRead, size_t* bytesWritten)
	{
		GError* err = null;

		auto p = g_filename_to_utf8(Str.toStringz(opsysstring), len, bytesRead, bytesWritten, &err);
		// Release the GLib-allocated buffer once it has been copied.
		scope(exit) g_free(p);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		return Str.toString(p);
	}

	/**
	 * Obtains the character set for the [current locale][setlocale]; you
	 * might use this character set as an argument to g_convert(), to convert
	 * from the current locale's encoding to some other encoding. (Frequently
	 * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.)
	 *
	 * On Windows the character set returned by this function is the
	 * so-called system default ANSI code-page. That is the character set
	 * used by the "narrow" versions of C library and Win32 functions that
	 * handle file names. It might be different from the character set
	 * used by the C library's current locale.
	 *
	 * The return value is %TRUE if the locale's encoding is UTF-8, in that
	 * case you can perhaps avoid calling g_convert().
	 *
	 * The C string reported by GLib is not allocated for the caller, so it
	 * is only converted here and never freed.
	 *
	 * Params:
	 *     charset = receives the character set name.
	 *
	 * Return: %TRUE if the returned charset is UTF-8
	 */
	public static bool getCharset(out string charset)
	{
		char* outcharset = null;

		auto p = g_get_charset(&outcharset) != 0;

		charset = Str.toString(outcharset);

		return p;
	}

	/**
	 * Gets the character set for the current locale.
	 *
	 * Return: a D string containing the name of the character set.
	 *     The buffer allocated by GLib is copied and released before
	 *     returning.
	 */
	public static string getCodeset()
	{
		auto p = g_get_codeset();
		// Release the GLib-allocated buffer once it has been copied.
		scope(exit) g_free(p);

		return Str.toString(p);
	}

	/**
	 * Determines the preferred character sets used for filenames.
	 * The first character set from the @charsets is the filename encoding, the
	 * subsequent character sets are used when trying to generate a displayable
	 * representation of a filename, see g_filename_display_name().
	 *
	 * On Unix, the character sets are determined by consulting the
	 * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
	 * On Windows, the character set used in the GLib API is always UTF-8
	 * and said environment variables have no effect.
	 *
	 * `G_FILENAME_ENCODING` may be set to a comma-separated list of
	 * character set names. The special token "@locale" is taken
	 * to mean the character set for the [current locale][setlocale].
	 * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
	 * the character set of the current locale is taken as the filename
	 * encoding. If neither environment variable is set, UTF-8 is taken
	 * as the filename encoding, but the character set of the current locale
	 * is also put in the list of encodings.
	 *
	 * The list reported by GLib belongs to GLib; it is copied into D
	 * strings here and never freed.
	 *
	 * Note that on Unix, regardless of the locale character set or
	 * `G_FILENAME_ENCODING` value, the actual file names present
	 * on a system might be in any random encoding or just gibberish.
	 *
	 * Params:
	 *     charsets = caller-provided slice used as the return location.
	 *         When it holds at least one element, element 0 is replaced
	 *         with the list of encoding names. The slice is passed by
	 *         value, so an empty slice cannot receive the list.
	 *
	 * Return: %TRUE if the filename encoding is UTF-8.
	 *
	 * Since: 2.6
	 */
	public static bool getFilenameCharsets(string[][] charsets)
	{
		// GLib stores a pointer to its own nul-terminated list here.
		char** raw = null;

		auto isUtf8 = g_get_filename_charsets(&raw) != 0;

		if (charsets.length > 0)
		{
			string[] names;

			for (auto it = raw; it !is null && *it !is null; ++it)
			{
				names ~= Str.toString(*it);
			}

			charsets[0] = names;
		}

		return isUtf8;
	}

	/**
	 * Converts a string from UTF-8 to the encoding used for strings by
	 * the C runtime (usually the same as that used by the operating
	 * system) in the [current locale][setlocale]. On Windows this means
	 * the system codepage.
	 *
	 * Params:
	 *     utf8string = a UTF-8 encoded string
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or %NULL.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or %NULL.
	 *
	 * Return: the converted text as a D string, read up to the first nul
	 *     byte of the C result. The buffer allocated by GLib is copied and
	 *     released before returning.
	 *
	 * Throws: GException on failure.
	 */
	public static string localeFromUtf8(string utf8string, ptrdiff_t len, size_t* bytesRead, size_t* bytesWritten)
	{
		GError* err = null;

		auto p = g_locale_from_utf8(Str.toStringz(utf8string), len, bytesRead, bytesWritten, &err);
		// Release the GLib-allocated buffer once it has been copied.
		scope(exit) g_free(p);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		return Str.toString(p);
	}

	/**
	 * Converts a string which is in the encoding used for strings by
	 * the C runtime (usually the same as that used by the operating
	 * system) in the [current locale][setlocale] into a UTF-8 string.
	 *
	 * Params:
	 *     opsysstring = a string in the encoding of the current locale. On Windows
	 *         this means the system codepage.
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or %NULL.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or %NULL.
	 *
	 * Return: the converted string. The buffer allocated by GLib is copied
	 *     and released before returning.
	 *
	 * Throws: GException on failure.
	 */
	public static string localeToUtf8(string opsysstring, ptrdiff_t len, size_t* bytesRead, size_t* bytesWritten)
	{
		GError* err = null;

		auto p = g_locale_to_utf8(Str.toStringz(opsysstring), len, bytesRead, bytesWritten, &err);
		// Release the GLib-allocated buffer once it has been copied.
		scope(exit) g_free(p);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		return Str.toString(p);
	}
}