glib.CharacterSet source code

1 /*
2  * This file is part of gtkD.
3  *
4  * gtkD is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU Lesser General Public License
6  * as published by the Free Software Foundation; either version 3
7  * of the License, or (at your option) any later version, with
8  * some exceptions, please read the COPYING file.
9  *
10  * gtkD is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with gtkD; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
18  */
19 
20 // generated automatically - do not change
21 // find conversion definition on APILookup.txt
22 // implement new conversion functionalities on the wrap.utils package
23 
24 
25 module glib.CharacterSet;
26 
27 private import glib.ErrorG;
28 private import glib.GException;
29 private import glib.Str;
30 private import glib.c.functions;
31 public  import glib.c.types;
32 public  import gtkc.glibtypes;
33 
34 
35 /** Static wrappers around GLib's character set lookup and conversion functions. */
36 public struct CharacterSet
37 {
38 
39 	/**
40 	 * Converts a string from one character set to another.
41 	 *
42 	 * Note that you should use g_iconv() for streaming conversions.
43 	 * Despite the fact that @bytes_read can return information about partial
44 	 * characters, the g_convert_... functions are not generally suitable
45 	 * for streaming. If the underlying converter maintains internal state,
46 	 * then this won't be preserved across successive calls to g_convert(),
47 	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
48 	 * this is the GNU C converter for CP1255 which does not emit a base
49 	 * character until it knows that the next character is not a mark that
50 	 * could combine with the base character.)
51 	 *
52 	 * Using extensions such as "//TRANSLIT" may not work (or may not work
53 	 * well) on many platforms.  Consider using g_str_to_ascii() instead.
54 	 *
55 	 * Params:
56 	 *     str = the string to convert.
57 	 *     toCodeset = name of character set into which to convert @str
58 	 *     fromCodeset = character set of @str.
59 	 *     bytesRead = location to store the number of bytes in
60 	 *         the input string that were successfully converted, or %NULL.
61 	 *         Even if the conversion was successful, this may be
62 	 *         less than @len if there were partial characters
63 	 *         at the end of the input. If the error
64 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
65 	 *         stored will be the byte offset after the last valid
66 	 *         input sequence.
67 	 *
68 	 * Returns: If the conversion was successful, the converted string.
69 	 *     The buffer allocated by GLib is released by this wrapper, so the
70 	 *     caller has nothing to free. On failure a GException is thrown.
71 	 *
72 	 * Throws: GException on failure.
73 	 */
74 	public static string convert(string str, string toCodeset, string fromCodeset, out size_t bytesRead)
75 	{
76 		size_t bytesWritten;
77 		GError* err = null;
78 
79 		auto retStr = g_convert(Str.toStringz(str), cast(ptrdiff_t)str.length, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), &bytesRead, &bytesWritten, &err);
80 
81 		if (err !is null)
82 		{
83 			throw new GException( new ErrorG(err) );
84 		}
85 
86 		scope(exit) Str.freeString(retStr);
87 		return Str.toString(retStr, bytesWritten);
88 	}
89 
90 	/** Returns: the value of g_convert_error_quark(), the quark behind GLib's G_CONVERT_ERROR domain. */
91 	public static GQuark convertErrorQuark()
92 	{
93 		return g_convert_error_quark();
94 	}
95 
96 	/**
97 	 * Converts a string from one character set to another, possibly
98 	 * including fallback sequences for characters not representable
99 	 * in the output. Note that it is not guaranteed that the specification
100 	 * for the fallback sequences in @fallback will be honored. Some
101 	 * systems may do an approximate conversion from @from_codeset
102 	 * to @to_codeset in their iconv() functions,
103 	 * in which case GLib will simply return that approximate conversion.
104 	 *
105 	 * Note that you should use g_iconv() for streaming conversions.
106 	 * Despite the fact that @bytes_read can return information about partial
107 	 * characters, the g_convert_... functions are not generally suitable
108 	 * for streaming. If the underlying converter maintains internal state,
109 	 * then this won't be preserved across successive calls to g_convert(),
110 	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
111 	 * this is the GNU C converter for CP1255 which does not emit a base
112 	 * character until it knows that the next character is not a mark that
113 	 * could combine with the base character.)
114 	 *
115 	 * Params:
116 	 *     str = the string to convert.
117 	 *     toCodeset = name of character set into which to convert @str
118 	 *     fromCodeset = character set of @str.
119 	 *     fallback = UTF-8 string to use in place of characters not
120 	 *         present in the target encoding. (The string must be
121 	 *         representable in the target encoding).
122 	 *         If %NULL, characters not in the target encoding will
123 	 *         be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
124 	 *     bytesRead = location to store the number of bytes in
125 	 *         the input string that were successfully converted, or %NULL.
126 	 *         Even if the conversion was successful, this may be
127 	 *         less than @len if there were partial characters
128 	 *         at the end of the input.
129 	 *
130 	 * Returns: If the conversion was successful, the converted string.
131 	 *     The buffer allocated by GLib is released by this wrapper, so the
132 	 *     caller has nothing to free. On failure a GException is thrown.
133 	 *
134 	 * Throws: GException on failure.
135 	 */
136 	public static string convertWithFallback(string str, string toCodeset, string fromCodeset, string fallback, out size_t bytesRead)
137 	{
138 		size_t bytesWritten;
139 		GError* err = null;
140 
141 		auto retStr = g_convert_with_fallback(Str.toStringz(str), cast(ptrdiff_t)str.length, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), Str.toStringz(fallback), &bytesRead, &bytesWritten, &err);
142 
143 		if (err !is null)
144 		{
145 			throw new GException( new ErrorG(err) );
146 		}
147 
148 		scope(exit) Str.freeString(retStr);
149 		return Str.toString(retStr, bytesWritten);
150 	}
151 
152 	/**
153 	 * Converts a string from one character set to another.
154 	 *
155 	 * Note that you should use g_iconv() for streaming conversions.
156 	 * Despite the fact that @bytes_read can return information about partial
157 	 * characters, the g_convert_... functions are not generally suitable
158 	 * for streaming. If the underlying converter maintains internal state,
159 	 * then this won't be preserved across successive calls to g_convert(),
160 	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
161 	 * this is the GNU C converter for CP1255 which does not emit a base
162 	 * character until it knows that the next character is not a mark that
163 	 * could combine with the base character.)
164 	 *
165 	 * Characters which are valid in the input character set, but which have no
166 	 * representation in the output character set will result in a
167 	 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE error. This is in contrast to the iconv()
168 	 * specification, which leaves this behaviour implementation defined. Note that
169 	 * this is the same error code as is returned for an invalid byte sequence in
170 	 * the input character set. To get defined behaviour for conversion of
171 	 * unrepresentable characters, use g_convert_with_fallback().
172 	 *
173 	 * Params:
174 	 *     str = the string to convert.
175 	 *     converter = conversion descriptor from g_iconv_open()
176 	 *     bytesRead = location to store the number of bytes in
177 	 *         the input string that were successfully converted, or %NULL.
178 	 *         Even if the conversion was successful, this may be
179 	 *         less than @len if there were partial characters
180 	 *         at the end of the input. If the error
181 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
182 	 *         stored will be the byte offset after the last valid
183 	 *         input sequence.
184 	 *
185 	 * Returns: If the conversion was successful, the converted string.
186 	 *     The buffer allocated by GLib is released by this wrapper, so the
187 	 *     caller has nothing to free. On failure a GException is thrown.
188 	 *
189 	 * Throws: GException on failure.
190 	 */
191 	public static string convertWithIconv(string str, GIConv converter, out size_t bytesRead)
192 	{
193 		size_t bytesWritten;
194 		GError* err = null;
195 
196 		auto retStr = g_convert_with_iconv(Str.toStringz(str), cast(ptrdiff_t)str.length, converter, &bytesRead, &bytesWritten, &err);
197 
198 		if (err !is null)
199 		{
200 			throw new GException( new ErrorG(err) );
201 		}
202 
203 		scope(exit) Str.freeString(retStr);
204 		return Str.toString(retStr, bytesWritten);
205 	}
206 
207 	/**
208 	 * Returns the display basename for the particular filename, guaranteed
209 	 * to be valid UTF-8. The display name might not be identical to the filename,
210 	 * for instance there might be problems converting it to UTF-8, and some files
211 	 * can be translated in the display.
212 	 *
213 	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
214 	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
215 	 * You can search the result for the UTF-8 encoding of this character (which is
216 	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
217 	 * encoding.
218 	 *
219 	 * You must pass the whole absolute pathname to this function so that
220 	 * translation of well known locations can be done.
221 	 *
222 	 * This function is preferred over g_filename_display_name() if you know the
223 	 * whole path, as it allows translation.
224 	 *
225 	 * Params:
226 	 *     filename = an absolute pathname in the
227 	 *         GLib file name encoding
228 	 *
229 	 * Returns: a newly allocated string containing
230 	 *     a rendition of the basename of the filename in valid UTF-8
231 	 *
232 	 * Since: 2.6
233 	 */
234 	public static string filenameDisplayBasename(string filename)
235 	{
236 		auto retStr = g_filename_display_basename(Str.toStringz(filename));
237 
238 		scope(exit) Str.freeString(retStr);
239 		return Str.toString(retStr);
240 	}
241 
242 	/**
243 	 * Converts a filename into a valid UTF-8 string. The conversion is
244 	 * not necessarily reversible, so you should keep the original around
245 	 * and use the return value of this function only for display purposes.
246 	 * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL
247 	 * even if the filename actually isn't in the GLib file name encoding.
248 	 *
249 	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
250 	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
251 	 * You can search the result for the UTF-8 encoding of this character (which is
252 	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
253 	 * encoding.
254 	 *
255 	 * If you know the whole pathname of the file you should use
256 	 * g_filename_display_basename(), since that allows location-based
257 	 * translation of filenames.
258 	 *
259 	 * Params:
260 	 *     filename = a pathname hopefully in the
261 	 *         GLib file name encoding
262 	 *
263 	 * Returns: a newly allocated string containing
264 	 *     a rendition of the filename in valid UTF-8
265 	 *
266 	 * Since: 2.6
267 	 */
268 	public static string filenameDisplayName(string filename)
269 	{
270 		auto retStr = g_filename_display_name(Str.toStringz(filename));
271 
272 		scope(exit) Str.freeString(retStr);
273 		return Str.toString(retStr);
274 	}
275 
276 	/**
277 	 * Converts a string from UTF-8 to the encoding GLib uses for
278 	 * filenames. Note that on Windows GLib uses UTF-8 for filenames;
279 	 * on other platforms, this function indirectly depends on the
280 	 * [current locale][setlocale].
281 	 *
282 	 * The input string shall not contain nul characters even if the @len
283 	 * argument is positive. A nul character found inside the string will result
284 	 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. If the filename encoding is
285 	 * not UTF-8 and the conversion output contains a nul character, the error
286 	 * %G_CONVERT_ERROR_EMBEDDED_NUL is set and the function returns %NULL.
287 	 *
288 	 * Params:
289 	 *     utf8string = a UTF-8 encoded string.
290 	 *     len = the length of the string, or -1 if the string is
291 	 *         nul-terminated.
292 	 *     bytesRead = location to store the number of bytes in
293 	 *         the input string that were successfully converted, or %NULL.
294 	 *         Even if the conversion was successful, this may be
295 	 *         less than @len if there were partial characters
296 	 *         at the end of the input. If the error
297 	 *         %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
298 	 *         stored will be the byte offset after the last valid
299 	 *         input sequence.
300 	 *     bytesWritten = the number of bytes stored in
301 	 *         the output buffer (not including the terminating nul).
302 	 *
303 	 * Returns: The converted string, or %NULL on an error.
304 	 *
305 	 * Throws: GException on failure.
306 	 */
307 	public static string filenameFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
308 	{
309 		GError* err = null;
310 
311 		auto retStr = g_filename_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);
312 
313 		if (err !is null)
314 		{
315 			throw new GException( new ErrorG(err) );
316 		}
317 
318 		scope(exit) Str.freeString(retStr);
319 		return Str.toString(retStr);
320 	}
321 
322 	/**
323 	 * Converts a string which is in the encoding used by GLib for
324 	 * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
325 	 * for filenames; on other platforms, this function indirectly depends on
326 	 * the [current locale][setlocale].
327 	 *
328 	 * The input string shall not contain nul characters even if the @len
329 	 * argument is positive. A nul character found inside the string will result
330 	 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
331 	 * If the source encoding is not UTF-8 and the conversion output contains a
332 	 * nul character, the error %G_CONVERT_ERROR_EMBEDDED_NUL is set and the
333 	 * function returns %NULL. Use g_convert() to produce output that
334 	 * may contain embedded nul characters.
335 	 *
336 	 * Params:
337 	 *     opsysstring = a string in the encoding for filenames
338 	 *     len = the length of the string, or -1 if the string is
339 	 *         nul-terminated (Note that some encodings may allow nul
340 	 *         bytes to occur inside strings. In that case, using -1
341 	 *         for the @len parameter is unsafe)
342 	 *     bytesRead = location to store the number of bytes in the
343 	 *         input string that were successfully converted, or %NULL.
344 	 *         Even if the conversion was successful, this may be
345 	 *         less than @len if there were partial characters
346 	 *         at the end of the input. If the error
347 	 *         %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
348 	 *         stored will be the byte offset after the last valid
349 	 *         input sequence.
350 	 *     bytesWritten = the number of bytes stored in the output
351 	 *         buffer (not including the terminating nul).
352 	 *
353 	 * Returns: The converted string, or %NULL on an error.
354 	 *
355 	 * Throws: GException on failure.
356 	 */
357 	public static string filenameToUtf8(string opsysstring, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
358 	{
359 		GError* err = null;
360 
361 		auto retStr = g_filename_to_utf8(Str.toStringz(opsysstring), len, &bytesRead, &bytesWritten, &err);
362 
363 		if (err !is null)
364 		{
365 			throw new GException( new ErrorG(err) );
366 		}
367 
368 		scope(exit) Str.freeString(retStr);
369 		return Str.toString(retStr);
370 	}
371 
372 	/**
373 	 * Obtains the character set for the [current locale][setlocale]; you
374 	 * might use this character set as an argument to g_convert(), to convert
375 	 * from the current locale's encoding to some other encoding. (Frequently
376 	 * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.)
377 	 *
378 	 * On Windows the character set returned by this function is the
379 	 * so-called system default ANSI code-page. That is the character set
380 	 * used by the "narrow" versions of C library and Win32 functions that
381 	 * handle file names. It might be different from the character set
382 	 * used by the C library's current locale.
383 	 *
384 	 * On Linux, the character set is found by consulting nl_langinfo() if
385 	 * available. If not, the environment variables `LC_ALL`, `LC_CTYPE`, `LANG`
386 	 * and `CHARSET` are queried in order.
387 	 *
388 	 * The return value is %TRUE if the locale's encoding is UTF-8, in that
389 	 * case you can perhaps avoid calling g_convert().
390 	 *
391 	 * The string returned in @charset is not allocated, and should not be
392 	 * freed.
393 	 *
394 	 * Params:
395 	 *     charset = return location for character set
396 	 *         name, or %NULL.
397 	 *
398 	 * Returns: %TRUE if the returned charset is UTF-8
399 	 */
400 	public static bool getCharset(out string charset)
401 	{
402 		char* outcharset = null;
403 
404 		auto p = g_get_charset(&outcharset) != 0;
405 
406 		charset = Str.toString(outcharset);
407 
408 		return p;
409 	}
410 
411 	/**
412 	 * Gets the character set for the current locale.
413 	 *
414 	 * Returns: a string containing the name of the character set.
415 	 *     The buffer allocated by GLib is freed by this wrapper.
416 	 */
417 	public static string getCodeset()
418 	{
419 		auto retStr = g_get_codeset();
420 
421 		scope(exit) Str.freeString(retStr);
422 		return Str.toString(retStr);
423 	}
424 
425 	/**
426 	 * Determines the preferred character sets used for filenames.
427 	 * The first character set from the @charsets is the filename encoding, the
428 	 * subsequent character sets are used when trying to generate a displayable
429 	 * representation of a filename, see g_filename_display_name().
430 	 *
431 	 * On Unix, the character sets are determined by consulting the
432 	 * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
433 	 * On Windows, the character set used in the GLib API is always UTF-8
434 	 * and said environment variables have no effect.
435 	 *
436 	 * `G_FILENAME_ENCODING` may be set to a comma-separated list of
437 	 * character set names. The special token "\@locale" is taken
438 	 * to  mean the character set for the [current locale][setlocale].
439 	 * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
440 	 * the character set of the current locale is taken as the filename
441 	 * encoding. If neither environment variable  is set, UTF-8 is taken
442 	 * as the filename encoding, but the character set of the current locale
443 	 * is also put in the list of encodings.
444 	 *
445 	 * The returned @charsets belong to GLib and must not be freed.
446 	 *
447 	 * Note that on Unix, regardless of the locale character set or
448 	 * `G_FILENAME_ENCODING` value, the actual file names present
449 	 * on a system might be in any random encoding or just gibberish.
450 	 *
451 	 * Returns: %TRUE if the filename encoding is UTF-8.
452 	 *
453 	 * Since: 2.6
454 	 */
455 	public static bool getFilenameCharsets(string[][] charsets)
456 	{
457 		return g_get_filename_charsets(Str.toStringzArray(charsets)) != 0;
458 	}
459 
460 	/**
461 	 * Converts a string from UTF-8 to the encoding used for strings by
462 	 * the C runtime (usually the same as that used by the operating
463 	 * system) in the [current locale][setlocale]. On Windows this means
464 	 * the system codepage.
465 	 *
466 	 * The input string shall not contain nul characters even if the @len
467 	 * argument is positive. A nul character found inside the string will result
468 	 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. Use g_convert() to convert
469 	 * input that may contain embedded nul characters.
470 	 *
471 	 * Params:
472 	 *     utf8string = a UTF-8 encoded string
473 	 *     len = the length of the string, or -1 if the string is
474 	 *         nul-terminated.
475 	 *     bytesRead = location to store the number of bytes in the
476 	 *         input string that were successfully converted, or %NULL.
477 	 *         Even if the conversion was successful, this may be
478 	 *         less than @len if there were partial characters
479 	 *         at the end of the input. If the error
480 	 *         %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
481 	 *         stored will be the byte offset after the last valid
482 	 *         input sequence.
483 	 *
484 	 * Returns: A newly-allocated buffer containing the converted string,
485 	 *     or %NULL on an error, and error will be set.
486 	 *
487 	 * Throws: GException on failure.
488 	 */
489 	public static string localeFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead)
490 	{
491 		size_t bytesWritten;
492 		GError* err = null;
493 
494 		auto retStr = g_locale_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);
495 
496 		if (err !is null)
497 		{
498 			throw new GException( new ErrorG(err) );
499 		}
500 
501 		scope(exit) Str.freeString(retStr);
502 		return Str.toString(retStr, bytesWritten);
503 	}
504 
505 	/**
506 	 * Converts a string which is in the encoding used for strings by
507 	 * the C runtime (usually the same as that used by the operating
508 	 * system) in the [current locale][setlocale] into a UTF-8 string.
509 	 *
510 	 * If the source encoding is not UTF-8 and the conversion output contains a
511 	 * nul character, the error %G_CONVERT_ERROR_EMBEDDED_NUL is set and the
512 	 * function returns %NULL.
513 	 * If the source encoding is UTF-8, an embedded nul character is treated with
514 	 * the %G_CONVERT_ERROR_ILLEGAL_SEQUENCE error for backward compatibility with
515 	 * earlier versions of this library. Use g_convert() to produce output that
516 	 * may contain embedded nul characters.
517 	 *
518 	 * Params:
519 	 *     opsysstring = a string in the
520 	 *         encoding of the current locale. On Windows
521 	 *         this means the system codepage.
522 	 *     bytesRead = location to store the number of bytes in the
523 	 *         input string that were successfully converted, or %NULL.
524 	 *         Even if the conversion was successful, this may be
525 	 *         less than @len if there were partial characters
526 	 *         at the end of the input. If the error
527 	 *         %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
528 	 *         stored will be the byte offset after the last valid
529 	 *         input sequence.
530 	 *     bytesWritten = the number of bytes stored in the output
531 	 *         buffer (not including the terminating nul).
532 	 *
533 	 * Returns: The converted string, or %NULL on an error.
534 	 *
535 	 * Throws: GException on failure.
536 	 */
537 	public static string localeToUtf8(string opsysstring, out size_t bytesRead, out size_t bytesWritten)
538 	{
539 		GError* err = null;
540 
541 		auto retStr = g_locale_to_utf8(Str.toStringz(opsysstring), cast(ptrdiff_t)opsysstring.length, &bytesRead, &bytesWritten, &err);
542 
543 		if (err !is null)
544 		{
545 			throw new GException( new ErrorG(err) );
546 		}
547 
548 		scope(exit) Str.freeString(retStr);
549 		return Str.toString(retStr);
550 	}
551 }