glib.CharacterSet source code

1 /*
2  * This file is part of gtkD.
3  *
4  * gtkD is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU Lesser General Public License
6  * as published by the Free Software Foundation; either version 3
7  * of the License, or (at your option) any later version, with
8  * some exceptions, please read the COPYING file.
9  *
10  * gtkD is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with gtkD; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
18  */
19 
20 // generated automatically - do not change
21 // find conversion definition on APILookup.txt
22 // implement new conversion functionalities on the wrap.utils package
23 
24 
25 module glib.CharacterSet;
26 
27 private import glib.ErrorG;
28 private import glib.GException;
29 private import glib.Str;
30 private import glib.c.functions;
31 public  import glib.c.types;
32 public  import gtkc.glibtypes;
33 
34 
/**
 * Static wrappers around GLib's character set conversion functions
 * (g_convert() and related calls).
 *
 * Buffers handed back by GLib are turned into D strings with Str.toString
 * and released with Str.freeString before the wrapper returns, so callers
 * never free the results themselves. Failures that GLib reports through a
 * GError are rethrown as GException.
 */
public struct CharacterSet
{

	/**
	 * Converts a string from one character set to another.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytesRead can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Using extensions such as "//TRANSLIT" may not work (or may not work
	 * well) on many platforms.  Consider using g_str_to_ascii() instead.
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     toCodeset = name of character set into which to convert @str
	 *     fromCodeset = character set of @str.
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = receives the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: the converted string, built from the @bytesWritten bytes
	 *     GLib produced. The GLib buffer is released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string convert(string str, ptrdiff_t len, string toCodeset, string fromCodeset, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_convert(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), &bytesRead, &bytesWritten, &err);
		// Registered right after the call so the buffer is also released
		// when the error path below throws.
		scope(exit) Str.freeString(retStr);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		// Pass the byte count GLib reported: output in encodings such as
		// UTF-16 contains nul bytes, so the terminator cannot be relied on.
		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Returns the error domain quark used for conversion errors, as
	 * reported by g_convert_error_quark().
	 */
	public static GQuark convertErrorQuark()
	{
		return g_convert_error_quark();
	}

	/**
	 * Converts a string from one character set to another, possibly
	 * including fallback sequences for characters not representable
	 * in the output. Note that it is not guaranteed that the specification
	 * for the fallback sequences in @fallback will be honored. Some
	 * systems may do an approximate conversion from @fromCodeset
	 * to @toCodeset in their iconv() functions,
	 * in which case GLib will simply return that approximate conversion.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytesRead can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     toCodeset = name of character set into which to convert @str
	 *     fromCodeset = character set of @str.
	 *     fallback = UTF-8 string to use in place of character not
	 *         present in the target encoding. (The string must be
	 *         representable in the target encoding).
	 *         If null, characters not in the target encoding will
	 *         be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or null.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or null.
	 *
	 * Returns: the converted string, built from the number of bytes GLib
	 *     reported as written. The GLib buffer is released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string convertWithFallback(string str, ptrdiff_t len, string toCodeset, string fromCodeset, string fallback, size_t* bytesRead, size_t* bytesWritten)
	{
		GError* err = null;

		// The output length is needed to build the result, so hand GLib a
		// local counter when the caller did not ask for it.
		size_t written;
		size_t* writtenPtr = (bytesWritten !is null) ? bytesWritten : &written;

		auto retStr = g_convert_with_fallback(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), Str.toStringz(fallback), bytesRead, writtenPtr, &err);
		scope(exit) Str.freeString(retStr);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		return Str.toString(retStr, *writtenPtr);
	}

	/**
	 * Converts a string from one character set to another.
	 *
	 * Note that you should use g_iconv() for streaming conversions.
	 * Despite the fact that @bytesRead can return information about partial
	 * characters, the g_convert_... functions are not generally suitable
	 * for streaming. If the underlying converter maintains internal state,
	 * then this won't be preserved across successive calls to g_convert(),
	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
	 * this is the GNU C converter for CP1255 which does not emit a base
	 * character until it knows that the next character is not a mark that
	 * could combine with the base character.)
	 *
	 * Params:
	 *     str = the string to convert
	 *     len = the length of the string in bytes, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     converter = conversion descriptor from g_iconv_open()
	 *     bytesRead = location to store the number of bytes in the
	 *         input string that were successfully converted, or null.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = location to store the number of bytes stored in
	 *         the output buffer (not including the terminating nul),
	 *         or null.
	 *
	 * Returns: the converted string, built from the number of bytes GLib
	 *     reported as written. The GLib buffer is released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string convertWithIconv(string str, ptrdiff_t len, GIConv converter, size_t* bytesRead, size_t* bytesWritten)
	{
		GError* err = null;

		// The output length is needed to build the result, so hand GLib a
		// local counter when the caller did not ask for it.
		size_t written;
		size_t* writtenPtr = (bytesWritten !is null) ? bytesWritten : &written;

		auto retStr = g_convert_with_iconv(Str.toStringz(str), len, converter, bytesRead, writtenPtr, &err);
		scope(exit) Str.freeString(retStr);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		return Str.toString(retStr, *writtenPtr);
	}

	/**
	 * Returns the display basename for the particular filename, guaranteed
	 * to be valid UTF-8. The display name might not be identical to the filename,
	 * for instance there might be problems converting it to UTF-8, and some files
	 * can be translated in the display.
	 *
	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
	 * You can search the result for the UTF-8 encoding of this character (which is
	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
	 * encoding.
	 *
	 * You must pass the whole absolute pathname to this function so that
	 * translation of well known locations can be done.
	 *
	 * This function is preferred over g_filename_display_name() if you know the
	 * whole path, as it allows translation.
	 *
	 * Params:
	 *     filename = an absolute pathname in the
	 *         GLib file name encoding
	 *
	 * Returns: a string containing a rendition of the basename of the
	 *     filename in valid UTF-8. The GLib buffer is released by this
	 *     wrapper.
	 *
	 * Since: 2.6
	 */
	public static string filenameDisplayBasename(string filename)
	{
		auto retStr = g_filename_display_basename(Str.toStringz(filename));
		scope(exit) Str.freeString(retStr);

		return Str.toString(retStr);
	}

	/**
	 * Converts a filename into a valid UTF-8 string. The conversion is
	 * not necessarily reversible, so you should keep the original around
	 * and use the return value of this function only for display purposes.
	 * Unlike g_filename_to_utf8(), the result is guaranteed to be non-null
	 * even if the filename actually isn't in the GLib file name encoding.
	 *
	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
	 * You can search the result for the UTF-8 encoding of this character (which is
	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
	 * encoding.
	 *
	 * If you know the whole pathname of the file you should use
	 * g_filename_display_basename(), since that allows location-based
	 * translation of filenames.
	 *
	 * Params:
	 *     filename = a pathname hopefully in the
	 *         GLib file name encoding
	 *
	 * Returns: a string containing a rendition of the filename in valid
	 *     UTF-8. The GLib buffer is released by this wrapper.
	 *
	 * Since: 2.6
	 */
	public static string filenameDisplayName(string filename)
	{
		auto retStr = g_filename_display_name(Str.toStringz(filename));
		scope(exit) Str.freeString(retStr);

		return Str.toString(retStr);
	}

	/**
	 * Converts a string from UTF-8 to the encoding GLib uses for
	 * filenames. Note that on Windows GLib uses UTF-8 for filenames;
	 * on other platforms, this function indirectly depends on the
	 * [current locale][setlocale].
	 *
	 * Params:
	 *     utf8string = a UTF-8 encoded string.
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated.
	 *     bytesRead = receives the number of bytes in
	 *         the input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *
	 * Returns: the converted string, built from the number of bytes GLib
	 *     reported as written. The GLib buffer is released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string filenameFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead)
	{
		// Not exposed to the caller; only used to size the returned string.
		size_t bytesWritten;
		GError* err = null;

		auto retStr = g_filename_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);
		scope(exit) Str.freeString(retStr);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Converts a string which is in the encoding used by GLib for
	 * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
	 * for filenames; on other platforms, this function indirectly depends on
	 * the [current locale][setlocale].
	 *
	 * Params:
	 *     opsysstring = a string in the encoding for filenames
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = receives the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: the converted string, built from the @bytesWritten bytes
	 *     GLib produced. The GLib buffer is released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string filenameToUtf8(string opsysstring, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_filename_to_utf8(Str.toStringz(opsysstring), len, &bytesRead, &bytesWritten, &err);
		scope(exit) Str.freeString(retStr);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Obtains the character set for the [current locale][setlocale]; you
	 * might use this character set as an argument to g_convert(), to convert
	 * from the current locale's encoding to some other encoding. (Frequently
	 * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.)
	 *
	 * On Windows the character set returned by this function is the
	 * so-called system default ANSI code-page. That is the character set
	 * used by the "narrow" versions of C library and Win32 functions that
	 * handle file names. It might be different from the character set
	 * used by the C library's current locale.
	 *
	 * The return value is true if the locale's encoding is UTF-8, in that
	 * case you can perhaps avoid calling g_convert().
	 *
	 * The C string GLib hands back is not freed by this wrapper; it is only
	 * converted to a D string.
	 *
	 * Params:
	 *     charset = receives the character set name
	 *
	 * Returns: true if the returned charset is UTF-8
	 */
	public static bool getCharset(out string charset)
	{
		char* outcharset = null;

		auto p = g_get_charset(&outcharset) != 0;

		charset = Str.toString(outcharset);

		return p;
	}

	/**
	 * Gets the character set for the current locale.
	 *
	 * Returns: a string containing the name of the character set. The
	 *     GLib buffer is released by this wrapper.
	 */
	public static string getCodeset()
	{
		auto retStr = g_get_codeset();
		scope(exit) Str.freeString(retStr);

		return Str.toString(retStr);
	}

	/**
	 * Determines the preferred character sets used for filenames.
	 * The first character set from the @charsets is the filename encoding, the
	 * subsequent character sets are used when trying to generate a displayable
	 * representation of a filename, see g_filename_display_name().
	 *
	 * On Unix, the character sets are determined by consulting the
	 * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
	 * On Windows, the character set used in the GLib API is always UTF-8
	 * and said environment variables have no effect.
	 *
	 * `G_FILENAME_ENCODING` may be set to a comma-separated list of
	 * character set names. The special token "\@locale" is taken
	 * to  mean the character set for the [current locale][setlocale].
	 * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
	 * the character set of the current locale is taken as the filename
	 * encoding. If neither environment variable  is set, UTF-8 is taken
	 * as the filename encoding, but the character set of the current locale
	 * is also put in the list of encodings.
	 *
	 * The list GLib hands back belongs to GLib and is not freed here; it is
	 * only converted to D strings.
	 *
	 * Note that on Unix, regardless of the locale character set or
	 * `G_FILENAME_ENCODING` value, the actual file names present
	 * on a system might be in any random encoding or just gibberish.
	 *
	 * Params:
	 *     charsets = receives the list of encoding names
	 *
	 * Returns: true if the filename encoding is UTF-8.
	 *
	 * Since: 2.6
	 */
	public static bool getFilenameCharsets(out string[] charsets)
	{
		char** outcharsets = null;

		auto p = g_get_filename_charsets(&outcharsets) != 0;

		charsets = Str.toStringArray(outcharsets);

		return p;
	}

	/**
	 * Legacy signature of getFilenameCharsets, kept so existing callers
	 * keep compiling. Prefer the overload taking `out string[]`.
	 *
	 * A by-value `string[][]` cannot act as a return location on its own,
	 * so the list of encoding names is stored in `charsets[0]` when the
	 * caller supplies at least one slot; otherwise only the flag is
	 * returned.
	 *
	 * Params:
	 *     charsets = array whose first element, if present, receives the
	 *         list of encoding names
	 *
	 * Returns: true if the filename encoding is UTF-8.
	 *
	 * Since: 2.6
	 */
	public static bool getFilenameCharsets(string[][] charsets)
	{
		string[] names;
		auto isUtf8 = getFilenameCharsets(names);

		if (charsets.length > 0)
		{
			charsets[0] = names;
		}

		return isUtf8;
	}

	/**
	 * Converts a string from UTF-8 to the encoding used for strings by
	 * the C runtime (usually the same as that used by the operating
	 * system) in the [current locale][setlocale]. On Windows this means
	 * the system codepage.
	 *
	 * Params:
	 *     utf8string = a UTF-8 encoded string
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = receives the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: the converted string, built from the @bytesWritten bytes
	 *     GLib produced. The GLib buffer is released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string localeFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_locale_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);
		scope(exit) Str.freeString(retStr);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		// The locale encoding is not necessarily nul-free, so rely on the
		// byte count GLib reported instead of the terminator.
		return Str.toString(retStr, bytesWritten);
	}

	/**
	 * Converts a string which is in the encoding used for strings by
	 * the C runtime (usually the same as that used by the operating
	 * system) in the [current locale][setlocale] into a UTF-8 string.
	 *
	 * Params:
	 *     opsysstring = a string in the encoding of the current locale. On Windows
	 *         this means the system codepage.
	 *     len = the length of the string, or -1 if the string is
	 *         nul-terminated (Note that some encodings may allow nul
	 *         bytes to occur inside strings. In that case, using -1
	 *         for the @len parameter is unsafe)
	 *     bytesRead = receives the number of bytes in the
	 *         input string that were successfully converted.
	 *         Even if the conversion was successful, this may be
	 *         less than @len if there were partial characters
	 *         at the end of the input. If the error
	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
	 *         stored will be the byte offset after the last valid
	 *         input sequence.
	 *     bytesWritten = receives the number of bytes stored in the output
	 *         buffer (not including the terminating nul).
	 *
	 * Returns: the converted string, built from the @bytesWritten bytes
	 *     GLib produced. The GLib buffer is released by this wrapper.
	 *
	 * Throws: GException on failure.
	 */
	public static string localeToUtf8(string opsysstring, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
	{
		GError* err = null;

		auto retStr = g_locale_to_utf8(Str.toStringz(opsysstring), len, &bytesRead, &bytesWritten, &err);
		scope(exit) Str.freeString(retStr);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		return Str.toString(retStr, bytesWritten);
	}
}