glib.CharacterSet source code

1 /*
2  * This file is part of gtkD.
3  *
4  * gtkD is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU Lesser General Public License
6  * as published by the Free Software Foundation; either version 3
7  * of the License, or (at your option) any later version, with
8  * some exceptions, please read the COPYING file.
9  *
10  * gtkD is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with gtkD; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
18  */
19 
20 // generated automatically - do not change
21 // find conversion definition on APILookup.txt
22 // implement new conversion functionalities on the wrap.utils package
23 
24 
25 module glib.CharacterSet;
26 
27 private import glib.ErrorG;
28 private import glib.GException;
29 private import glib.Str;
30 private import gtkc.glib;
31 public  import gtkc.glibtypes;
32 
33 
34 /** */
35 public struct CharacterSet
36 {
37 
38 	/**
39 	 * Converts a string from one character set to another.
40 	 *
41 	 * Note that you should use g_iconv() for streaming conversions.
42 	 * Despite the fact that @byes_read can return information about partial
43 	 * characters, the g_convert_... functions are not generally suitable
44 	 * for streaming. If the underlying converter maintains internal state,
45 	 * then this won't be preserved across successive calls to g_convert(),
46 	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
47 	 * this is the GNU C converter for CP1255 which does not emit a base
48 	 * character until it knows that the next character is not a mark that
49 	 * could combine with the base character.)
50 	 *
51 	 * Using extensions such as "//TRANSLIT" may not work (or may not work
52 	 * well) on many platforms.  Consider using g_str_to_ascii() instead.
53 	 *
54 	 * Params:
55 	 *     str = the string to convert
56 	 *     len = the length of the string in bytes, or -1 if the string is
57 	 *         nul-terminated (Note that some encodings may allow nul
58 	 *         bytes to occur inside strings. In that case, using -1
59 	 *         for the @len parameter is unsafe)
60 	 *     toCodeset = name of character set into which to convert @str
61 	 *     fromCodeset = character set of @str.
62 	 *     bytesRead = location to store the number of bytes in the
63 	 *         input string that were successfully converted, or %NULL.
64 	 *         Even if the conversion was successful, this may be
65 	 *         less than @len if there were partial characters
66 	 *         at the end of the input. If the error
67 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
68 	 *         stored will the byte offset after the last valid
69 	 *         input sequence.
70 	 *     bytesWritten = the number of bytes stored in the output buffer (not
71 	 *         including the terminating nul).
72 	 *
73 	 * Return: If the conversion was successful, a newly allocated
74 	 *     nul-terminated string, which must be freed with
75 	 *     g_free(). Otherwise %NULL and @error will be set.
76 	 *
77 	 * Throws: GException on failure.
78 	 */
79 	public static string convert(string str, ptrdiff_t len, string toCodeset, string fromCodeset, out size_t bytesRead, out size_t bytesWritten)
80 	{
81 		GError* err = null;
82 		
83 		auto retStr = g_convert(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), &bytesRead, &bytesWritten, &err);
84 		
85 		if (err !is null)
86 		{
87 			throw new GException( new ErrorG(err) );
88 		}
89 		
90 		scope(exit) Str.freeString(retStr);
91 		return Str.toString(retStr);
92 	}
93 
94 	/** */
95 	public static GQuark convertErrorQuark()
96 	{
97 		return g_convert_error_quark();
98 	}
99 
100 	/**
101 	 * Converts a string from one character set to another, possibly
102 	 * including fallback sequences for characters not representable
103 	 * in the output. Note that it is not guaranteed that the specification
104 	 * for the fallback sequences in @fallback will be honored. Some
105 	 * systems may do an approximate conversion from @from_codeset
106 	 * to @to_codeset in their iconv() functions,
107 	 * in which case GLib will simply return that approximate conversion.
108 	 *
109 	 * Note that you should use g_iconv() for streaming conversions.
110 	 * Despite the fact that @byes_read can return information about partial
111 	 * characters, the g_convert_... functions are not generally suitable
112 	 * for streaming. If the underlying converter maintains internal state,
113 	 * then this won't be preserved across successive calls to g_convert(),
114 	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
115 	 * this is the GNU C converter for CP1255 which does not emit a base
116 	 * character until it knows that the next character is not a mark that
117 	 * could combine with the base character.)
118 	 *
119 	 * Params:
120 	 *     str = the string to convert
121 	 *     len = the length of the string in bytes, or -1 if the string is
122 	 *         nul-terminated (Note that some encodings may allow nul
123 	 *         bytes to occur inside strings. In that case, using -1
124 	 *         for the @len parameter is unsafe)
125 	 *     toCodeset = name of character set into which to convert @str
126 	 *     fromCodeset = character set of @str.
127 	 *     fallback = UTF-8 string to use in place of character not
128 	 *         present in the target encoding. (The string must be
129 	 *         representable in the target encoding).
130 	 *         If %NULL, characters not in the target encoding will
131 	 *         be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
132 	 *     bytesRead = location to store the number of bytes in the
133 	 *         input string that were successfully converted, or %NULL.
134 	 *         Even if the conversion was successful, this may be
135 	 *         less than @len if there were partial characters
136 	 *         at the end of the input.
137 	 *     bytesWritten = the number of bytes stored in the output buffer (not
138 	 *         including the terminating nul).
139 	 *
140 	 * Return: If the conversion was successful, a newly allocated
141 	 *     nul-terminated string, which must be freed with
142 	 *     g_free(). Otherwise %NULL and @error will be set.
143 	 *
144 	 * Throws: GException on failure.
145 	 */
146 	public static string convertWithFallback(string str, ptrdiff_t len, string toCodeset, string fromCodeset, string fallback, size_t* bytesRead, size_t* bytesWritten)
147 	{
148 		GError* err = null;
149 		
150 		auto retStr = g_convert_with_fallback(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), Str.toStringz(fallback), bytesRead, bytesWritten, &err);
151 		
152 		if (err !is null)
153 		{
154 			throw new GException( new ErrorG(err) );
155 		}
156 		
157 		scope(exit) Str.freeString(retStr);
158 		return Str.toString(retStr);
159 	}
160 
161 	/**
162 	 * Converts a string from one character set to another.
163 	 *
164 	 * Note that you should use g_iconv() for streaming conversions.
165 	 * Despite the fact that @byes_read can return information about partial
166 	 * characters, the g_convert_... functions are not generally suitable
167 	 * for streaming. If the underlying converter maintains internal state,
168 	 * then this won't be preserved across successive calls to g_convert(),
169 	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
170 	 * this is the GNU C converter for CP1255 which does not emit a base
171 	 * character until it knows that the next character is not a mark that
172 	 * could combine with the base character.)
173 	 *
174 	 * Params:
175 	 *     str = the string to convert
176 	 *     len = the length of the string in bytes, or -1 if the string is
177 	 *         nul-terminated (Note that some encodings may allow nul
178 	 *         bytes to occur inside strings. In that case, using -1
179 	 *         for the @len parameter is unsafe)
180 	 *     converter = conversion descriptor from g_iconv_open()
181 	 *     bytesRead = location to store the number of bytes in the
182 	 *         input string that were successfully converted, or %NULL.
183 	 *         Even if the conversion was successful, this may be
184 	 *         less than @len if there were partial characters
185 	 *         at the end of the input. If the error
186 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
187 	 *         stored will the byte offset after the last valid
188 	 *         input sequence.
189 	 *     bytesWritten = the number of bytes stored in the output buffer (not
190 	 *         including the terminating nul).
191 	 *
192 	 * Return: If the conversion was successful, a newly allocated
193 	 *     nul-terminated string, which must be freed with
194 	 *     g_free(). Otherwise %NULL and @error will be set.
195 	 *
196 	 * Throws: GException on failure.
197 	 */
198 	public static string convertWithIconv(string str, ptrdiff_t len, GIConv converter, size_t* bytesRead, size_t* bytesWritten)
199 	{
200 		GError* err = null;
201 		
202 		auto retStr = g_convert_with_iconv(Str.toStringz(str), len, converter, bytesRead, bytesWritten, &err);
203 		
204 		if (err !is null)
205 		{
206 			throw new GException( new ErrorG(err) );
207 		}
208 		
209 		scope(exit) Str.freeString(retStr);
210 		return Str.toString(retStr);
211 	}
212 
213 	/**
214 	 * Returns the display basename for the particular filename, guaranteed
215 	 * to be valid UTF-8. The display name might not be identical to the filename,
216 	 * for instance there might be problems converting it to UTF-8, and some files
217 	 * can be translated in the display.
218 	 *
219 	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
220 	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
221 	 * You can search the result for the UTF-8 encoding of this character (which is
222 	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
223 	 * encoding.
224 	 *
225 	 * You must pass the whole absolute pathname to this functions so that
226 	 * translation of well known locations can be done.
227 	 *
228 	 * This function is preferred over g_filename_display_name() if you know the
229 	 * whole path, as it allows translation.
230 	 *
231 	 * Params:
232 	 *     filename = an absolute pathname in the GLib file name encoding
233 	 *
234 	 * Return: a newly allocated string containing
235 	 *     a rendition of the basename of the filename in valid UTF-8
236 	 *
237 	 * Since: 2.6
238 	 */
239 	public static string filenameDisplayBasename(string filename)
240 	{
241 		auto retStr = g_filename_display_basename(Str.toStringz(filename));
242 		
243 		scope(exit) Str.freeString(retStr);
244 		return Str.toString(retStr);
245 	}
246 
247 	/**
248 	 * Converts a filename into a valid UTF-8 string. The conversion is
249 	 * not necessarily reversible, so you should keep the original around
250 	 * and use the return value of this function only for display purposes.
251 	 * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL
252 	 * even if the filename actually isn't in the GLib file name encoding.
253 	 *
254 	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
255 	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
256 	 * You can search the result for the UTF-8 encoding of this character (which is
257 	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
258 	 * encoding.
259 	 *
260 	 * If you know the whole pathname of the file you should use
261 	 * g_filename_display_basename(), since that allows location-based
262 	 * translation of filenames.
263 	 *
264 	 * Params:
265 	 *     filename = a pathname hopefully in the GLib file name encoding
266 	 *
267 	 * Return: a newly allocated string containing
268 	 *     a rendition of the filename in valid UTF-8
269 	 *
270 	 * Since: 2.6
271 	 */
272 	public static string filenameDisplayName(string filename)
273 	{
274 		auto retStr = g_filename_display_name(Str.toStringz(filename));
275 		
276 		scope(exit) Str.freeString(retStr);
277 		return Str.toString(retStr);
278 	}
279 
280 	/**
281 	 * Converts a string from UTF-8 to the encoding GLib uses for
282 	 * filenames. Note that on Windows GLib uses UTF-8 for filenames;
283 	 * on other platforms, this function indirectly depends on the
284 	 * [current locale][setlocale].
285 	 *
286 	 * Params:
287 	 *     utf8string = a UTF-8 encoded string.
288 	 *     len = the length of the string, or -1 if the string is
289 	 *         nul-terminated.
290 	 *     bytesRead = location to store the number of bytes in
291 	 *         the input string that were successfully converted, or %NULL.
292 	 *         Even if the conversion was successful, this may be
293 	 *         less than @len if there were partial characters
294 	 *         at the end of the input. If the error
295 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
296 	 *         stored will the byte offset after the last valid
297 	 *         input sequence.
298 	 *
299 	 * Return: The converted string, or %NULL on an error.
300 	 *
301 	 * Throws: GException on failure.
302 	 */
303 	public static string filenameFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead)
304 	{
305 		size_t bytesWritten;
306 		GError* err = null;
307 		
308 		auto retStr = g_filename_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);
309 		
310 		if (err !is null)
311 		{
312 			throw new GException( new ErrorG(err) );
313 		}
314 		
315 		scope(exit) Str.freeString(retStr);
316 		return Str.toString(retStr, bytesWritten);
317 	}
318 
319 	/**
320 	 * Converts a string which is in the encoding used by GLib for
321 	 * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
322 	 * for filenames; on other platforms, this function indirectly depends on
323 	 * the [current locale][setlocale].
324 	 *
325 	 * Params:
326 	 *     opsysstring = a string in the encoding for filenames
327 	 *     len = the length of the string, or -1 if the string is
328 	 *         nul-terminated (Note that some encodings may allow nul
329 	 *         bytes to occur inside strings. In that case, using -1
330 	 *         for the @len parameter is unsafe)
331 	 *     bytesRead = location to store the number of bytes in the
332 	 *         input string that were successfully converted, or %NULL.
333 	 *         Even if the conversion was successful, this may be
334 	 *         less than @len if there were partial characters
335 	 *         at the end of the input. If the error
336 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
337 	 *         stored will the byte offset after the last valid
338 	 *         input sequence.
339 	 *     bytesWritten = the number of bytes stored in the output buffer (not
340 	 *         including the terminating nul).
341 	 *
342 	 * Return: The converted string, or %NULL on an error.
343 	 *
344 	 * Throws: GException on failure.
345 	 */
346 	public static string filenameToUtf8(string opsysstring, ptrdiff_t len, size_t* bytesRead, size_t* bytesWritten)
347 	{
348 		GError* err = null;
349 		
350 		auto retStr = g_filename_to_utf8(Str.toStringz(opsysstring), len, bytesRead, bytesWritten, &err);
351 		
352 		if (err !is null)
353 		{
354 			throw new GException( new ErrorG(err) );
355 		}
356 		
357 		scope(exit) Str.freeString(retStr);
358 		return Str.toString(retStr);
359 	}
360 
361 	/**
362 	 * Obtains the character set for the [current locale][setlocale]; you
363 	 * might use this character set as an argument to g_convert(), to convert
364 	 * from the current locale's encoding to some other encoding. (Frequently
365 	 * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.)
366 	 *
367 	 * On Windows the character set returned by this function is the
368 	 * so-called system default ANSI code-page. That is the character set
369 	 * used by the "narrow" versions of C library and Win32 functions that
370 	 * handle file names. It might be different from the character set
371 	 * used by the C library's current locale.
372 	 *
373 	 * The return value is %TRUE if the locale's encoding is UTF-8, in that
374 	 * case you can perhaps avoid calling g_convert().
375 	 *
376 	 * The string returned in @charset is not allocated, and should not be
377 	 * freed.
378 	 *
379 	 * Params:
380 	 *     charset = return location for character set
381 	 *         name, or %NULL.
382 	 *
383 	 * Return: %TRUE if the returned charset is UTF-8
384 	 */
385 	public static bool getCharset(out string charset)
386 	{
387 		char* outcharset = null;
388 		
389 		auto p = g_get_charset(&outcharset) != 0;
390 		
391 		charset = Str.toString(outcharset);
392 		
393 		return p;
394 	}
395 
396 	/**
397 	 * Gets the character set for the current locale.
398 	 *
399 	 * Return: a newly allocated string containing the name
400 	 *     of the character set. This string must be freed with g_free().
401 	 */
402 	public static string getCodeset()
403 	{
404 		auto retStr = g_get_codeset();
405 		
406 		scope(exit) Str.freeString(retStr);
407 		return Str.toString(retStr);
408 	}
409 
410 	/**
411 	 * Determines the preferred character sets used for filenames.
412 	 * The first character set from the @charsets is the filename encoding, the
413 	 * subsequent character sets are used when trying to generate a displayable
414 	 * representation of a filename, see g_filename_display_name().
415 	 *
416 	 * On Unix, the character sets are determined by consulting the
417 	 * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
418 	 * On Windows, the character set used in the GLib API is always UTF-8
419 	 * and said environment variables have no effect.
420 	 *
421 	 * `G_FILENAME_ENCODING` may be set to a comma-separated list of
422 	 * character set names. The special token "&commat;locale" is taken
423 	 * to  mean the character set for the [current locale][setlocale].
424 	 * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
425 	 * the character set of the current locale is taken as the filename
426 	 * encoding. If neither environment variable  is set, UTF-8 is taken
427 	 * as the filename encoding, but the character set of the current locale
428 	 * is also put in the list of encodings.
429 	 *
430 	 * The returned @charsets belong to GLib and must not be freed.
431 	 *
432 	 * Note that on Unix, regardless of the locale character set or
433 	 * `G_FILENAME_ENCODING` value, the actual file names present
434 	 * on a system might be in any random encoding or just gibberish.
435 	 *
436 	 * Params:
437 	 *     charsets = return location for the %NULL-terminated list of encoding names
438 	 *
439 	 * Return: %TRUE if the filename encoding is UTF-8.
440 	 *
441 	 * Since: 2.6
442 	 */
443 	public static bool getFilenameCharsets(string[][] charsets)
444 	{
445 		return g_get_filename_charsets(Str.toStringzArray(charsets)) != 0;
446 	}
447 
448 	/**
449 	 * Converts a string from UTF-8 to the encoding used for strings by
450 	 * the C runtime (usually the same as that used by the operating
451 	 * system) in the [current locale][setlocale]. On Windows this means
452 	 * the system codepage.
453 	 *
454 	 * Params:
455 	 *     utf8string = a UTF-8 encoded string
456 	 *     len = the length of the string, or -1 if the string is
457 	 *         nul-terminated (Note that some encodings may allow nul
458 	 *         bytes to occur inside strings. In that case, using -1
459 	 *         for the @len parameter is unsafe)
460 	 *     bytesRead = location to store the number of bytes in the
461 	 *         input string that were successfully converted, or %NULL.
462 	 *         Even if the conversion was successful, this may be
463 	 *         less than @len if there were partial characters
464 	 *         at the end of the input. If the error
465 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
466 	 *         stored will the byte offset after the last valid
467 	 *         input sequence.
468 	 *     bytesWritten = the number of bytes stored in the output buffer (not
469 	 *         including the terminating nul).
470 	 *
471 	 * Return: A newly-allocated buffer containing the converted string,
472 	 *     or %NULL on an error, and error will be set.
473 	 *
474 	 * Throws: GException on failure.
475 	 */
476 	public static string localeFromUtf8(string utf8string, ptrdiff_t len, size_t* bytesRead, size_t* bytesWritten)
477 	{
478 		GError* err = null;
479 		
480 		auto retStr = g_locale_from_utf8(Str.toStringz(utf8string), len, bytesRead, bytesWritten, &err);
481 		
482 		if (err !is null)
483 		{
484 			throw new GException( new ErrorG(err) );
485 		}
486 		
487 		scope(exit) Str.freeString(retStr);
488 		return Str.toString(retStr);
489 	}
490 
491 	/**
492 	 * Converts a string which is in the encoding used for strings by
493 	 * the C runtime (usually the same as that used by the operating
494 	 * system) in the [current locale][setlocale] into a UTF-8 string.
495 	 *
496 	 * Params:
497 	 *     opsysstring = a string in the encoding of the current locale. On Windows
498 	 *         this means the system codepage.
499 	 *     len = the length of the string, or -1 if the string is
500 	 *         nul-terminated (Note that some encodings may allow nul
501 	 *         bytes to occur inside strings. In that case, using -1
502 	 *         for the @len parameter is unsafe)
503 	 *     bytesRead = location to store the number of bytes in the
504 	 *         input string that were successfully converted, or %NULL.
505 	 *         Even if the conversion was successful, this may be
506 	 *         less than @len if there were partial characters
507 	 *         at the end of the input. If the error
508 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
509 	 *         stored will the byte offset after the last valid
510 	 *         input sequence.
511 	 *     bytesWritten = the number of bytes stored in the output buffer (not
512 	 *         including the terminating nul).
513 	 *
514 	 * Return: A newly-allocated buffer containing the converted string,
515 	 *     or %NULL on an error, and error will be set.
516 	 *
517 	 * Throws: GException on failure.
518 	 */
519 	public static string localeToUtf8(string opsysstring, ptrdiff_t len, size_t* bytesRead, size_t* bytesWritten)
520 	{
521 		GError* err = null;
522 		
523 		auto retStr = g_locale_to_utf8(Str.toStringz(opsysstring), len, bytesRead, bytesWritten, &err);
524 		
525 		if (err !is null)
526 		{
527 			throw new GException( new ErrorG(err) );
528 		}
529 		
530 		scope(exit) Str.freeString(retStr);
531 		return Str.toString(retStr);
532 	}
533 }