glib.CharacterSet source code

1 /*
2  * This file is part of gtkD.
3  *
4  * gtkD is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU Lesser General Public License
6  * as published by the Free Software Foundation; either version 3
7  * of the License, or (at your option) any later version, with
8  * some exceptions, please read the COPYING file.
9  *
10  * gtkD is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with gtkD; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
18  */
19 
20 // generated automatically - do not change
21 // find conversion definition on APILookup.txt
22 // implement new conversion functionalities on the wrap.utils package
23 
24 
25 module glib.CharacterSet;
26 
27 private import glib.ErrorG;
28 private import glib.GException;
29 private import glib.Str;
30 private import gtkc.glib;
31 public  import gtkc.glibtypes;
32 
33 
34 /** */
35 public struct CharacterSet
36 {
37 
38 	/**
39 	 * Converts a string from one character set to another.
40 	 *
41 	 * Note that you should use g_iconv() for streaming conversions.
42 	 * Despite the fact that @bytes_read can return information about partial
43 	 * characters, the g_convert_... functions are not generally suitable
44 	 * for streaming. If the underlying converter maintains internal state,
45 	 * then this won't be preserved across successive calls to g_convert(),
46 	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
47 	 * this is the GNU C converter for CP1255 which does not emit a base
48 	 * character until it knows that the next character is not a mark that
49 	 * could combine with the base character.)
50 	 *
51 	 * Using extensions such as "//TRANSLIT" may not work (or may not work
52 	 * well) on many platforms.  Consider using g_str_to_ascii() instead.
53 	 *
54 	 * Params:
55 	 *     str = the string to convert
56 	 *     len = the length of the string in bytes, or -1 if the string is
57 	 *         nul-terminated (Note that some encodings may allow nul
58 	 *         bytes to occur inside strings. In that case, using -1
59 	 *         for the @len parameter is unsafe)
60 	 *     toCodeset = name of character set into which to convert @str
61 	 *     fromCodeset = character set of @str.
62 	 *     bytesRead = location to store the number of bytes in the
63 	 *         input string that were successfully converted, or %NULL.
64 	 *         Even if the conversion was successful, this may be
65 	 *         less than @len if there were partial characters
66 	 *         at the end of the input. If the error
67 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
68 	 *         stored will be the byte offset after the last valid
69 	 *         input sequence.
70 	 *     bytesWritten = the number of bytes stored in the output buffer (not
71 	 *         including the terminating nul).
72 	 *
73 	 * Returns: If the conversion was successful, a newly allocated
74 	 *     nul-terminated string, which must be freed with
75 	 *     g_free(). Otherwise %NULL and @error will be set.
76 	 *
77 	 * Throws: GException on failure.
78 	 */
79 	public static string convert(string str, ptrdiff_t len, string toCodeset, string fromCodeset, out size_t bytesRead, out size_t bytesWritten)
80 	{
81 		GError* err = null;
82 		
83 		auto retStr = g_convert(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), &bytesRead, &bytesWritten, &err);
84 		
85 		if (err !is null)
86 		{
87 			throw new GException( new ErrorG(err) );
88 		}
89 		
90 		scope(exit) Str.freeString(retStr);
91 		return Str.toString(retStr);
92 	}
93 
94 	/** */
95 	public static GQuark convertErrorQuark()
96 	{
97 		return g_convert_error_quark();
98 	}
99 
100 	/**
101 	 * Converts a string from one character set to another, possibly
102 	 * including fallback sequences for characters not representable
103 	 * in the output. Note that it is not guaranteed that the specification
104 	 * for the fallback sequences in @fallback will be honored. Some
105 	 * systems may do an approximate conversion from @from_codeset
106 	 * to @to_codeset in their iconv() functions,
107 	 * in which case GLib will simply return that approximate conversion.
108 	 *
109 	 * Note that you should use g_iconv() for streaming conversions.
110 	 * Despite the fact that @bytes_read can return information about partial
111 	 * characters, the g_convert_... functions are not generally suitable
112 	 * for streaming. If the underlying converter maintains internal state,
113 	 * then this won't be preserved across successive calls to g_convert(),
114 	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
115 	 * this is the GNU C converter for CP1255 which does not emit a base
116 	 * character until it knows that the next character is not a mark that
117 	 * could combine with the base character.)
118 	 *
119 	 * Params:
120 	 *     str = the string to convert
121 	 *     len = the length of the string in bytes, or -1 if the string is
122 	 *         nul-terminated (Note that some encodings may allow nul
123 	 *         bytes to occur inside strings. In that case, using -1
124 	 *         for the @len parameter is unsafe)
125 	 *     toCodeset = name of character set into which to convert @str
126 	 *     fromCodeset = character set of @str.
127 	 *     fallback = UTF-8 string to use in place of character not
128 	 *         present in the target encoding. (The string must be
129 	 *         representable in the target encoding).
130 	 *         If %NULL, characters not in the target encoding will
131 	 *         be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
132 	 *     bytesRead = location to store the number of bytes in the
133 	 *         input string that were successfully converted, or %NULL.
134 	 *         Even if the conversion was successful, this may be
135 	 *         less than @len if there were partial characters
136 	 *         at the end of the input.
137 	 *     bytesWritten = the number of bytes stored in the output buffer (not
138 	 *         including the terminating nul).
139 	 *
140 	 * Returns: If the conversion was successful, a newly allocated
141 	 *     nul-terminated string, which must be freed with
142 	 *     g_free(). Otherwise %NULL and @error will be set.
143 	 *
144 	 * Throws: GException on failure.
145 	 */
146 	public static string convertWithFallback(string str, ptrdiff_t len, string toCodeset, string fromCodeset, string fallback, size_t* bytesRead, size_t* bytesWritten)
147 	{
148 		GError* err = null;
149 		
150 		auto retStr = g_convert_with_fallback(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), Str.toStringz(fallback), bytesRead, bytesWritten, &err);
151 		
152 		if (err !is null)
153 		{
154 			throw new GException( new ErrorG(err) );
155 		}
156 		
157 		scope(exit) Str.freeString(retStr);
158 		return Str.toString(retStr);
159 	}
160 
161 	/**
162 	 * Converts a string from one character set to another.
163 	 *
164 	 * Note that you should use g_iconv() for streaming conversions.
165 	 * Despite the fact that @bytes_read can return information about partial
166 	 * characters, the g_convert_... functions are not generally suitable
167 	 * for streaming. If the underlying converter maintains internal state,
168 	 * then this won't be preserved across successive calls to g_convert(),
169 	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
170 	 * this is the GNU C converter for CP1255 which does not emit a base
171 	 * character until it knows that the next character is not a mark that
172 	 * could combine with the base character.)
173 	 *
174 	 * Params:
175 	 *     str = the string to convert
176 	 *     len = the length of the string in bytes, or -1 if the string is
177 	 *         nul-terminated (Note that some encodings may allow nul
178 	 *         bytes to occur inside strings. In that case, using -1
179 	 *         for the @len parameter is unsafe)
180 	 *     converter = conversion descriptor from g_iconv_open()
181 	 *     bytesRead = location to store the number of bytes in the
182 	 *         input string that were successfully converted, or %NULL.
183 	 *         Even if the conversion was successful, this may be
184 	 *         less than @len if there were partial characters
185 	 *         at the end of the input. If the error
186 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
187 	 *         stored will be the byte offset after the last valid
188 	 *         input sequence.
189 	 *     bytesWritten = the number of bytes stored in the output buffer (not
190 	 *         including the terminating nul).
191 	 *
192 	 * Returns: If the conversion was successful, a newly allocated
193 	 *     nul-terminated string, which must be freed with
194 	 *     g_free(). Otherwise %NULL and @error will be set.
195 	 *
196 	 * Throws: GException on failure.
197 	 */
198 	public static string convertWithIconv(string str, ptrdiff_t len, GIConv converter, size_t* bytesRead, size_t* bytesWritten)
199 	{
200 		GError* err = null;
201 		
202 		auto retStr = g_convert_with_iconv(Str.toStringz(str), len, converter, bytesRead, bytesWritten, &err);
203 		
204 		if (err !is null)
205 		{
206 			throw new GException( new ErrorG(err) );
207 		}
208 		
209 		scope(exit) Str.freeString(retStr);
210 		return Str.toString(retStr);
211 	}
212 
213 	/**
214 	 * Returns the display basename for the particular filename, guaranteed
215 	 * to be valid UTF-8. The display name might not be identical to the filename,
216 	 * for instance there might be problems converting it to UTF-8, and some files
217 	 * can be translated in the display.
218 	 *
219 	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
220 	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
221 	 * You can search the result for the UTF-8 encoding of this character (which is
222 	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
223 	 * encoding.
224 	 *
225 	 * You must pass the whole absolute pathname to this function so that
226 	 * translation of well known locations can be done.
227 	 *
228 	 * This function is preferred over g_filename_display_name() if you know the
229 	 * whole path, as it allows translation.
230 	 *
231 	 * Params:
232 	 *     filename = an absolute pathname in the
233 	 *         GLib file name encoding
234 	 *
235 	 * Returns: a newly allocated string containing
236 	 *     a rendition of the basename of the filename in valid UTF-8
237 	 *
238 	 * Since: 2.6
239 	 */
240 	public static string filenameDisplayBasename(string filename)
241 	{
242 		auto retStr = g_filename_display_basename(Str.toStringz(filename));
243 		
244 		scope(exit) Str.freeString(retStr);
245 		return Str.toString(retStr);
246 	}
247 
248 	/**
249 	 * Converts a filename into a valid UTF-8 string. The conversion is
250 	 * not necessarily reversible, so you should keep the original around
251 	 * and use the return value of this function only for display purposes.
252 	 * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL
253 	 * even if the filename actually isn't in the GLib file name encoding.
254 	 *
255 	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
256 	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
257 	 * You can search the result for the UTF-8 encoding of this character (which is
258 	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
259 	 * encoding.
260 	 *
261 	 * If you know the whole pathname of the file you should use
262 	 * g_filename_display_basename(), since that allows location-based
263 	 * translation of filenames.
264 	 *
265 	 * Params:
266 	 *     filename = a pathname hopefully in the
267 	 *         GLib file name encoding
268 	 *
269 	 * Returns: a newly allocated string containing
270 	 *     a rendition of the filename in valid UTF-8
271 	 *
272 	 * Since: 2.6
273 	 */
274 	public static string filenameDisplayName(string filename)
275 	{
276 		auto retStr = g_filename_display_name(Str.toStringz(filename));
277 		
278 		scope(exit) Str.freeString(retStr);
279 		return Str.toString(retStr);
280 	}
281 
282 	/**
283 	 * Converts a string from UTF-8 to the encoding GLib uses for
284 	 * filenames. Note that on Windows GLib uses UTF-8 for filenames;
285 	 * on other platforms, this function indirectly depends on the
286 	 * [current locale][setlocale].
287 	 *
288 	 * Params:
289 	 *     utf8string = a UTF-8 encoded string.
290 	 *     len = the length of the string, or -1 if the string is
291 	 *         nul-terminated.
292 	 *     bytesRead = location to store the number of bytes in
293 	 *         the input string that were successfully converted, or %NULL.
294 	 *         Even if the conversion was successful, this may be
295 	 *         less than @len if there were partial characters
296 	 *         at the end of the input. If the error
297 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
298 	 *         stored will be the byte offset after the last valid
299 	 *         input sequence.
300 	 *
301 	 * Returns: The converted string, or %NULL on an error.
302 	 *
303 	 * Throws: GException on failure.
304 	 */
305 	public static string filenameFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead)
306 	{
307 		size_t bytesWritten;
308 		GError* err = null;
309 		
310 		auto retStr = g_filename_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);
311 		
312 		if (err !is null)
313 		{
314 			throw new GException( new ErrorG(err) );
315 		}
316 		
317 		scope(exit) Str.freeString(retStr);
318 		return Str.toString(retStr, bytesWritten);
319 	}
320 
321 	/**
322 	 * Converts a string which is in the encoding used by GLib for
323 	 * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
324 	 * for filenames; on other platforms, this function indirectly depends on
325 	 * the [current locale][setlocale].
326 	 *
327 	 * Params:
328 	 *     opsysstring = a string in the encoding for filenames
329 	 *     len = the length of the string, or -1 if the string is
330 	 *         nul-terminated (Note that some encodings may allow nul
331 	 *         bytes to occur inside strings. In that case, using -1
332 	 *         for the @len parameter is unsafe)
333 	 *     bytesRead = location to store the number of bytes in the
334 	 *         input string that were successfully converted, or %NULL.
335 	 *         Even if the conversion was successful, this may be
336 	 *         less than @len if there were partial characters
337 	 *         at the end of the input. If the error
338 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
339 	 *         stored will be the byte offset after the last valid
340 	 *         input sequence.
341 	 *     bytesWritten = the number of bytes stored in the output
342 	 *         buffer (not including the terminating nul).
343 	 *
344 	 * Returns: The converted string, or %NULL on an error.
345 	 *
346 	 * Throws: GException on failure.
347 	 */
348 	public static string filenameToUtf8(string opsysstring, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
349 	{
350 		GError* err = null;
351 		
352 		auto retStr = g_filename_to_utf8(Str.toStringz(opsysstring), len, &bytesRead, &bytesWritten, &err);
353 		
354 		if (err !is null)
355 		{
356 			throw new GException( new ErrorG(err) );
357 		}
358 		
359 		scope(exit) Str.freeString(retStr);
360 		return Str.toString(retStr);
361 	}
362 
363 	/**
364 	 * Obtains the character set for the [current locale][setlocale]; you
365 	 * might use this character set as an argument to g_convert(), to convert
366 	 * from the current locale's encoding to some other encoding. (Frequently
367 	 * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.)
368 	 *
369 	 * On Windows the character set returned by this function is the
370 	 * so-called system default ANSI code-page. That is the character set
371 	 * used by the "narrow" versions of C library and Win32 functions that
372 	 * handle file names. It might be different from the character set
373 	 * used by the C library's current locale.
374 	 *
375 	 * The return value is %TRUE if the locale's encoding is UTF-8, in that
376 	 * case you can perhaps avoid calling g_convert().
377 	 *
378 	 * The string returned in @charset is not allocated, and should not be
379 	 * freed.
380 	 *
381 	 * Params:
382 	 *     charset = return location for character set
383 	 *         name, or %NULL.
384 	 *
385 	 * Returns: %TRUE if the returned charset is UTF-8
386 	 */
387 	public static bool getCharset(out string charset)
388 	{
389 		char* outcharset = null;
390 		
391 		auto p = g_get_charset(&outcharset) != 0;
392 		
393 		charset = Str.toString(outcharset);
394 		
395 		return p;
396 	}
397 
398 	/**
399 	 * Gets the character set for the current locale.
400 	 *
401 	 * Returns: a newly allocated string containing the name
402 	 *     of the character set. This string must be freed with g_free().
403 	 */
404 	public static string getCodeset()
405 	{
406 		auto retStr = g_get_codeset();
407 		
408 		scope(exit) Str.freeString(retStr);
409 		return Str.toString(retStr);
410 	}
411 
412 	/**
413 	 * Determines the preferred character sets used for filenames.
414 	 * The first character set from the @charsets is the filename encoding, the
415 	 * subsequent character sets are used when trying to generate a displayable
416 	 * representation of a filename, see g_filename_display_name().
417 	 *
418 	 * On Unix, the character sets are determined by consulting the
419 	 * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
420 	 * On Windows, the character set used in the GLib API is always UTF-8
421 	 * and said environment variables have no effect.
422 	 *
423 	 * `G_FILENAME_ENCODING` may be set to a comma-separated list of
424 	 * character set names. The special token "\@locale" is taken
425 	 * to  mean the character set for the [current locale][setlocale].
426 	 * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
427 	 * the character set of the current locale is taken as the filename
428 	 * encoding. If neither environment variable  is set, UTF-8 is taken
429 	 * as the filename encoding, but the character set of the current locale
430 	 * is also put in the list of encodings.
431 	 *
432 	 * The returned @charsets belong to GLib and must not be freed.
433 	 *
434 	 * Note that on Unix, regardless of the locale character set or
435 	 * `G_FILENAME_ENCODING` value, the actual file names present
436 	 * on a system might be in any random encoding or just gibberish.
437 	 *
438 	 * Params:
439 	 *     charsets = return location for the %NULL-terminated list of encoding names
440 	 *
441 	 * Returns: %TRUE if the filename encoding is UTF-8.
442 	 *
443 	 * Since: 2.6
444 	 */
445 	public static bool getFilenameCharsets(string[][] charsets)
446 	{
447 		return g_get_filename_charsets(Str.toStringzArray(charsets)) != 0;
448 	}
449 
450 	/**
451 	 * Converts a string from UTF-8 to the encoding used for strings by
452 	 * the C runtime (usually the same as that used by the operating
453 	 * system) in the [current locale][setlocale]. On Windows this means
454 	 * the system codepage.
455 	 *
456 	 * Params:
457 	 *     utf8string = a UTF-8 encoded string
458 	 *     len = the length of the string, or -1 if the string is
459 	 *         nul-terminated (Note that some encodings may allow nul
460 	 *         bytes to occur inside strings. In that case, using -1
461 	 *         for the @len parameter is unsafe)
462 	 *     bytesRead = location to store the number of bytes in the
463 	 *         input string that were successfully converted, or %NULL.
464 	 *         Even if the conversion was successful, this may be
465 	 *         less than @len if there were partial characters
466 	 *         at the end of the input. If the error
467 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
468 	 *         stored will be the byte offset after the last valid
469 	 *         input sequence.
470 	 *     bytesWritten = the number of bytes stored in the output
471 	 *         buffer (not including the terminating nul).
472 	 *
473 	 * Returns: A newly-allocated buffer containing the converted string,
474 	 *     or %NULL on an error, and error will be set.
475 	 *
476 	 * Throws: GException on failure.
477 	 */
478 	public static string localeFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
479 	{
480 		GError* err = null;
481 		
482 		auto retStr = g_locale_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);
483 		
484 		if (err !is null)
485 		{
486 			throw new GException( new ErrorG(err) );
487 		}
488 		
489 		scope(exit) Str.freeString(retStr);
490 		return Str.toString(retStr);
491 	}
492 
493 	/**
494 	 * Converts a string which is in the encoding used for strings by
495 	 * the C runtime (usually the same as that used by the operating
496 	 * system) in the [current locale][setlocale] into a UTF-8 string.
497 	 *
498 	 * Params:
499 	 *     opsysstring = a string in the encoding of the current locale. On Windows
500 	 *         this means the system codepage.
501 	 *     len = the length of the string, or -1 if the string is
502 	 *         nul-terminated (Note that some encodings may allow nul
503 	 *         bytes to occur inside strings. In that case, using -1
504 	 *         for the @len parameter is unsafe)
505 	 *     bytesRead = location to store the number of bytes in the
506 	 *         input string that were successfully converted, or %NULL.
507 	 *         Even if the conversion was successful, this may be
508 	 *         less than @len if there were partial characters
509 	 *         at the end of the input. If the error
510 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
511 	 *         stored will be the byte offset after the last valid
512 	 *         input sequence.
513 	 *     bytesWritten = the number of bytes stored in the output
514 	 *         buffer (not including the terminating nul).
515 	 *
516 	 * Returns: A newly-allocated buffer containing the converted string,
517 	 *     or %NULL on an error, and error will be set.
518 	 *
519 	 * Throws: GException on failure.
520 	 */
521 	public static string localeToUtf8(string opsysstring, ptrdiff_t len, out size_t bytesRead, out size_t bytesWritten)
522 	{
523 		GError* err = null;
524 		
525 		auto retStr = g_locale_to_utf8(Str.toStringz(opsysstring), len, &bytesRead, &bytesWritten, &err);
526 		
527 		if (err !is null)
528 		{
529 			throw new GException( new ErrorG(err) );
530 		}
531 		
532 		scope(exit) Str.freeString(retStr);
533 		return Str.toString(retStr);
534 	}
535 }