glib.CharacterSet source code

1 /*
2  * This file is part of gtkD.
3  *
4  * gtkD is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU Lesser General Public License
6  * as published by the Free Software Foundation; either version 3
7  * of the License, or (at your option) any later version, with
8  * some exceptions, please read the COPYING file.
9  *
10  * gtkD is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with gtkD; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
18  */
19 
20 // generated automatically - do not change
21 // find conversion definition on APILookup.txt
22 // implement new conversion functionalities on the wrap.utils package
23 
24 
25 module glib.CharacterSet;
26 
27 private import glib.ErrorG;
28 private import glib.GException;
29 private import glib.Str;
30 private import gtkc.glib;
31 public  import gtkc.glibtypes;
32 
33 
34 public struct CharacterSet
35 {
36 	/**
37 	 */
38 
39 	/**
40 	 * Converts a string from one character set to another.
41 	 *
42 	 * Note that you should use g_iconv() for streaming conversions.
43 	 * Despite the fact that @bytes_read can return information about partial
44 	 * characters, the g_convert_... functions are not generally suitable
45 	 * for streaming. If the underlying converter maintains internal state,
46 	 * then this won't be preserved across successive calls to g_convert(),
47 	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
48 	 * this is the GNU C converter for CP1255 which does not emit a base
49 	 * character until it knows that the next character is not a mark that
50 	 * could combine with the base character.)
51 	 *
52 	 * Using extensions such as "//TRANSLIT" may not work (or may not work
53 	 * well) on many platforms.  Consider using g_str_to_ascii() instead.
54 	 *
55 	 * Params:
56 	 *     str = the string to convert
57 	 *     len = the length of the string in bytes, or -1 if the string is
58 	 *         nul-terminated (Note that some encodings may allow nul
59 	 *         bytes to occur inside strings. In that case, using -1
60 	 *         for the @len parameter is unsafe)
61 	 *     toCodeset = name of character set into which to convert @str
62 	 *     fromCodeset = character set of @str.
63 	 *     bytesRead = location to store the number of bytes in the
64 	 *         input string that were successfully converted, or %NULL.
65 	 *         Even if the conversion was successful, this may be
66 	 *         less than @len if there were partial characters
67 	 *         at the end of the input. If the error
68 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
69 	 *         stored will be the byte offset after the last valid
70 	 *         input sequence.
71 	 *     bytesWritten = the number of bytes stored in the output buffer (not
72 	 *         including the terminating nul).
73 	 *
74 	 * Return: If the conversion was successful, a newly allocated
75 	 *     nul-terminated string, which must be freed with
76 	 *     g_free(). Otherwise %NULL and @error will be set.
77 	 *
78 	 * Throws: GException on failure.
79 	 */
80 	public static string convert(string str, ptrdiff_t len, string toCodeset, string fromCodeset, out size_t bytesRead, out size_t bytesWritten)
81 	{
82 		GError* err = null;
83 		
84 		auto p = g_convert(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), &bytesRead, &bytesWritten, &err);
85 		
86 		if (err !is null)
87 		{
88 			throw new GException( new ErrorG(err) );
89 		}
90 		
91 		return Str.toString(p);
92 	}
93 
94 	public static GQuark convertErrorQuark()
95 	{
96 		return g_convert_error_quark();
97 	}
98 
99 	/**
100 	 * Converts a string from one character set to another, possibly
101 	 * including fallback sequences for characters not representable
102 	 * in the output. Note that it is not guaranteed that the specification
103 	 * for the fallback sequences in @fallback will be honored. Some
104 	 * systems may do an approximate conversion from @from_codeset
105 	 * to @to_codeset in their iconv() functions,
106 	 * in which case GLib will simply return that approximate conversion.
107 	 *
108 	 * Note that you should use g_iconv() for streaming conversions.
109 	 * Despite the fact that @bytes_read can return information about partial
110 	 * characters, the g_convert_... functions are not generally suitable
111 	 * for streaming. If the underlying converter maintains internal state,
112 	 * then this won't be preserved across successive calls to g_convert(),
113 	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
114 	 * this is the GNU C converter for CP1255 which does not emit a base
115 	 * character until it knows that the next character is not a mark that
116 	 * could combine with the base character.)
117 	 *
118 	 * Params:
119 	 *     str = the string to convert
120 	 *     len = the length of the string in bytes, or -1 if the string is
121 	 *         nul-terminated (Note that some encodings may allow nul
122 	 *         bytes to occur inside strings. In that case, using -1
123 	 *         for the @len parameter is unsafe)
124 	 *     toCodeset = name of character set into which to convert @str
125 	 *     fromCodeset = character set of @str.
126 	 *     fallback = UTF-8 string to use in place of character not
127 	 *         present in the target encoding. (The string must be
128 	 *         representable in the target encoding).
129 	 *         If %NULL, characters not in the target encoding will
130 	 *         be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
131 	 *     bytesRead = location to store the number of bytes in the
132 	 *         input string that were successfully converted, or %NULL.
133 	 *         Even if the conversion was successful, this may be
134 	 *         less than @len if there were partial characters
135 	 *         at the end of the input.
136 	 *     bytesWritten = the number of bytes stored in the output buffer (not
137 	 *         including the terminating nul).
138 	 *
139 	 * Return: If the conversion was successful, a newly allocated
140 	 *     nul-terminated string, which must be freed with
141 	 *     g_free(). Otherwise %NULL and @error will be set.
142 	 *
143 	 * Throws: GException on failure.
144 	 */
145 	public static string convertWithFallback(string str, ptrdiff_t len, string toCodeset, string fromCodeset, string fallback, size_t* bytesRead, size_t* bytesWritten)
146 	{
147 		GError* err = null;
148 		
149 		auto p = g_convert_with_fallback(Str.toStringz(str), len, Str.toStringz(toCodeset), Str.toStringz(fromCodeset), Str.toStringz(fallback), bytesRead, bytesWritten, &err);
150 		
151 		if (err !is null)
152 		{
153 			throw new GException( new ErrorG(err) );
154 		}
155 		
156 		return Str.toString(p);
157 	}
158 
159 	/**
160 	 * Converts a string from one character set to another.
161 	 *
162 	 * Note that you should use g_iconv() for streaming conversions.
163 	 * Despite the fact that @bytes_read can return information about partial
164 	 * characters, the g_convert_... functions are not generally suitable
165 	 * for streaming. If the underlying converter maintains internal state,
166 	 * then this won't be preserved across successive calls to g_convert(),
167 	 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
168 	 * this is the GNU C converter for CP1255 which does not emit a base
169 	 * character until it knows that the next character is not a mark that
170 	 * could combine with the base character.)
171 	 *
172 	 * Params:
173 	 *     str = the string to convert
174 	 *     len = the length of the string in bytes, or -1 if the string is
175 	 *         nul-terminated (Note that some encodings may allow nul
176 	 *         bytes to occur inside strings. In that case, using -1
177 	 *         for the @len parameter is unsafe)
178 	 *     converter = conversion descriptor from g_iconv_open()
179 	 *     bytesRead = location to store the number of bytes in the
180 	 *         input string that were successfully converted, or %NULL.
181 	 *         Even if the conversion was successful, this may be
182 	 *         less than @len if there were partial characters
183 	 *         at the end of the input. If the error
184 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
185 	 *         stored will be the byte offset after the last valid
186 	 *         input sequence.
187 	 *     bytesWritten = the number of bytes stored in the output buffer (not
188 	 *         including the terminating nul).
189 	 *
190 	 * Return: If the conversion was successful, a newly allocated
191 	 *     nul-terminated string, which must be freed with
192 	 *     g_free(). Otherwise %NULL and @error will be set.
193 	 *
194 	 * Throws: GException on failure.
195 	 */
196 	public static string convertWithIconv(string str, ptrdiff_t len, GIConv converter, size_t* bytesRead, size_t* bytesWritten)
197 	{
198 		GError* err = null;
199 		
200 		auto p = g_convert_with_iconv(Str.toStringz(str), len, converter, bytesRead, bytesWritten, &err);
201 		
202 		if (err !is null)
203 		{
204 			throw new GException( new ErrorG(err) );
205 		}
206 		
207 		return Str.toString(p);
208 	}
209 
210 	/**
211 	 * Returns the display basename for the particular filename, guaranteed
212 	 * to be valid UTF-8. The display name might not be identical to the filename,
213 	 * for instance there might be problems converting it to UTF-8, and some files
214 	 * can be translated in the display.
215 	 *
216 	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
217 	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
218 	 * You can search the result for the UTF-8 encoding of this character (which is
219 	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
220 	 * encoding.
221 	 *
222 	 * You must pass the whole absolute pathname to this functions so that
223 	 * translation of well known locations can be done.
224 	 *
225 	 * This function is preferred over g_filename_display_name() if you know the
226 	 * whole path, as it allows translation.
227 	 *
228 	 * Params:
229 	 *     filename = an absolute pathname in the GLib file name encoding
230 	 *
231 	 * Return: a newly allocated string containing
232 	 *     a rendition of the basename of the filename in valid UTF-8
233 	 *
234 	 * Since: 2.6
235 	 */
236 	public static string filenameDisplayBasename(string filename)
237 	{
238 		return Str.toString(g_filename_display_basename(Str.toStringz(filename)));
239 	}
240 
241 	/**
242 	 * Converts a filename into a valid UTF-8 string. The conversion is
243 	 * not necessarily reversible, so you should keep the original around
244 	 * and use the return value of this function only for display purposes.
245 	 * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL
246 	 * even if the filename actually isn't in the GLib file name encoding.
247 	 *
248 	 * If GLib cannot make sense of the encoding of @filename, as a last resort it
249 	 * replaces unknown characters with U+FFFD, the Unicode replacement character.
250 	 * You can search the result for the UTF-8 encoding of this character (which is
251 	 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
252 	 * encoding.
253 	 *
254 	 * If you know the whole pathname of the file you should use
255 	 * g_filename_display_basename(), since that allows location-based
256 	 * translation of filenames.
257 	 *
258 	 * Params:
259 	 *     filename = a pathname hopefully in the GLib file name encoding
260 	 *
261 	 * Return: a newly allocated string containing
262 	 *     a rendition of the filename in valid UTF-8
263 	 *
264 	 * Since: 2.6
265 	 */
266 	public static string filenameDisplayName(string filename)
267 	{
268 		return Str.toString(g_filename_display_name(Str.toStringz(filename)));
269 	}
270 
271 	/**
272 	 * Converts a string from UTF-8 to the encoding GLib uses for
273 	 * filenames. Note that on Windows GLib uses UTF-8 for filenames;
274 	 * on other platforms, this function indirectly depends on the
275 	 * [current locale][setlocale].
276 	 *
277 	 * Params:
278 	 *     utf8string = a UTF-8 encoded string.
279 	 *     len = the length of the string, or -1 if the string is
280 	 *         nul-terminated.
281 	 *     bytesRead = location to store the number of bytes in
282 	 *         the input string that were successfully converted, or %NULL.
283 	 *         Even if the conversion was successful, this may be
284 	 *         less than @len if there were partial characters
285 	 *         at the end of the input. If the error
286 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
287 	 *         stored will be the byte offset after the last valid
288 	 *         input sequence.
289 	 *
290 	 * Return: The converted string, or %NULL on an error.
291 	 *
292 	 * Throws: GException on failure.
293 	 */
294 	public static string filenameFromUtf8(string utf8string, ptrdiff_t len, out size_t bytesRead)
295 	{
296 		size_t bytesWritten;
297 		GError* err = null;
298 		
299 		auto p = g_filename_from_utf8(Str.toStringz(utf8string), len, &bytesRead, &bytesWritten, &err);
300 		
301 		if (err !is null)
302 		{
303 			throw new GException( new ErrorG(err) );
304 		}
305 		
306 		return Str.toString(p, bytesWritten);
307 	}
308 
309 	/**
310 	 * Converts a string which is in the encoding used by GLib for
311 	 * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
312 	 * for filenames; on other platforms, this function indirectly depends on
313 	 * the [current locale][setlocale].
314 	 *
315 	 * Params:
316 	 *     opsysstring = a string in the encoding for filenames
317 	 *     len = the length of the string, or -1 if the string is
318 	 *         nul-terminated (Note that some encodings may allow nul
319 	 *         bytes to occur inside strings. In that case, using -1
320 	 *         for the @len parameter is unsafe)
321 	 *     bytesRead = location to store the number of bytes in the
322 	 *         input string that were successfully converted, or %NULL.
323 	 *         Even if the conversion was successful, this may be
324 	 *         less than @len if there were partial characters
325 	 *         at the end of the input. If the error
326 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
327 	 *         stored will be the byte offset after the last valid
328 	 *         input sequence.
329 	 *     bytesWritten = the number of bytes stored in the output buffer (not
330 	 *         including the terminating nul).
331 	 *
332 	 * Return: The converted string, or %NULL on an error.
333 	 *
334 	 * Throws: GException on failure.
335 	 */
336 	public static string filenameToUtf8(string opsysstring, ptrdiff_t len, size_t* bytesRead, size_t* bytesWritten)
337 	{
338 		GError* err = null;
339 		
340 		auto p = g_filename_to_utf8(Str.toStringz(opsysstring), len, bytesRead, bytesWritten, &err);
341 		
342 		if (err !is null)
343 		{
344 			throw new GException( new ErrorG(err) );
345 		}
346 		
347 		return Str.toString(p);
348 	}
349 
350 	/**
351 	 * Obtains the character set for the [current locale][setlocale]; you
352 	 * might use this character set as an argument to g_convert(), to convert
353 	 * from the current locale's encoding to some other encoding. (Frequently
354 	 * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.)
355 	 *
356 	 * On Windows the character set returned by this function is the
357 	 * so-called system default ANSI code-page. That is the character set
358 	 * used by the "narrow" versions of C library and Win32 functions that
359 	 * handle file names. It might be different from the character set
360 	 * used by the C library's current locale.
361 	 *
362 	 * The return value is %TRUE if the locale's encoding is UTF-8, in that
363 	 * case you can perhaps avoid calling g_convert().
364 	 *
365 	 * The string returned in @charset is not allocated, and should not be
366 	 * freed.
367 	 *
368 	 * Params:
369 	 *     charset = return location for character set
370 	 *         name, or %NULL.
371 	 *
372 	 * Return: %TRUE if the returned charset is UTF-8
373 	 */
374 	public static bool getCharset(out string charset)
375 	{
376 		char* outcharset = null;
377 		
378 		auto p = g_get_charset(&outcharset) != 0;
379 		
380 		charset = Str.toString(outcharset);
381 		
382 		return p;
383 	}
384 
385 	/**
386 	 * Gets the character set for the current locale.
387 	 *
388 	 * Return: a newly allocated string containing the name
389 	 *     of the character set. This string must be freed with g_free().
390 	 */
391 	public static string getCodeset()
392 	{
393 		return Str.toString(g_get_codeset());
394 	}
395 
396 	/**
397 	 * Determines the preferred character sets used for filenames.
398 	 * The first character set from the @charsets is the filename encoding, the
399 	 * subsequent character sets are used when trying to generate a displayable
400 	 * representation of a filename, see g_filename_display_name().
401 	 *
402 	 * On Unix, the character sets are determined by consulting the
403 	 * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
404 	 * On Windows, the character set used in the GLib API is always UTF-8
405 	 * and said environment variables have no effect.
406 	 *
407 	 * `G_FILENAME_ENCODING` may be set to a comma-separated list of
408 	 * character set names. The special token "&commat;locale" is taken
409 	 * to  mean the character set for the [current locale][setlocale].
410 	 * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
411 	 * the character set of the current locale is taken as the filename
412 	 * encoding. If neither environment variable  is set, UTF-8 is taken
413 	 * as the filename encoding, but the character set of the current locale
414 	 * is also put in the list of encodings.
415 	 *
416 	 * The returned @charsets belong to GLib and must not be freed.
417 	 *
418 	 * Note that on Unix, regardless of the locale character set or
419 	 * `G_FILENAME_ENCODING` value, the actual file names present
420 	 * on a system might be in any random encoding or just gibberish.
421 	 *
422 	 * Params:
423 	 *     charsets = return location for the %NULL-terminated list of encoding names
424 	 *
425 	 * Return: %TRUE if the filename encoding is UTF-8.
426 	 *
427 	 * Since: 2.6
428 	 */
429 	public static bool getFilenameCharsets(string[][] charsets)
430 	{
431 		return g_get_filename_charsets(Str.toStringzArray(charsets)) != 0;
432 	}
433 
434 	/**
435 	 * Converts a string from UTF-8 to the encoding used for strings by
436 	 * the C runtime (usually the same as that used by the operating
437 	 * system) in the [current locale][setlocale]. On Windows this means
438 	 * the system codepage.
439 	 *
440 	 * Params:
441 	 *     utf8string = a UTF-8 encoded string
442 	 *     len = the length of the string, or -1 if the string is
443 	 *         nul-terminated (Note that some encodings may allow nul
444 	 *         bytes to occur inside strings. In that case, using -1
445 	 *         for the @len parameter is unsafe)
446 	 *     bytesRead = location to store the number of bytes in the
447 	 *         input string that were successfully converted, or %NULL.
448 	 *         Even if the conversion was successful, this may be
449 	 *         less than @len if there were partial characters
450 	 *         at the end of the input. If the error
451 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
452 	 *         stored will be the byte offset after the last valid
453 	 *         input sequence.
454 	 *     bytesWritten = the number of bytes stored in the output buffer (not
455 	 *         including the terminating nul).
456 	 *
457 	 * Return: A newly-allocated buffer containing the converted string,
458 	 *     or %NULL on an error, and error will be set.
459 	 *
460 	 * Throws: GException on failure.
461 	 */
462 	public static string localeFromUtf8(string utf8string, ptrdiff_t len, size_t* bytesRead, size_t* bytesWritten)
463 	{
464 		GError* err = null;
465 		
466 		auto p = g_locale_from_utf8(Str.toStringz(utf8string), len, bytesRead, bytesWritten, &err);
467 		
468 		if (err !is null)
469 		{
470 			throw new GException( new ErrorG(err) );
471 		}
472 		
473 		return Str.toString(p);
474 	}
475 
476 	/**
477 	 * Converts a string which is in the encoding used for strings by
478 	 * the C runtime (usually the same as that used by the operating
479 	 * system) in the [current locale][setlocale] into a UTF-8 string.
480 	 *
481 	 * Params:
482 	 *     opsysstring = a string in the encoding of the current locale. On Windows
483 	 *         this means the system codepage.
484 	 *     len = the length of the string, or -1 if the string is
485 	 *         nul-terminated (Note that some encodings may allow nul
486 	 *         bytes to occur inside strings. In that case, using -1
487 	 *         for the @len parameter is unsafe)
488 	 *     bytesRead = location to store the number of bytes in the
489 	 *         input string that were successfully converted, or %NULL.
490 	 *         Even if the conversion was successful, this may be
491 	 *         less than @len if there were partial characters
492 	 *         at the end of the input. If the error
493 	 *         #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
494 	 *         stored will be the byte offset after the last valid
495 	 *         input sequence.
496 	 *     bytesWritten = the number of bytes stored in the output buffer (not
497 	 *         including the terminating nul).
498 	 *
499 	 * Return: A newly-allocated buffer containing the converted string,
500 	 *     or %NULL on an error, and error will be set.
501 	 *
502 	 * Throws: GException on failure.
503 	 */
504 	public static string localeToUtf8(string opsysstring, ptrdiff_t len, size_t* bytesRead, size_t* bytesWritten)
505 	{
506 		GError* err = null;
507 		
508 		auto p = g_locale_to_utf8(Str.toStringz(opsysstring), len, bytesRead, bytesWritten, &err);
509 		
510 		if (err !is null)
511 		{
512 			throw new GException( new ErrorG(err) );
513 		}
514 		
515 		return Str.toString(p);
516 	}
517 }