glib.Unicode source code

1 /*
2  * This file is part of gtkD.
3  *
4  * gtkD is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU Lesser General Public License
6  * as published by the Free Software Foundation; either version 3
7  * of the License, or (at your option) any later version, with
8  * some exceptions, please read the COPYING file.
9  *
10  * gtkD is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with gtkD; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
18  */
19 
20 // generated automatically - do not change
21 // find conversion definition on APILookup.txt
22 // implement new conversion functionalities on the wrap.utils package
23 
24 
25 module glib.Unicode;
26 
27 private import glib.ErrorG;
28 private import glib.GException;
29 private import glib.Str;
30 private import gtkc.glib;
31 public  import gtkc.glibtypes;
32 
33 
34 /** */
35 public struct Unicode
36 {
37 
38 	/**
39 	 * Convert a string from UCS-4 to UTF-16. A 0 character will be
40 	 * added to the result after the converted text.
41 	 *
42 	 * Params:
43 	 *     str = a UCS-4 encoded string
44 	 *     len = the maximum length (number of characters) of @str to use.
45 	 *         If @len < 0, then the string is nul-terminated.
46 	 *     itemsRead = location to store number of bytes read,
47 	 *         or %NULL. If an error occurs then the index of the invalid input
48 	 *         is stored here.
49 	 *     itemsWritten = location to store number of #gunichar2
50 	 *         written, or %NULL. The value stored here does not include the
51 	 *         trailing 0.
52 	 *
53 	 * Return: a pointer to a newly allocated UTF-16 string.
54 	 *     This value must be freed with g_free(). If an error occurs,
55 	 *     %NULL will be returned and @error set.
56 	 *
57 	 * Throws: GException on failure.
58 	 */
public static wchar* ucs4ToUtf16(dchar* str, glong len, glong* itemsRead, glong* itemsWritten)
{
	// Out-parameter that g_ucs4_to_utf16 fills in when the conversion fails.
	GError* failure = null;
	auto utf16 = g_ucs4_to_utf16(str, len, itemsRead, itemsWritten, &failure);

	// Happy path first: nothing was recorded, so hand the buffer to the caller.
	if (failure is null)
	{
		return utf16;
	}

	// Surface the C-level error as a D exception.
	throw new GException( new ErrorG(failure) );
}
72 
73 	/**
74 	 * Convert a string from a 32-bit fixed width representation as UCS-4
75 	 * to UTF-8. The result will be terminated with a 0 byte.
76 	 *
77 	 * Params:
78 	 *     str = a UCS-4 encoded string
79 	 *     len = the maximum length (number of characters) of @str to use.
80 	 *         If @len < 0, then the string is nul-terminated.
81 	 *     itemsRead = location to store number of characters
82 	 *         read, or %NULL.
83 	 *     itemsWritten = location to store number of bytes
84 	 *         written or %NULL. The value here stored does not include the
85 	 *         trailing 0 byte.
86 	 *
87 	 * Return: a pointer to a newly allocated UTF-8 string.
88 	 *     This value must be freed with g_free(). If an error occurs,
89 	 *     %NULL will be returned and @error set. In that case, @items_read
90 	 *     will be set to the position of the first invalid input character.
91 	 *
92 	 * Throws: GException on failure.
93 	 */
public static string ucs4ToUtf8(dchar* str, glong len, glong* itemsRead, glong* itemsWritten)
{
	// Out-parameter that g_ucs4_to_utf8 fills in when the conversion fails.
	GError* err = null;

	auto p = g_ucs4_to_utf8(str, len, itemsRead, itemsWritten, &err);

	// The C buffer is documented as "must be freed with g_free()", but the
	// previous code never released it. Str.toString is expected to return a
	// D-owned copy (TODO confirm against glib.Str), so free the original on
	// every exit path. On failure p is null and g_free(null) does nothing.
	scope(exit) g_free(p);

	if (err !is null)
	{
		throw new GException( new ErrorG(err) );
	}

	return Str.toString(p);
}
107 
108 	/**
109 	 * Determines the break type of @c. @c should be a Unicode character
110 	 * (to derive a character from UTF-8 encoded text, use
111 	 * g_utf8_get_char()). The break type is used to find word and line
112 	 * breaks ("text boundaries"), Pango implements the Unicode boundary
113 	 * resolution algorithms and normally you would use a function such
114 	 * as pango_break() instead of caring about break types yourself.
115 	 *
116 	 * Params:
117 	 *     c = a Unicode character
118 	 *
119 	 * Return: the break type of @c
120 	 */
public static GUnicodeBreakType unicharBreakType(dchar c)
{
	// Straight pass-through: the value from GLib is returned untranslated.
	const breakKind = g_unichar_break_type(c);
	return breakKind;
}
125 
126 	/**
127 	 * Determines the canonical combining class of a Unicode character.
128 	 *
129 	 * Params:
130 	 *     uc = a Unicode character
131 	 *
132 	 * Return: the combining class of the character
133 	 *
134 	 * Since: 2.14
135 	 */
public static int unicharCombiningClass(dchar uc)
{
	// No conversion needed: forward the combining class GLib reports.
	const combiningClass = g_unichar_combining_class(uc);
	return combiningClass;
}
140 
141 	/**
142 	 * Performs a single composition step of the
143 	 * Unicode canonical composition algorithm.
144 	 *
145 	 * This function includes algorithmic Hangul Jamo composition,
146 	 * but it is not exactly the inverse of g_unichar_decompose().
147 	 * No composition can have either of @a or @b equal to zero.
148 	 * To be precise, this function composes if and only if
149 	 * there exists a Primary Composite P which is canonically
150 	 * equivalent to the sequence <@a,@b>.  See the Unicode
151 	 * Standard for the definition of Primary Composite.
152 	 *
153 	 * If @a and @b do not compose a new character, @ch is set to zero.
154 	 *
155 	 * See
156 	 * [UAX#15](http://unicode.org/reports/tr15/)
157 	 * for details.
158 	 *
159 	 * Params:
160 	 *     a = a Unicode character
161 	 *     b = a Unicode character
162 	 *     ch = return location for the composed character
163 	 *
164 	 * Return: %TRUE if the characters could be composed
165 	 *
166 	 * Since: 2.30
167 	 */
public static bool unicharCompose(dchar a, dchar b, dchar* ch)
{
	// ch is the return location handed to GLib; the integer result of the
	// call is turned into a D bool.
	const composed = g_unichar_compose(a, b, ch);
	return composed != 0;
}
172 
173 	/**
174 	 * Performs a single decomposition step of the
175 	 * Unicode canonical decomposition algorithm.
176 	 *
177 	 * This function does not include compatibility
178 	 * decompositions. It does, however, include algorithmic
179 	 * Hangul Jamo decomposition, as well as 'singleton'
180 	 * decompositions which replace a character by a single
181 	 * other character. In the case of singletons *@b will
182 	 * be set to zero.
183 	 *
184 	 * If @ch is not decomposable, *@a is set to @ch and *@b
185 	 * is set to zero.
186 	 *
187 	 * Note that the way Unicode decomposition pairs are
188 	 * defined, it is guaranteed that @b would not decompose
189 	 * further, but @a may itself decompose.  To get the full
190 	 * canonical decomposition for @ch, one would need to
191 	 * recursively call this function on @a.  Or use
192 	 * g_unichar_fully_decompose().
193 	 *
194 	 * See
195 	 * [UAX#15](http://unicode.org/reports/tr15/)
196 	 * for details.
197 	 *
198 	 * Params:
199 	 *     ch = a Unicode character
200 	 *     a = return location for the first component of @ch
201 	 *     b = return location for the second component of @ch
202 	 *
203 	 * Return: %TRUE if the character could be decomposed
204 	 *
205 	 * Since: 2.30
206 	 */
public static bool unicharDecompose(dchar ch, dchar* a, dchar* b)
{
	// a and b are the return locations handed to GLib; a zero result from
	// the call is reported as false, anything else as true.
	if (g_unichar_decompose(ch, a, b) == 0)
	{
		return false;
	}

	return true;
}
211 
212 	/**
213 	 * Determines the numeric value of a character as a decimal
214 	 * digit.
215 	 *
216 	 * Params:
217 	 *     c = a Unicode character
218 	 *
219 	 * Return: If @c is a decimal digit (according to
220 	 *     g_unichar_isdigit()), its numeric value. Otherwise, -1.
221 	 */
public static int unicharDigitValue(dchar c)
{
	// The integer from GLib is returned as-is.
	const digit = g_unichar_digit_value(c);
	return digit;
}
226 
227 	/**
228 	 * Computes the canonical or compatibility decomposition of a
229 	 * Unicode character.  For compatibility decomposition,
230 	 * pass %TRUE for @compat; for canonical decomposition
231 	 * pass %FALSE for @compat.
232 	 *
233 	 * The decomposed sequence is placed in @result.  Only up to
234 	 * @result_len characters are written into @result.  The length
235 	 * of the full decomposition (irrespective of @result_len) is
236 	 * returned by the function.  For canonical decomposition,
237 	 * currently all decompositions are of length at most 4, but
238 	 * this may change in the future (very unlikely though).
239 	 * At any rate, Unicode does guarantee that a buffer of length
240 	 * 18 is always enough for both compatibility and canonical
241 	 * decompositions, so that is the size recommended. This is provided
242 	 * as %G_UNICHAR_MAX_DECOMPOSITION_LENGTH.
243 	 *
244 	 * See
245 	 * [UAX#15](http://unicode.org/reports/tr15/)
246 	 * for details.
247 	 *
248 	 * Params:
249 	 *     ch = a Unicode character.
250 	 *     compat = whether perform canonical or compatibility decomposition
251 	 *     result = location to store decomposed result, or %NULL
252 	 *     resultLen = length of @result
253 	 *
254 	 * Return: the length of the full decomposition.
255 	 *
256 	 * Since: 2.30
257 	 */
public static size_t unicharFullyDecompose(dchar ch, bool compat, dchar* result, size_t resultLen)
{
	// All four arguments are forwarded unchanged; the length GLib reports
	// is what the caller gets back.
	const fullLength = g_unichar_fully_decompose(ch, compat, result, resultLen);
	return fullLength;
}
262 
263 	/**
264 	 * In Unicode, some characters are "mirrored". This means that their
265 	 * images are mirrored horizontally in text that is laid out from right
266 	 * to left. For instance, "(" would become its mirror image, ")", in
267 	 * right-to-left text.
268 	 *
269 	 * If @ch has the Unicode mirrored property and there is another unicode
270 	 * character that typically has a glyph that is the mirror image of @ch's
271 	 * glyph and @mirrored_ch is set, it puts that character in the address
272 	 * pointed to by @mirrored_ch.  Otherwise the original character is put.
273 	 *
274 	 * Params:
275 	 *     ch = a Unicode character
276 	 *     mirroredCh = location to store the mirrored character
277 	 *
278 	 * Return: %TRUE if @ch has a mirrored character, %FALSE otherwise
279 	 *
280 	 * Since: 2.4
281 	 */
public static bool unicharGetMirrorChar(dchar ch, dchar* mirroredCh)
{
	// mirroredCh is passed straight through as the output location.
	const hasMirror = g_unichar_get_mirror_char(ch, mirroredCh);

	// Collapse the C integer flag into a D bool.
	return hasMirror != 0;
}
286 
287 	/**
288 	 * Looks up the #GUnicodeScript for a particular character (as defined
289 	 * by Unicode Standard Annex \#24). No check is made for @ch being a
290 	 * valid Unicode character; if you pass in invalid character, the
291 	 * result is undefined.
292 	 *
293 	 * This function is equivalent to pango_script_for_unichar() and the
294 	 * two are interchangeable.
295 	 *
296 	 * Params:
297 	 *     ch = a Unicode character
298 	 *
299 	 * Return: the #GUnicodeScript for the character.
300 	 *
301 	 * Since: 2.14
302 	 */
public static GUnicodeScript unicharGetScript(dchar ch)
{
	// The script value from GLib is returned untranslated.
	const script = g_unichar_get_script(ch);
	return script;
}
307 
308 	/**
309 	 * Determines whether a character is alphanumeric.
310 	 * Given some UTF-8 text, obtain a character value
311 	 * with g_utf8_get_char().
312 	 *
313 	 * Params:
314 	 *     c = a Unicode character
315 	 *
316 	 * Return: %TRUE if @c is an alphanumeric character
317 	 */
public static bool unicharIsalnum(dchar c)
{
	// Turn the integer flag from the C call into a D bool.
	const flag = g_unichar_isalnum(c);
	return flag != 0;
}
322 
323 	/**
324 	 * Determines whether a character is alphabetic (i.e. a letter).
325 	 * Given some UTF-8 text, obtain a character value with
326 	 * g_utf8_get_char().
327 	 *
328 	 * Params:
329 	 *     c = a Unicode character
330 	 *
331 	 * Return: %TRUE if @c is an alphabetic character
332 	 */
public static bool unicharIsalpha(dchar c)
{
	// Zero from GLib means "no"; every other value means "yes".
	if (g_unichar_isalpha(c) == 0)
	{
		return false;
	}

	return true;
}
337 
338 	/**
339 	 * Determines whether a character is a control character.
340 	 * Given some UTF-8 text, obtain a character value with
341 	 * g_utf8_get_char().
342 	 *
343 	 * Params:
344 	 *     c = a Unicode character
345 	 *
346 	 * Return: %TRUE if @c is a control character
347 	 */
public static bool unicharIscntrl(dchar c)
{
	// Any nonzero result from the C call maps to true.
	return !(g_unichar_iscntrl(c) == 0);
}
352 
353 	/**
354 	 * Determines if a given character is assigned in the Unicode
355 	 * standard.
356 	 *
357 	 * Params:
358 	 *     c = a Unicode character
359 	 *
360 	 * Return: %TRUE if the character has an assigned value
361 	 */
public static bool unicharIsdefined(dchar c)
{
	// Collapse the C integer flag into a D bool.
	const assigned = g_unichar_isdefined(c);
	return assigned != 0;
}
366 
367 	/**
368 	 * Determines whether a character is numeric (i.e. a digit).  This
369 	 * covers ASCII 0-9 and also digits in other languages/scripts.  Given
370 	 * some UTF-8 text, obtain a character value with g_utf8_get_char().
371 	 *
372 	 * Params:
373 	 *     c = a Unicode character
374 	 *
375 	 * Return: %TRUE if @c is a digit
376 	 */
public static bool unicharIsdigit(dchar c)
{
	// Nonzero from GLib becomes true, zero becomes false.
	return g_unichar_isdigit(c) ? true : false;
}
381 
382 	/**
383 	 * Determines whether a character is printable and not a space
384 	 * (returns %FALSE for control characters, format characters, and
385 	 * spaces). g_unichar_isprint() is similar, but returns %TRUE for
386 	 * spaces. Given some UTF-8 text, obtain a character value with
387 	 * g_utf8_get_char().
388 	 *
389 	 * Params:
390 	 *     c = a Unicode character
391 	 *
392 	 * Return: %TRUE if @c is printable unless it's a space
393 	 */
public static bool unicharIsgraph(dchar c)
{
	// Zero from GLib means "no"; every other value means "yes".
	if (g_unichar_isgraph(c) == 0)
	{
		return false;
	}

	return true;
}
398 
399 	/**
400 	 * Determines whether a character is a lowercase letter.
401 	 * Given some UTF-8 text, obtain a character value with
402 	 * g_utf8_get_char().
403 	 *
404 	 * Params:
405 	 *     c = a Unicode character
406 	 *
407 	 * Return: %TRUE if @c is a lowercase letter
408 	 */
public static bool unicharIslower(dchar c)
{
	// Turn the integer flag from the C call into a D bool.
	const lower = g_unichar_islower(c);
	return lower != 0;
}
413 
414 	/**
415 	 * Determines whether a character is a mark (non-spacing mark,
416 	 * combining mark, or enclosing mark in Unicode speak).
417 	 * Given some UTF-8 text, obtain a character value
418 	 * with g_utf8_get_char().
419 	 *
420 	 * Note: in most cases where isalpha characters are allowed,
421 	 * ismark characters should be allowed too, as they are essential
422 	 * for writing most European languages as well as many non-Latin
423 	 * scripts.
424 	 *
425 	 * Params:
426 	 *     c = a Unicode character
427 	 *
428 	 * Return: %TRUE if @c is a mark character
429 	 *
430 	 * Since: 2.14
431 	 */
public static bool unicharIsmark(dchar c)
{
	// Any nonzero result from the C call maps to true.
	return !(g_unichar_ismark(c) == 0);
}
436 
437 	/**
438 	 * Determines whether a character is printable.
439 	 * Unlike g_unichar_isgraph(), returns %TRUE for spaces.
440 	 * Given some UTF-8 text, obtain a character value with
441 	 * g_utf8_get_char().
442 	 *
443 	 * Params:
444 	 *     c = a Unicode character
445 	 *
446 	 * Return: %TRUE if @c is printable
447 	 */
public static bool unicharIsprint(dchar c)
{
	// Collapse the C integer flag into a D bool.
	const printable = g_unichar_isprint(c);
	return printable != 0;
}
452 
453 	/**
454 	 * Determines whether a character is punctuation or a symbol.
455 	 * Given some UTF-8 text, obtain a character value with
456 	 * g_utf8_get_char().
457 	 *
458 	 * Params:
459 	 *     c = a Unicode character
460 	 *
461 	 * Return: %TRUE if @c is a punctuation or symbol character
462 	 */
public static bool unicharIspunct(dchar c)
{
	// Nonzero from GLib becomes true, zero becomes false.
	return g_unichar_ispunct(c) ? true : false;
}
467 
468 	/**
469 	 * Determines whether a character is a space, tab, or line separator
470 	 * (newline, carriage return, etc.).  Given some UTF-8 text, obtain a
471 	 * character value with g_utf8_get_char().
472 	 *
473 	 * (Note: don't use this to do word breaking; you have to use
474 	 * Pango or equivalent to get word breaking right, the algorithm
475 	 * is fairly complex.)
476 	 *
477 	 * Params:
478 	 *     c = a Unicode character
479 	 *
480 	 * Return: %TRUE if @c is a space character
481 	 */
public static bool unicharIsspace(dchar c)
{
	// Zero from GLib means "no"; every other value means "yes".
	if (g_unichar_isspace(c) == 0)
	{
		return false;
	}

	return true;
}
486 
487 	/**
488 	 * Determines if a character is titlecase. Some characters in
489 	 * Unicode which are composites, such as the DZ digraph
490 	 * have three case variants instead of just two. The titlecase
491 	 * form is used at the beginning of a word where only the
492 	 * first letter is capitalized. The titlecase form of the DZ
493 	 * digraph is U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.
494 	 *
495 	 * Params:
496 	 *     c = a Unicode character
497 	 *
498 	 * Return: %TRUE if the character is titlecase
499 	 */
public static bool unicharIstitle(dchar c)
{
	// Turn the integer flag from the C call into a D bool.
	const title = g_unichar_istitle(c);
	return title != 0;
}
504 
505 	/**
506 	 * Determines if a character is uppercase.
507 	 *
508 	 * Params:
509 	 *     c = a Unicode character
510 	 *
511 	 * Return: %TRUE if @c is an uppercase character
512 	 */
public static bool unicharIsupper(dchar c)
{
	// Any nonzero result from the C call maps to true.
	return !(g_unichar_isupper(c) == 0);
}
517 
518 	/**
519 	 * Determines if a character is typically rendered in a double-width
520 	 * cell.
521 	 *
522 	 * Params:
523 	 *     c = a Unicode character
524 	 *
525 	 * Return: %TRUE if the character is wide
526 	 */
public static bool unicharIswide(dchar c)
{
	// Collapse the C integer flag into a D bool.
	const wide = g_unichar_iswide(c);
	return wide != 0;
}
531 
532 	/**
533 	 * Determines if a character is typically rendered in a double-width
534 	 * cell under legacy East Asian locales.  If a character is wide according to
535 	 * g_unichar_iswide(), then it is also reported wide with this function, but
536 	 * the converse is not necessarily true. See the
537 	 * [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
538 	 * for details.
539 	 *
540 	 * If a character passes the g_unichar_iswide() test then it will also pass
541 	 * this test, but not the other way around.  Note that some characters may
542 	 * pass both this test and g_unichar_iszerowidth().
543 	 *
544 	 * Params:
545 	 *     c = a Unicode character
546 	 *
547 	 * Return: %TRUE if the character is wide in legacy East Asian locales
548 	 *
549 	 * Since: 2.12
550 	 */
public static bool unicharIswideCjk(dchar c)
{
	// Zero from GLib means "no"; every other value means "yes".
	if (g_unichar_iswide_cjk(c) == 0)
	{
		return false;
	}

	return true;
}
555 
556 	/**
557 	 * Determines if a character is a hexadecimal digit.
558 	 *
559 	 * Params:
560 	 *     c = a Unicode character.
561 	 *
562 	 * Return: %TRUE if the character is a hexadecimal digit
563 	 */
public static bool unicharIsxdigit(dchar c)
{
	// Nonzero from GLib becomes true, zero becomes false.
	return g_unichar_isxdigit(c) ? true : false;
}
568 
569 	/**
570 	 * Determines if a given character typically takes zero width when rendered.
571 	 * The return value is %TRUE for all non-spacing and enclosing marks
572 	 * (e.g., combining accents), format characters, zero-width
573 	 * space, but not U+00AD SOFT HYPHEN.
574 	 *
575 	 * A typical use of this function is with one of g_unichar_iswide() or
576 	 * g_unichar_iswide_cjk() to determine the number of cells a string occupies
577 	 * when displayed on a grid display (terminals).  However, note that not all
578 	 * terminals support zero-width rendering of zero-width marks.
579 	 *
580 	 * Params:
581 	 *     c = a Unicode character
582 	 *
583 	 * Return: %TRUE if the character has zero width
584 	 *
585 	 * Since: 2.14
586 	 */
public static bool unicharIszerowidth(dchar c)
{
	// Turn the integer flag from the C call into a D bool.
	const zeroWidth = g_unichar_iszerowidth(c);
	return zeroWidth != 0;
}
591 
592 	/**
593 	 * Converts a single character to UTF-8.
594 	 *
595 	 * Params:
596 	 *     c = a Unicode character code
597 	 *     outbuf = output buffer, must have at least 6 bytes of space.
598 	 *         If %NULL, the length will be computed and returned
599 	 *         and nothing will be written to @outbuf.
600 	 *
601 	 * Return: number of bytes written
602 	 */
public static int unicharToUtf8(dchar c, string outbuf)
{
	// A D `string` is an immutable slice passed by value, so bytes written
	// by the C function can never reach the caller through `outbuf`.
	// The previous code gave GLib the result of Str.toStringz(outbuf).
	// That is presumably a temporary sized to outbuf (confirm against
	// glib.Str), which may be shorter than the 6 bytes the documentation
	// requires, so GLib could write past its end.
	//
	// Passing null selects the documented "compute the length only" mode.
	// It yields the same byte count without writing anywhere, so `outbuf`
	// is kept for interface compatibility only and is not read.
	return g_unichar_to_utf8(c, null);
}
607 
608 	/**
609 	 * Converts a character to lower case.
610 	 *
611 	 * Params:
612 	 *     c = a Unicode character.
613 	 *
614 	 * Return: the result of converting @c to lower case.
615 	 *     If @c is not an uppercase or titlecase character,
616 	 *     or has no lowercase equivalent @c is returned unchanged.
617 	 */
public static dchar unicharTolower(dchar c)
{
	// The character GLib returns is handed back unchanged.
	const lowered = g_unichar_tolower(c);
	return lowered;
}
622 
623 	/**
624 	 * Converts a character to the titlecase.
625 	 *
626 	 * Params:
627 	 *     c = a Unicode character
628 	 *
629 	 * Return: the result of converting @c to titlecase.
630 	 *     If @c is not an uppercase or lowercase character,
631 	 *     @c is returned unchanged.
632 	 */
public static dchar unicharTotitle(dchar c)
{
	// The character GLib returns is handed back unchanged.
	const titled = g_unichar_totitle(c);
	return titled;
}
637 
638 	/**
639 	 * Converts a character to uppercase.
640 	 *
641 	 * Params:
642 	 *     c = a Unicode character
643 	 *
644 	 * Return: the result of converting @c to uppercase.
645 	 *     If @c is not a lowercase or titlecase character,
646 	 *     or has no upper case equivalent @c is returned unchanged.
647 	 */
public static dchar unicharToupper(dchar c)
{
	// The character GLib returns is handed back unchanged.
	const raised = g_unichar_toupper(c);
	return raised;
}
652 
653 	/**
654 	 * Classifies a Unicode character by type.
655 	 *
656 	 * Params:
657 	 *     c = a Unicode character
658 	 *
659 	 * Return: the type of the character.
660 	 */
public static GUnicodeType unicharType(dchar c)
{
	// The classification from GLib is returned untranslated.
	const category = g_unichar_type(c);
	return category;
}
665 
666 	/**
667 	 * Checks whether @ch is a valid Unicode character. Some possible
668 	 * integer values of @ch will not be valid. 0 is considered a valid
669 	 * character, though it's normally a string terminator.
670 	 *
671 	 * Params:
672 	 *     ch = a Unicode character
673 	 *
674 	 * Return: %TRUE if @ch is a valid Unicode character
675 	 */
public static bool unicharValidate(dchar ch)
{
	// Zero from GLib means "not valid"; every other value means "valid".
	if (g_unichar_validate(ch) == 0)
	{
		return false;
	}

	return true;
}
680 
681 	/**
682 	 * Determines the numeric value of a character as a hexadecimal
683 	 * digit.
684 	 *
685 	 * Params:
686 	 *     c = a Unicode character
687 	 *
688 	 * Return: If @c is a hex digit (according to
689 	 *     g_unichar_isxdigit()), its numeric value. Otherwise, -1.
690 	 */
public static int unicharXdigitValue(dchar c)
{
	// The integer from GLib is returned as-is.
	const hexValue = g_unichar_xdigit_value(c);
	return hexValue;
}
695 
696 	/**
697 	 * Computes the canonical decomposition of a Unicode character.
698 	 *
699 	 * Deprecated: Use the more flexible g_unichar_fully_decompose()
700 	 * instead.
701 	 *
702 	 * Params:
703 	 *     ch = a Unicode character.
704 	 *     resultLen = location to store the length of the return value.
705 	 *
706 	 * Return: a newly allocated string of Unicode characters.
707 	 *     @result_len is set to the resulting length of the string.
708 	 */
public static dchar* unicodeCanonicalDecomposition(dchar ch, size_t* resultLen)
{
	// resultLen is forwarded as the length out-parameter; the pointer from
	// GLib is returned to the caller without copying.
	auto decomposed = g_unicode_canonical_decomposition(ch, resultLen);
	return decomposed;
}
713 
714 	/**
715 	 * Computes the canonical ordering of a string in-place.
716 	 * This rearranges decomposed characters in the string
717 	 * according to their combining classes.  See the Unicode
718 	 * manual for more information.
719 	 *
720 	 * Params:
721 	 *     str = a UCS-4 encoded string.
722 	 *     len = the maximum length of @string to use.
723 	 */
public static void unicodeCanonicalOrdering(dchar* str, size_t len)
{
	// Nothing to return: the buffer and length are passed straight to GLib,
	// which is documented to reorder the string in place.
	g_unicode_canonical_ordering(str, len);
}
728 
729 	/**
730 	 * Looks up the Unicode script for @iso15924.  ISO 15924 assigns four-letter
731 	 * codes to scripts.  For example, the code for Arabic is 'Arab'.
732 	 * This function accepts four letter codes encoded as a @guint32 in a
733 	 * big-endian fashion.  That is, the code expected for Arabic is
734 	 * 0x41726162 (0x41 is ASCII code for 'A', 0x72 is ASCII code for 'r', etc).
735 	 *
736 	 * See
737 	 * [Codes for the representation of names of scripts](http://unicode.org/iso15924/codelists.html)
738 	 * for details.
739 	 *
740 	 * Params:
741 	 *     iso15924 = a Unicode script
742 	 *
743 	 * Return: the Unicode script for @iso15924, or
744 	 *     %G_UNICODE_SCRIPT_INVALID_CODE if @iso15924 is zero and
745 	 *     %G_UNICODE_SCRIPT_UNKNOWN if @iso15924 is unknown.
746 	 *
747 	 * Since: 2.30
748 	 */
public static GUnicodeScript unicodeScriptFromIso15924(uint iso15924)
{
	// The script value from GLib is returned untranslated.
	const script = g_unicode_script_from_iso15924(iso15924);
	return script;
}
753 
754 	/**
755 	 * Looks up the ISO 15924 code for @script.  ISO 15924 assigns four-letter
756 	 * codes to scripts.  For example, the code for Arabic is 'Arab'.  The
757 	 * four letter codes are encoded as a @guint32 by this function in a
758 	 * big-endian fashion.  That is, the code returned for Arabic is
759 	 * 0x41726162 (0x41 is ASCII code for 'A', 0x72 is ASCII code for 'r', etc).
760 	 *
761 	 * See
762 	 * [Codes for the representation of names of scripts](http://unicode.org/iso15924/codelists.html)
763 	 * for details.
764 	 *
765 	 * Params:
766 	 *     script = a Unicode script
767 	 *
768 	 * Return: the ISO 15924 code for @script, encoded as an integer,
769 	 *     or zero if @script is %G_UNICODE_SCRIPT_INVALID_CODE or
770 	 *     ISO 15924 code 'Zzzz' (script code for UNKNOWN) if @script is not understood.
771 	 *
772 	 * Since: 2.30
773 	 */
public static uint unicodeScriptToIso15924(GUnicodeScript script)
{
	// The packed code from GLib is returned as-is.
	const isoCode = g_unicode_script_to_iso15924(script);
	return isoCode;
}
778 
779 	/**
780 	 * Convert a string from UTF-16 to UCS-4. The result will be
781 	 * nul-terminated.
782 	 *
783 	 * Params:
784 	 *     str = a UTF-16 encoded string
785 	 *     len = the maximum length (number of #gunichar2) of @str to use.
786 	 *         If @len < 0, then the string is nul-terminated.
787 	 *     itemsRead = location to store number of words read,
788 	 *         or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
789 	 *         returned in case @str contains a trailing partial character. If
790 	 *         an error occurs then the index of the invalid input is stored here.
791 	 *     itemsWritten = location to store number of characters
792 	 *         written, or %NULL. The value stored here does not include the trailing
793 	 *         0 character.
794 	 *
795 	 * Return: a pointer to a newly allocated UCS-4 string.
796 	 *     This value must be freed with g_free(). If an error occurs,
797 	 *     %NULL will be returned and @error set.
798 	 *
799 	 * Throws: GException on failure.
800 	 */
public static dchar* utf16ToUcs4(wchar* str, glong len, glong* itemsRead, glong* itemsWritten)
{
	// Out-parameter that g_utf16_to_ucs4 fills in when the conversion fails.
	GError* failure = null;
	auto ucs4 = g_utf16_to_ucs4(str, len, itemsRead, itemsWritten, &failure);

	// Happy path first: nothing was recorded, so hand the buffer to the caller.
	if (failure is null)
	{
		return ucs4;
	}

	// Surface the C-level error as a D exception.
	throw new GException( new ErrorG(failure) );
}
814 
815 	/**
816 	 * Convert a string from UTF-16 to UTF-8. The result will be
817 	 * terminated with a 0 byte.
818 	 *
819 	 * Note that the input is expected to be already in native endianness,
820 	 * an initial byte-order-mark character is not handled specially.
821 	 * g_convert() can be used to convert a byte buffer of UTF-16 data of
822 	 * ambiguous endianness.
823 	 *
824 	 * Further note that this function does not validate the result
825 	 * string; it may e.g. include embedded NUL characters. The only
826 	 * validation done by this function is to ensure that the input can
827 	 * be correctly interpreted as UTF-16, i.e. it doesn't contain
828 	 * things like unpaired surrogates.
829 	 *
830 	 * Params:
831 	 *     str = a UTF-16 encoded string
832 	 *     len = the maximum length (number of #gunichar2) of @str to use.
833 	 *         If @len < 0, then the string is nul-terminated.
834 	 *     itemsRead = location to store number of words read,
835 	 *         or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
836 	 *         returned in case @str contains a trailing partial character. If
837 	 *         an error occurs then the index of the invalid input is stored here.
838 	 *     itemsWritten = location to store number of bytes written,
839 	 *         or %NULL. The value stored here does not include the trailing 0 byte.
840 	 *
841 	 * Return: a pointer to a newly allocated UTF-8 string.
842 	 *     This value must be freed with g_free(). If an error occurs,
843 	 *     %NULL will be returned and @error set.
844 	 *
845 	 * Throws: GException on failure.
846 	 */
public static string utf16ToUtf8(wchar* str, glong len, glong* itemsRead, glong* itemsWritten)
{
	// Out-parameter that g_utf16_to_utf8 fills in when the conversion fails.
	GError* err = null;

	auto p = g_utf16_to_utf8(str, len, itemsRead, itemsWritten, &err);

	// The C buffer is documented as "must be freed with g_free()", but the
	// previous code never released it. Str.toString is expected to return a
	// D-owned copy (TODO confirm against glib.Str), so free the original on
	// every exit path. On failure p is null and g_free(null) does nothing.
	scope(exit) g_free(p);

	if (err !is null)
	{
		throw new GException( new ErrorG(err) );
	}

	return Str.toString(p);
}
860 
861 	/**
862 	 * Converts a string into a form that is independent of case. The
863 	 * result will not correspond to any particular case, but can be
864 	 * compared for equality or ordered with the results of calling
865 	 * g_utf8_casefold() on other strings.
866 	 *
867 	 * Note that calling g_utf8_casefold() followed by g_utf8_collate() is
868 	 * only an approximation to the correct linguistic case insensitive
869 	 * ordering, though it is a fairly good one. Getting this exactly
870 	 * right would require a more sophisticated collation function that
871 	 * takes case sensitivity into account. GLib does not currently
872 	 * provide such a function.
873 	 *
874 	 * Params:
875 	 *     str = a UTF-8 encoded string
876 	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
877 	 *
878 	 * Return: a newly allocated string, that is a
879 	 *     case independent form of @str.
880 	 */
public static string utf8Casefold(string str, ptrdiff_t len)
{
	// g_utf8_casefold is documented to return a newly allocated C string.
	auto folded = g_utf8_casefold(Str.toStringz(str), len);

	// Previously that allocation was never released. Str.toString is
	// expected to return a D-owned copy (TODO confirm against glib.Str),
	// so the C original can be freed once the copy has been made.
	scope(exit) g_free(folded);

	return Str.toString(folded);
}
885 
886 	/**
887 	 * Compares two strings for ordering using the linguistically
888 	 * correct rules for the [current locale][setlocale].
889 	 * When sorting a large number of strings, it will be significantly
890 	 * faster to obtain collation keys with g_utf8_collate_key() and
891 	 * compare the keys with strcmp() when sorting instead of sorting
892 	 * the original strings.
893 	 *
894 	 * Params:
895 	 *     str1 = a UTF-8 encoded string
896 	 *     str2 = a UTF-8 encoded string
897 	 *
898 	 * Return: < 0 if @str1 compares before @str2,
899 	 *     0 if they compare equal, > 0 if @str1 compares after @str2.
900 	 */
public static int utf8Collate(string str1, string str2)
{
	// Convert both operands to C strings, left operand first.
	auto lhs = Str.toStringz(str1);
	auto rhs = Str.toStringz(str2);

	// The comparison result from GLib is returned as-is.
	return g_utf8_collate(lhs, rhs);
}
905 
906 	/**
907 	 * Converts a string into a collation key that can be compared
908 	 * with other collation keys produced by the same function using
909 	 * strcmp().
910 	 *
911 	 * The results of comparing the collation keys of two strings
912 	 * with strcmp() will always be the same as comparing the two
913 	 * original keys with g_utf8_collate().
914 	 *
915 	 * Note that this function depends on the [current locale][setlocale].
916 	 *
917 	 * Params:
918 	 *     str = a UTF-8 encoded string.
919 	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
920 	 *
921 	 * Return: a newly allocated string. This string should
922 	 *     be freed with g_free() when you are done with it.
923 	 */
public static string utf8CollateKey(string str, ptrdiff_t len)
{
	// The key is documented as newly allocated and to be freed with g_free().
	auto key = g_utf8_collate_key(Str.toStringz(str), len);

	// Previously that allocation was never released. Str.toString is
	// expected to return a D-owned copy (TODO confirm against glib.Str),
	// so the C original can be freed once the copy has been made.
	scope(exit) g_free(key);

	return Str.toString(key);
}
928 
929 	/**
930 	 * Converts a string into a collation key that can be compared
931 	 * with other collation keys produced by the same function using strcmp().
932 	 *
933 	 * In order to sort filenames correctly, this function treats the dot '.'
934 	 * as a special case. Most dictionary orderings seem to consider it
935 	 * insignificant, thus producing the ordering "event.c" "eventgenerator.c"
936 	 * "event.h" instead of "event.c" "event.h" "eventgenerator.c". Also, we
937 	 * would like to treat numbers intelligently so that "file1" "file10" "file5"
938 	 * is sorted as "file1" "file5" "file10".
939 	 *
940 	 * Note that this function depends on the [current locale][setlocale].
941 	 *
942 	 * Params:
943 	 *     str = a UTF-8 encoded string.
944 	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
945 	 *
946 	 * Return: a newly allocated string. This string should
947 	 *     be freed with g_free() when you are done with it.
948 	 *
949 	 * Since: 2.8
950 	 */
951 	public static string utf8CollateKeyForFilename(string str, ptrdiff_t len)
952 	{
953 		return Str.toString(g_utf8_collate_key_for_filename(Str.toStringz(str), len));
954 	}
955 
956 	/**
957 	 * Finds the start of the next UTF-8 character in the string after @p.
958 	 *
959 	 * @p does not have to be at the beginning of a UTF-8 character. No check
960 	 * is made to see if the character found is actually valid other than
961 	 * it starts with an appropriate byte.
962 	 *
963 	 * Params:
964 	 *     p = a pointer to a position within a UTF-8 encoded string
965 	 *     end = a pointer to the byte following the end of the string,
966 	 *         or %NULL to indicate that the string is nul-terminated
967 	 *
968 	 * Return: a pointer to the found character or %NULL
969 	 */
970 	public static string utf8FindNextChar(string p, string end)
971 	{
972 		return Str.toString(g_utf8_find_next_char(Str.toStringz(p), Str.toStringz(end)));
973 	}
974 
975 	/**
976 	 * Given a position @p with a UTF-8 encoded string @str, find the start
977 	 * of the previous UTF-8 character starting before @p. Returns %NULL if no
978 	 * UTF-8 characters are present in @str before @p.
979 	 *
980 	 * @p does not have to be at the beginning of a UTF-8 character. No check
981 	 * is made to see if the character found is actually valid other than
982 	 * it starts with an appropriate byte.
983 	 *
984 	 * Params:
985 	 *     str = pointer to the beginning of a UTF-8 encoded string
986 	 *     p = pointer to some position within @str
987 	 *
988 	 * Return: a pointer to the found character or %NULL.
989 	 */
990 	public static string utf8FindPrevChar(string str, string p)
991 	{
992 		return Str.toString(g_utf8_find_prev_char(Str.toStringz(str), Str.toStringz(p)));
993 	}
994 
995 	/**
996 	 * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
997 	 *
998 	 * If @p does not point to a valid UTF-8 encoded character, results
999 	 * are undefined. If you are not sure that the bytes are complete
1000 	 * valid Unicode characters, you should use g_utf8_get_char_validated()
1001 	 * instead.
1002 	 *
1003 	 * Params:
1004 	 *     p = a pointer to Unicode character encoded as UTF-8
1005 	 *
1006 	 * Return: the resulting character
1007 	 */
1008 	public static dchar utf8GetChar(string p)
1009 	{
1010 		return g_utf8_get_char(Str.toStringz(p));
1011 	}
1012 
1013 	/**
1014 	 * Convert a sequence of bytes encoded as UTF-8 to a Unicode character.
1015 	 * This function checks for incomplete characters, for invalid characters
1016 	 * such as characters that are out of the range of Unicode, and for
1017 	 * overlong encodings of valid characters.
1018 	 *
1019 	 * Params:
1020 	 *     p = a pointer to Unicode character encoded as UTF-8
1021 	 *     maxLen = the maximum number of bytes to read, or -1, for no maximum or
1022 	 *         if @p is nul-terminated
1023 	 *
1024 	 * Return: the resulting character. If @p points to a partial
1025 	 *     sequence at the end of a string that could begin a valid
1026 	 *     character (or if @max_len is zero), returns (gunichar)-2;
1027 	 *     otherwise, if @p does not point to a valid UTF-8 encoded
1028 	 *     Unicode character, returns (gunichar)-1.
1029 	 */
1030 	public static dchar utf8GetCharValidated(string p, ptrdiff_t maxLen)
1031 	{
1032 		return g_utf8_get_char_validated(Str.toStringz(p), maxLen);
1033 	}
1034 
1035 	/**
1036 	 * Converts a string into canonical form, standardizing
1037 	 * such issues as whether a character with an accent
1038 	 * is represented as a base character and combining
1039 	 * accent or as a single precomposed character. The
1040 	 * string has to be valid UTF-8, otherwise %NULL is
1041 	 * returned. You should generally call g_utf8_normalize()
1042 	 * before comparing two Unicode strings.
1043 	 *
1044 	 * The normalization mode %G_NORMALIZE_DEFAULT only
1045 	 * standardizes differences that do not affect the
1046 	 * text content, such as the above-mentioned accent
1047 	 * representation. %G_NORMALIZE_ALL also standardizes
1048 	 * the "compatibility" characters in Unicode, such
1049 	 * as SUPERSCRIPT THREE to the standard forms
1050 	 * (in this case DIGIT THREE). Formatting information
1051 	 * may be lost but for most text operations such
1052 	 * characters should be considered the same.
1053 	 *
1054 	 * %G_NORMALIZE_DEFAULT_COMPOSE and %G_NORMALIZE_ALL_COMPOSE
1055 	 * are like %G_NORMALIZE_DEFAULT and %G_NORMALIZE_ALL,
1056 	 * but returned a result with composed forms rather
1057 	 * than a maximally decomposed form. This is often
1058 	 * useful if you intend to convert the string to
1059 	 * a legacy encoding or pass it to a system with
1060 	 * less capable Unicode handling.
1061 	 *
1062 	 * Params:
1063 	 *     str = a UTF-8 encoded string.
1064 	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
1065 	 *     mode = the type of normalization to perform.
1066 	 *
1067 	 * Return: a newly allocated string, that is the
1068 	 *     normalized form of @str, or %NULL if @str is not
1069 	 *     valid UTF-8.
1070 	 */
1071 	public static string utf8Normalize(string str, ptrdiff_t len, GNormalizeMode mode)
1072 	{
1073 		return Str.toString(g_utf8_normalize(Str.toStringz(str), len, mode));
1074 	}
1075 
1076 	/**
1077 	 * Converts from an integer character offset to a pointer to a position
1078 	 * within the string.
1079 	 *
1080 	 * Since 2.10, this function allows to pass a negative @offset to
1081 	 * step backwards. It is usually worth stepping backwards from the end
1082 	 * instead of forwards if @offset is in the last fourth of the string,
1083 	 * since moving forward is about 3 times faster than moving backward.
1084 	 *
1085 	 * Note that this function doesn't abort when reaching the end of @str.
1086 	 * Therefore you should be sure that @offset is within string boundaries
1087 	 * before calling that function. Call g_utf8_strlen() when unsure.
1088 	 * This limitation exists as this function is called frequently during
1089 	 * text rendering and therefore has to be as fast as possible.
1090 	 *
1091 	 * Params:
1092 	 *     str = a UTF-8 encoded string
1093 	 *     offset = a character offset within @str
1094 	 *
1095 	 * Return: the resulting pointer
1096 	 */
1097 	public static string utf8OffsetToPointer(string str, glong offset)
1098 	{
1099 		return Str.toString(g_utf8_offset_to_pointer(Str.toStringz(str), offset));
1100 	}
1101 
1102 	/**
1103 	 * Converts from a pointer to position within a string to a integer
1104 	 * character offset.
1105 	 *
1106 	 * Since 2.10, this function allows @pos to be before @str, and returns
1107 	 * a negative offset in this case.
1108 	 *
1109 	 * Params:
1110 	 *     str = a UTF-8 encoded string
1111 	 *     pos = a pointer to a position within @str
1112 	 *
1113 	 * Return: the resulting character offset
1114 	 */
1115 	public static glong utf8PointerToOffset(string str, string pos)
1116 	{
1117 		return g_utf8_pointer_to_offset(Str.toStringz(str), Str.toStringz(pos));
1118 	}
1119 
1120 	/**
1121 	 * Finds the previous UTF-8 character in the string before @p.
1122 	 *
1123 	 * @p does not have to be at the beginning of a UTF-8 character. No check
1124 	 * is made to see if the character found is actually valid other than
1125 	 * it starts with an appropriate byte. If @p might be the first
1126 	 * character of the string, you must use g_utf8_find_prev_char() instead.
1127 	 *
1128 	 * Params:
1129 	 *     p = a pointer to a position within a UTF-8 encoded string
1130 	 *
1131 	 * Return: a pointer to the found character
1132 	 */
1133 	public static string utf8PrevChar(string p)
1134 	{
1135 		return Str.toString(g_utf8_prev_char(Str.toStringz(p)));
1136 	}
1137 
1138 	/**
1139 	 * Finds the leftmost occurrence of the given Unicode character
1140 	 * in a UTF-8 encoded string, while limiting the search to @len bytes.
1141 	 * If @len is -1, allow unbounded search.
1142 	 *
1143 	 * Params:
1144 	 *     p = a nul-terminated UTF-8 encoded string
1145 	 *     len = the maximum length of @p
1146 	 *     c = a Unicode character
1147 	 *
1148 	 * Return: %NULL if the string does not contain the character,
1149 	 *     otherwise, a pointer to the start of the leftmost occurrence
1150 	 *     of the character in the string.
1151 	 */
1152 	public static string utf8Strchr(string p, ptrdiff_t len, dchar c)
1153 	{
1154 		return Str.toString(g_utf8_strchr(Str.toStringz(p), len, c));
1155 	}
1156 
1157 	/**
1158 	 * Converts all Unicode characters in the string that have a case
1159 	 * to lowercase. The exact manner that this is done depends
1160 	 * on the current locale, and may result in the number of
1161 	 * characters in the string changing.
1162 	 *
1163 	 * Params:
1164 	 *     str = a UTF-8 encoded string
1165 	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
1166 	 *
1167 	 * Return: a newly allocated string, with all characters
1168 	 *     converted to lowercase.
1169 	 */
1170 	public static string utf8Strdown(string str, ptrdiff_t len)
1171 	{
1172 		return Str.toString(g_utf8_strdown(Str.toStringz(str), len));
1173 	}
1174 
1175 	/**
1176 	 * Computes the length of the string in characters, not including
1177 	 * the terminating nul character. If the @max'th byte falls in the
1178 	 * middle of a character, the last (partial) character is not counted.
1179 	 *
1180 	 * Params:
1181 	 *     p = pointer to the start of a UTF-8 encoded string
1182 	 *     max = the maximum number of bytes to examine. If @max
1183 	 *         is less than 0, then the string is assumed to be
1184 	 *         nul-terminated. If @max is 0, @p will not be examined and
1185 	 *         may be %NULL. If @max is greater than 0, up to @max
1186 	 *         bytes are examined
1187 	 *
1188 	 * Return: the length of the string in characters
1189 	 */
1190 	public static glong utf8Strlen(string p, ptrdiff_t max)
1191 	{
1192 		return g_utf8_strlen(Str.toStringz(p), max);
1193 	}
1194 
1195 	/**
1196 	 * Like the standard C strncpy() function, but copies a given number
1197 	 * of characters instead of a given number of bytes. The @src string
1198 	 * must be valid UTF-8 encoded text. (Use g_utf8_validate() on all
1199 	 * text before trying to use UTF-8 utility functions with it.)
1200 	 *
1201 	 * Params:
1202 	 *     dest = buffer to fill with characters from @src
1203 	 *     src = UTF-8 encoded string
1204 	 *     n = character count
1205 	 *
1206 	 * Return: @dest
1207 	 */
1208 	public static string utf8Strncpy(string dest, string src, size_t n)
1209 	{
1210 		return Str.toString(g_utf8_strncpy(Str.toStringz(dest), Str.toStringz(src), n));
1211 	}
1212 
1213 	/**
1214 	 * Find the rightmost occurrence of the given Unicode character
1215 	 * in a UTF-8 encoded string, while limiting the search to @len bytes.
1216 	 * If @len is -1, allow unbounded search.
1217 	 *
1218 	 * Params:
1219 	 *     p = a nul-terminated UTF-8 encoded string
1220 	 *     len = the maximum length of @p
1221 	 *     c = a Unicode character
1222 	 *
1223 	 * Return: %NULL if the string does not contain the character,
1224 	 *     otherwise, a pointer to the start of the rightmost occurrence
1225 	 *     of the character in the string.
1226 	 */
1227 	public static string utf8Strrchr(string p, ptrdiff_t len, dchar c)
1228 	{
1229 		return Str.toString(g_utf8_strrchr(Str.toStringz(p), len, c));
1230 	}
1231 
1232 	/**
1233 	 * Reverses a UTF-8 string. @str must be valid UTF-8 encoded text.
1234 	 * (Use g_utf8_validate() on all text before trying to use UTF-8
1235 	 * utility functions with it.)
1236 	 *
1237 	 * This function is intended for programmatic uses of reversed strings.
1238 	 * It pays no attention to decomposed characters, combining marks, byte
1239 	 * order marks, directional indicators (LRM, LRO, etc) and similar
1240 	 * characters which might need special handling when reversing a string
1241 	 * for display purposes.
1242 	 *
1243 	 * Note that unlike g_strreverse(), this function returns
1244 	 * newly-allocated memory, which should be freed with g_free() when
1245 	 * no longer needed.
1246 	 *
1247 	 * Params:
1248 	 *     str = a UTF-8 encoded string
1249 	 *     len = the maximum length of @str to use, in bytes. If @len < 0,
1250 	 *         then the string is nul-terminated.
1251 	 *
1252 	 * Return: a newly-allocated string which is the reverse of @str
1253 	 *
1254 	 * Since: 2.2
1255 	 */
1256 	public static string utf8Strreverse(string str, ptrdiff_t len)
1257 	{
1258 		return Str.toString(g_utf8_strreverse(Str.toStringz(str), len));
1259 	}
1260 
1261 	/**
1262 	 * Converts all Unicode characters in the string that have a case
1263 	 * to uppercase. The exact manner that this is done depends
1264 	 * on the current locale, and may result in the number of
1265 	 * characters in the string increasing. (For instance, the
1266 	 * German ess-zet will be changed to SS.)
1267 	 *
1268 	 * Params:
1269 	 *     str = a UTF-8 encoded string
1270 	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
1271 	 *
1272 	 * Return: a newly allocated string, with all characters
1273 	 *     converted to uppercase.
1274 	 */
1275 	public static string utf8Strup(string str, ptrdiff_t len)
1276 	{
1277 		return Str.toString(g_utf8_strup(Str.toStringz(str), len));
1278 	}
1279 
1280 	/**
1281 	 * Copies a substring out of a UTF-8 encoded string.
1282 	 * The substring will contain @end_pos - @start_pos characters.
1283 	 *
1284 	 * Params:
1285 	 *     str = a UTF-8 encoded string
1286 	 *     startPos = a character offset within @str
1287 	 *     endPos = another character offset within @str
1288 	 *
1289 	 * Return: a newly allocated copy of the requested
1290 	 *     substring. Free with g_free() when no longer needed.
1291 	 *
1292 	 * Since: 2.30
1293 	 */
1294 	public static string utf8Substring(string str, glong startPos, glong endPos)
1295 	{
1296 		return Str.toString(g_utf8_substring(Str.toStringz(str), startPos, endPos));
1297 	}
1298 
1299 	/**
1300 	 * Convert a string from UTF-8 to a 32-bit fixed width
1301 	 * representation as UCS-4. A trailing 0 character will be added to the
1302 	 * string after the converted text.
1303 	 *
1304 	 * Params:
1305 	 *     str = a UTF-8 encoded string
1306 	 *     len = the maximum length of @str to use, in bytes. If @len < 0,
1307 	 *         then the string is nul-terminated.
1308 	 *     itemsRead = location to store number of bytes read, or %NULL.
1309 	 *         If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
1310 	 *         returned in case @str contains a trailing partial
1311 	 *         character. If an error occurs then the index of the
1312 	 *         invalid input is stored here.
1313 	 *     itemsWritten = location to store number of characters
1314 	 *         written or %NULL. The value here stored does not include the
1315 	 *         trailing 0 character.
1316 	 *
1317 	 * Return: a pointer to a newly allocated UCS-4 string.
1318 	 *     This value must be freed with g_free(). If an error occurs,
1319 	 *     %NULL will be returned and @error set.
1320 	 *
1321 	 * Throws: GException on failure.
1322 	 */
1323 	public static dchar* utf8ToUcs4(string str, glong len, glong* itemsRead, glong* itemsWritten)
1324 	{
1325 		GError* err = null;
1326 		
1327 		auto p = g_utf8_to_ucs4(Str.toStringz(str), len, itemsRead, itemsWritten, &err);
1328 		
1329 		if (err !is null)
1330 		{
1331 			throw new GException( new ErrorG(err) );
1332 		}
1333 		
1334 		return p;
1335 	}
1336 
1337 	/**
1338 	 * Convert a string from UTF-8 to a 32-bit fixed width
1339 	 * representation as UCS-4, assuming valid UTF-8 input.
1340 	 * This function is roughly twice as fast as g_utf8_to_ucs4()
1341 	 * but does no error checking on the input. A trailing 0 character
1342 	 * will be added to the string after the converted text.
1343 	 *
1344 	 * Params:
1345 	 *     str = a UTF-8 encoded string
1346 	 *     len = the maximum length of @str to use, in bytes. If @len < 0,
1347 	 *         then the string is nul-terminated.
1348 	 *     itemsWritten = location to store the number of
1349 	 *         characters in the result, or %NULL.
1350 	 *
1351 	 * Return: a pointer to a newly allocated UCS-4 string.
1352 	 *     This value must be freed with g_free().
1353 	 */
1354 	public static dchar* utf8ToUcs4Fast(string str, glong len, glong* itemsWritten)
1355 	{
1356 		return g_utf8_to_ucs4_fast(Str.toStringz(str), len, itemsWritten);
1357 	}
1358 
1359 	/**
1360 	 * Convert a string from UTF-8 to UTF-16. A 0 character will be
1361 	 * added to the result after the converted text.
1362 	 *
1363 	 * Params:
1364 	 *     str = a UTF-8 encoded string
1365 	 *     len = the maximum length (number of bytes) of @str to use.
1366 	 *         If @len < 0, then the string is nul-terminated.
1367 	 *     itemsRead = location to store number of bytes read,
1368 	 *         or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
1369 	 *         returned in case @str contains a trailing partial character. If
1370 	 *         an error occurs then the index of the invalid input is stored here.
1371 	 *     itemsWritten = location to store number of #gunichar2
1372 	 *         written, or %NULL. The value stored here does not include the
1373 	 *         trailing 0.
1374 	 *
1375 	 * Return: a pointer to a newly allocated UTF-16 string.
1376 	 *     This value must be freed with g_free(). If an error occurs,
1377 	 *     %NULL will be returned and @error set.
1378 	 *
1379 	 * Throws: GException on failure.
1380 	 */
1381 	public static wchar* utf8ToUtf16(string str, glong len, glong* itemsRead, glong* itemsWritten)
1382 	{
1383 		GError* err = null;
1384 		
1385 		auto p = g_utf8_to_utf16(Str.toStringz(str), len, itemsRead, itemsWritten, &err);
1386 		
1387 		if (err !is null)
1388 		{
1389 			throw new GException( new ErrorG(err) );
1390 		}
1391 		
1392 		return p;
1393 	}
1394 
1395 	/**
1396 	 * Validates UTF-8 encoded text. @str is the text to validate;
1397 	 * if @str is nul-terminated, then @max_len can be -1, otherwise
1398 	 * @max_len should be the number of bytes to validate.
1399 	 * If @end is non-%NULL, then the end of the valid range
1400 	 * will be stored there (i.e. the start of the first invalid
1401 	 * character if some bytes were invalid, or the end of the text
1402 	 * being validated otherwise).
1403 	 *
1404 	 * Note that g_utf8_validate() returns %FALSE if @max_len is
1405 	 * positive and any of the @max_len bytes are nul.
1406 	 *
1407 	 * Returns %TRUE if all of @str was valid. Many GLib and GTK+
1408 	 * routines require valid UTF-8 as input; so data read from a file
1409 	 * or the network should be checked with g_utf8_validate() before
1410 	 * doing anything else with it.
1411 	 *
	 * Params:
	 *     str = the character data to validate; its full length in bytes
	 *         (str.length) is passed to g_utf8_validate() as @max_len, so
	 *         this wrapper has no separate maxLen parameter
	 *     end = return location for end of valid data
1416 	 *
1417 	 * Return: %TRUE if the text was valid UTF-8
1418 	 */
1419 	public static bool utf8Validate(string str, out string end)
1420 	{
1421 		char* outend = null;
1422 		
1423 		auto p = g_utf8_validate(Str.toStringz(str), cast(ptrdiff_t)str.length, &outend) != 0;
1424 		
1425 		end = Str.toString(outend);
1426 		
1427 		return p;
1428 	}
1429 }