glib.Unicode source code

1 /*
2  * This file is part of gtkD.
3  *
4  * gtkD is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU Lesser General Public License
6  * as published by the Free Software Foundation; either version 3
7  * of the License, or (at your option) any later version, with
8  * some exceptions, please read the COPYING file.
9  *
10  * gtkD is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with gtkD; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
18  */
19 
20 // generated automatically - do not change
21 // find conversion definition on APILookup.txt
22 // implement new conversion functionalities on the wrap.utils package
23 
24 
25 module glib.Unicode;
26 
27 private import glib.ErrorG;
28 private import glib.GException;
29 private import glib.Str;
30 private import glib.c.functions;
31 public  import glib.c.types;
32 public  import gtkc.glibtypes;
33 
34 
35 /** */
36 public struct Unicode
37 {
38 
39 	/**
40 	 * Convert a string from UCS-4 to UTF-16. A 0 character will be
41 	 * added to the result after the converted text.
42 	 *
43 	 * Params:
44 	 *     str = a UCS-4 encoded string
45 	 *     len = the maximum length (number of characters) of @str to use.
46 	 *         If @len < 0, then the string is nul-terminated.
47 	 *     itemsRead = location to store number of
48 	 *         bytes read, or %NULL. If an error occurs then the index of the invalid
49 	 *         input is stored here.
50 	 *     itemsWritten = location to store number
51 	 *         of #gunichar2  written, or %NULL. The value stored here does not include
52 	 *         the trailing 0.
53 	 *
54 	 * Returns: a pointer to a newly allocated UTF-16 string.
55 	 *     This value must be freed with g_free(). If an error occurs,
56 	 *     %NULL will be returned and @error set.
57 	 *
58 	 * Throws: GException on failure.
59 	 */
60 	public static wchar* ucs4ToUtf16(dchar* str, glong len, out glong itemsRead, out glong itemsWritten)
61 	{
62 		// Handed to GLib as the error slot; a non-null value is treated as failure below.
63 		GError* conversionError;
64 		auto utf16 = g_ucs4_to_utf16(str, len, &itemsRead, &itemsWritten, &conversionError);
65 
66 		// Turn a reported GLib error into a GException for the caller.
67 		if (conversionError)
68 		{
69 			throw new GException(new ErrorG(conversionError));
70 		}
71 		return utf16;
72 	}
73 
74 	/**
75 	 * Convert a string from a 32-bit fixed width representation as UCS-4
76 	 * to UTF-8. The result will be terminated with a 0 byte.
77 	 *
78 	 * Params:
79 	 *     str = a UCS-4 encoded string
80 	 *     len = the maximum length (number of characters) of @str to use.
81 	 *         If @len < 0, then the string is nul-terminated.
82 	 *     itemsRead = location to store number of
83 	 *         characters read, or %NULL.
84 	 *     itemsWritten = location to store number
85 	 *         of bytes written or %NULL. The value here stored does not include the
86 	 *         trailing 0 byte.
87 	 *
88 	 * Returns: a pointer to a newly allocated UTF-8 string.
89 	 *     This value must be freed with g_free(). If an error occurs,
90 	 *     %NULL will be returned and @error set. In that case, @items_read
91 	 *     will be set to the position of the first invalid input character.
92 	 *
93 	 * Throws: GException on failure.
94 	 */
95 	public static string ucs4ToUtf8(dchar* str, glong len, out glong itemsRead, out glong itemsWritten)
96 	{
97 		// Handed to GLib as the error slot; a non-null value is treated as failure below.
98 		GError* conversionError;
99 		auto rawUtf8 = g_ucs4_to_utf8(str, len, &itemsRead, &itemsWritten, &conversionError);
100 		if (conversionError)
101 		{
102 			throw new GException(new ErrorG(conversionError));
103 		}
104 
105 		// The buffer is released on the way out, after the D string has been produced.
106 		scope(exit) Str.freeString(rawUtf8);
107 		return Str.toString(rawUtf8);
108 	}
109 
110 	/**
111 	 * Determines the break type of @c. @c should be a Unicode character
112 	 * (to derive a character from UTF-8 encoded text, use
113 	 * g_utf8_get_char()). The break type is used to find word and line
114 	 * breaks ("text boundaries"), Pango implements the Unicode boundary
115 	 * resolution algorithms and normally you would use a function such
116 	 * as pango_break() instead of caring about break types yourself.
117 	 *
118 	 * Params:
119 	 *     c = a Unicode character
120 	 *
121 	 * Returns: the break type of @c
122 	 */
123 	public static GUnicodeBreakType unicharBreakType(dchar c)
124 	{
125 		return g_unichar_break_type(c); // forwards to GLib; the result is returned unchanged
126 	}
127 
128 	/**
129 	 * Determines the canonical combining class of a Unicode character.
130 	 *
131 	 * Params:
132 	 *     uc = a Unicode character
133 	 *
134 	 * Returns: the combining class of the character
135 	 *
136 	 * Since: 2.14
137 	 */
138 	public static int unicharCombiningClass(dchar uc)
139 	{
140 		return g_unichar_combining_class(uc); // forwards to GLib; the int is returned as-is
141 	}
142 
143 	/**
144 	 * Performs a single composition step of the
145 	 * Unicode canonical composition algorithm.
146 	 *
147 	 * This function includes algorithmic Hangul Jamo composition,
148 	 * but it is not exactly the inverse of g_unichar_decompose().
149 	 * No composition can have either of @a or @b equal to zero.
150 	 * To be precise, this function composes if and only if
151 	 * there exists a Primary Composite P which is canonically
152 	 * equivalent to the sequence <@a,@b>.  See the Unicode
153 	 * Standard for the definition of Primary Composite.
154 	 *
155 	 * If @a and @b do not compose a new character, @ch is set to zero.
156 	 *
157 	 * See
158 	 * [UAX#15](http://unicode.org/reports/tr15/)
159 	 * for details.
160 	 *
161 	 * Params:
162 	 *     a = a Unicode character
163 	 *     b = a Unicode character
164 	 *     ch = return location for the composed character
165 	 *
166 	 * Returns: %TRUE if the characters could be composed
167 	 *
168 	 * Since: 2.30
169 	 */
170 	public static bool unicharCompose(dchar a, dchar b, out dchar ch)
171 	{
172 		return cast(bool) g_unichar_compose(a, b, &ch); // any non-zero GLib result means true
173 	}
174 
175 	/**
176 	 * Performs a single decomposition step of the
177 	 * Unicode canonical decomposition algorithm.
178 	 *
179 	 * This function does not include compatibility
180 	 * decompositions. It does, however, include algorithmic
181 	 * Hangul Jamo decomposition, as well as 'singleton'
182 	 * decompositions which replace a character by a single
183 	 * other character. In the case of singletons *@b will
184 	 * be set to zero.
185 	 *
186 	 * If @ch is not decomposable, *@a is set to @ch and *@b
187 	 * is set to zero.
188 	 *
189 	 * Note that the way Unicode decomposition pairs are
190 	 * defined, it is guaranteed that @b would not decompose
191 	 * further, but @a may itself decompose.  To get the full
192 	 * canonical decomposition for @ch, one would need to
193 	 * recursively call this function on @a.  Or use
194 	 * g_unichar_fully_decompose().
195 	 *
196 	 * See
197 	 * [UAX#15](http://unicode.org/reports/tr15/)
198 	 * for details.
199 	 *
200 	 * Params:
201 	 *     ch = a Unicode character
202 	 *     a = return location for the first component of @ch
203 	 *     b = return location for the second component of @ch
204 	 *
205 	 * Returns: %TRUE if the character could be decomposed
206 	 *
207 	 * Since: 2.30
208 	 */
209 	public static bool unicharDecompose(dchar ch, out dchar a, out dchar b)
210 	{
211 		return cast(bool) g_unichar_decompose(ch, &a, &b); // any non-zero GLib result means true
212 	}
213 
214 	/**
215 	 * Determines the numeric value of a character as a decimal
216 	 * digit.
217 	 *
218 	 * Params:
219 	 *     c = a Unicode character
220 	 *
221 	 * Returns: If @c is a decimal digit (according to
222 	 *     g_unichar_isdigit()), its numeric value. Otherwise, -1.
223 	 */
224 	public static int unicharDigitValue(dchar c)
225 	{
226 		return g_unichar_digit_value(c); // forwards to GLib; the int is returned as-is
227 	}
228 
229 	/**
230 	 * Computes the canonical or compatibility decomposition of a
231 	 * Unicode character.  For compatibility decomposition,
232 	 * pass %TRUE for @compat; for canonical decomposition
233 	 * pass %FALSE for @compat.
234 	 *
235 	 * The decomposed sequence is placed in @result.  Only up to
236 	 * @result_len characters are written into @result.  The length
237 	 * of the full decomposition (irrespective of @result_len) is
238 	 * returned by the function.  For canonical decomposition,
239 	 * currently all decompositions are of length at most 4, but
240 	 * this may change in the future (very unlikely though).
241 	 * At any rate, Unicode does guarantee that a buffer of length
242 	 * 18 is always enough for both compatibility and canonical
243 	 * decompositions, so that is the size recommended. This is provided
244 	 * as %G_UNICHAR_MAX_DECOMPOSITION_LENGTH.
245 	 *
246 	 * See
247 	 * [UAX#15](http://unicode.org/reports/tr15/)
248 	 * for details.
249 	 *
250 	 * Params:
251 	 *     ch = a Unicode character.
252 	 *     compat = whether to perform canonical or compatibility decomposition
253 	 *     result = location to store decomposed result, or %NULL
254 	 *     resultLen = length of @result
255 	 *
256 	 * Returns: the length of the full decomposition.
257 	 *
258 	 * Since: 2.30
259 	 */
260 	public static size_t unicharFullyDecompose(dchar ch, bool compat, out dchar result, size_t resultLen)
261 	{ // NOTE(review): GLib may write up to resultLen dchars at &result, which is declared as one dchar - confirm callers.
262 		return g_unichar_fully_decompose(ch, compat, &result, resultLen); // arguments are forwarded unchanged
263 	}
264 
265 	/**
266 	 * In Unicode, some characters are "mirrored". This means that their
267 	 * images are mirrored horizontally in text that is laid out from right
268 	 * to left. For instance, "(" would become its mirror image, ")", in
269 	 * right-to-left text.
270 	 *
271 	 * If @ch has the Unicode mirrored property and there is another unicode
272 	 * character that typically has a glyph that is the mirror image of @ch's
273 	 * glyph and @mirrored_ch is set, it puts that character in the address
274 	 * pointed to by @mirrored_ch.  Otherwise the original character is put.
275 	 *
276 	 * Params:
277 	 *     ch = a Unicode character
278 	 *     mirroredCh = location to store the mirrored character
279 	 *
280 	 * Returns: %TRUE if @ch has a mirrored character, %FALSE otherwise
281 	 *
282 	 * Since: 2.4
283 	 */
284 	public static bool unicharGetMirrorChar(dchar ch, dchar* mirroredCh)
285 	{
286 		return cast(bool) g_unichar_get_mirror_char(ch, mirroredCh); // any non-zero GLib result means true
287 	}
288 
289 	/**
290 	 * Looks up the #GUnicodeScript for a particular character (as defined
291 	 * by Unicode Standard Annex \#24). No check is made for @ch being a
292 	 * valid Unicode character; if you pass in invalid character, the
293 	 * result is undefined.
294 	 *
295 	 * This function is equivalent to pango_script_for_unichar() and the
296 	 * two are interchangeable.
297 	 *
298 	 * Params:
299 	 *     ch = a Unicode character
300 	 *
301 	 * Returns: the #GUnicodeScript for the character.
302 	 *
303 	 * Since: 2.14
304 	 */
305 	public static GUnicodeScript unicharGetScript(dchar ch)
306 	{
307 		return g_unichar_get_script(ch); // forwards to GLib; the result is returned unchanged
308 	}
309 
310 	/**
311 	 * Determines whether a character is alphanumeric.
312 	 * Given some UTF-8 text, obtain a character value
313 	 * with g_utf8_get_char().
314 	 *
315 	 * Params:
316 	 *     c = a Unicode character
317 	 *
318 	 * Returns: %TRUE if @c is an alphanumeric character
319 	 */
320 	public static bool unicharIsalnum(dchar c)
321 	{
322 		return cast(bool) g_unichar_isalnum(c); // any non-zero GLib result means true
323 	}
324 
325 	/**
326 	 * Determines whether a character is alphabetic (i.e. a letter).
327 	 * Given some UTF-8 text, obtain a character value with
328 	 * g_utf8_get_char().
329 	 *
330 	 * Params:
331 	 *     c = a Unicode character
332 	 *
333 	 * Returns: %TRUE if @c is an alphabetic character
334 	 */
335 	public static bool unicharIsalpha(dchar c)
336 	{
337 		return cast(bool) g_unichar_isalpha(c); // any non-zero GLib result means true
338 	}
339 
340 	/**
341 	 * Determines whether a character is a control character.
342 	 * Given some UTF-8 text, obtain a character value with
343 	 * g_utf8_get_char().
344 	 *
345 	 * Params:
346 	 *     c = a Unicode character
347 	 *
348 	 * Returns: %TRUE if @c is a control character
349 	 */
350 	public static bool unicharIscntrl(dchar c)
351 	{
352 		return cast(bool) g_unichar_iscntrl(c); // any non-zero GLib result means true
353 	}
354 
355 	/**
356 	 * Determines if a given character is assigned in the Unicode
357 	 * standard.
358 	 *
359 	 * Params:
360 	 *     c = a Unicode character
361 	 *
362 	 * Returns: %TRUE if the character has an assigned value
363 	 */
364 	public static bool unicharIsdefined(dchar c)
365 	{
366 		return cast(bool) g_unichar_isdefined(c); // any non-zero GLib result means true
367 	}
368 
369 	/**
370 	 * Determines whether a character is numeric (i.e. a digit).  This
371 	 * covers ASCII 0-9 and also digits in other languages/scripts.  Given
372 	 * some UTF-8 text, obtain a character value with g_utf8_get_char().
373 	 *
374 	 * Params:
375 	 *     c = a Unicode character
376 	 *
377 	 * Returns: %TRUE if @c is a digit
378 	 */
379 	public static bool unicharIsdigit(dchar c)
380 	{
381 		return cast(bool) g_unichar_isdigit(c); // any non-zero GLib result means true
382 	}
383 
384 	/**
385 	 * Determines whether a character is printable and not a space
386 	 * (returns %FALSE for control characters, format characters, and
387 	 * spaces). g_unichar_isprint() is similar, but returns %TRUE for
388 	 * spaces. Given some UTF-8 text, obtain a character value with
389 	 * g_utf8_get_char().
390 	 *
391 	 * Params:
392 	 *     c = a Unicode character
393 	 *
394 	 * Returns: %TRUE if @c is printable unless it's a space
395 	 */
396 	public static bool unicharIsgraph(dchar c)
397 	{
398 		return cast(bool) g_unichar_isgraph(c); // any non-zero GLib result means true
399 	}
400 
401 	/**
402 	 * Determines whether a character is a lowercase letter.
403 	 * Given some UTF-8 text, obtain a character value with
404 	 * g_utf8_get_char().
405 	 *
406 	 * Params:
407 	 *     c = a Unicode character
408 	 *
409 	 * Returns: %TRUE if @c is a lowercase letter
410 	 */
411 	public static bool unicharIslower(dchar c)
412 	{
413 		return cast(bool) g_unichar_islower(c); // any non-zero GLib result means true
414 	}
415 
416 	/**
417 	 * Determines whether a character is a mark (non-spacing mark,
418 	 * combining mark, or enclosing mark in Unicode speak).
419 	 * Given some UTF-8 text, obtain a character value
420 	 * with g_utf8_get_char().
421 	 *
422 	 * Note: in most cases where isalpha characters are allowed,
423 	 * ismark characters should be allowed too, as they are essential
424 	 * for writing most European languages as well as many non-Latin
425 	 * scripts.
426 	 *
427 	 * Params:
428 	 *     c = a Unicode character
429 	 *
430 	 * Returns: %TRUE if @c is a mark character
431 	 *
432 	 * Since: 2.14
433 	 */
434 	public static bool unicharIsmark(dchar c)
435 	{
436 		return cast(bool) g_unichar_ismark(c); // any non-zero GLib result means true
437 	}
438 
439 	/**
440 	 * Determines whether a character is printable.
441 	 * Unlike g_unichar_isgraph(), returns %TRUE for spaces.
442 	 * Given some UTF-8 text, obtain a character value with
443 	 * g_utf8_get_char().
444 	 *
445 	 * Params:
446 	 *     c = a Unicode character
447 	 *
448 	 * Returns: %TRUE if @c is printable
449 	 */
450 	public static bool unicharIsprint(dchar c)
451 	{
452 		return cast(bool) g_unichar_isprint(c); // any non-zero GLib result means true
453 	}
454 
455 	/**
456 	 * Determines whether a character is punctuation or a symbol.
457 	 * Given some UTF-8 text, obtain a character value with
458 	 * g_utf8_get_char().
459 	 *
460 	 * Params:
461 	 *     c = a Unicode character
462 	 *
463 	 * Returns: %TRUE if @c is a punctuation or symbol character
464 	 */
465 	public static bool unicharIspunct(dchar c)
466 	{
467 		return cast(bool) g_unichar_ispunct(c); // any non-zero GLib result means true
468 	}
469 
470 	/**
471 	 * Determines whether a character is a space, tab, or line separator
472 	 * (newline, carriage return, etc.).  Given some UTF-8 text, obtain a
473 	 * character value with g_utf8_get_char().
474 	 *
475 	 * (Note: don't use this to do word breaking; you have to use
476 	 * Pango or equivalent to get word breaking right, the algorithm
477 	 * is fairly complex.)
478 	 *
479 	 * Params:
480 	 *     c = a Unicode character
481 	 *
482 	 * Returns: %TRUE if @c is a space character
483 	 */
484 	public static bool unicharIsspace(dchar c)
485 	{
486 		return cast(bool) g_unichar_isspace(c); // any non-zero GLib result means true
487 	}
488 
489 	/**
490 	 * Determines if a character is titlecase. Some characters in
491 	 * Unicode which are composites, such as the DZ digraph
492 	 * have three case variants instead of just two. The titlecase
493 	 * form is used at the beginning of a word where only the
494 	 * first letter is capitalized. The titlecase form of the DZ
495 	 * digraph is U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.
496 	 *
497 	 * Params:
498 	 *     c = a Unicode character
499 	 *
500 	 * Returns: %TRUE if the character is titlecase
501 	 */
502 	public static bool unicharIstitle(dchar c)
503 	{
504 		return cast(bool) g_unichar_istitle(c); // any non-zero GLib result means true
505 	}
506 
507 	/**
508 	 * Determines if a character is uppercase.
509 	 *
510 	 * Params:
511 	 *     c = a Unicode character
512 	 *
513 	 * Returns: %TRUE if @c is an uppercase character
514 	 */
515 	public static bool unicharIsupper(dchar c)
516 	{
517 		return cast(bool) g_unichar_isupper(c); // any non-zero GLib result means true
518 	}
519 
520 	/**
521 	 * Determines if a character is typically rendered in a double-width
522 	 * cell.
523 	 *
524 	 * Params:
525 	 *     c = a Unicode character
526 	 *
527 	 * Returns: %TRUE if the character is wide
528 	 */
529 	public static bool unicharIswide(dchar c)
530 	{
531 		return cast(bool) g_unichar_iswide(c); // any non-zero GLib result means true
532 	}
533 
534 	/**
535 	 * Determines if a character is typically rendered in a double-width
536 	 * cell under legacy East Asian locales.  If a character is wide according to
537 	 * g_unichar_iswide(), then it is also reported wide with this function, but
538 	 * the converse is not necessarily true. See the
539 	 * [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
540 	 * for details.
541 	 *
542 	 * If a character passes the g_unichar_iswide() test then it will also pass
543 	 * this test, but not the other way around.  Note that some characters may
544 	 * pass both this test and g_unichar_iszerowidth().
545 	 *
546 	 * Params:
547 	 *     c = a Unicode character
548 	 *
549 	 * Returns: %TRUE if the character is wide in legacy East Asian locales
550 	 *
551 	 * Since: 2.12
552 	 */
553 	public static bool unicharIswideCjk(dchar c)
554 	{
555 		return cast(bool) g_unichar_iswide_cjk(c); // any non-zero GLib result means true
556 	}
557 
558 	/**
559 	 * Determines if a character is a hexadecimal digit.
560 	 *
561 	 * Params:
562 	 *     c = a Unicode character.
563 	 *
564 	 * Returns: %TRUE if the character is a hexadecimal digit
565 	 */
566 	public static bool unicharIsxdigit(dchar c)
567 	{
568 		return cast(bool) g_unichar_isxdigit(c); // any non-zero GLib result means true
569 	}
570 
571 	/**
572 	 * Determines if a given character typically takes zero width when rendered.
573 	 * The return value is %TRUE for all non-spacing and enclosing marks
574 	 * (e.g., combining accents), format characters, zero-width
575 	 * space, but not U+00AD SOFT HYPHEN.
576 	 *
577 	 * A typical use of this function is with one of g_unichar_iswide() or
578 	 * g_unichar_iswide_cjk() to determine the number of cells a string occupies
579 	 * when displayed on a grid display (terminals).  However, note that not all
580 	 * terminals support zero-width rendering of zero-width marks.
581 	 *
582 	 * Params:
583 	 *     c = a Unicode character
584 	 *
585 	 * Returns: %TRUE if the character has zero width
586 	 *
587 	 * Since: 2.14
588 	 */
589 	public static bool unicharIszerowidth(dchar c)
590 	{
591 		return cast(bool) g_unichar_iszerowidth(c); // any non-zero GLib result means true
592 	}
593 
594 	/**
595 	 * Converts a single character to UTF-8.
596 	 *
597 	 * Params:
598 	 *     c = a Unicode character code
599 	 *     outbuf = output buffer, must have at
600 	 *         least 6 bytes of space. If %NULL, the length will be computed and
601 	 *         returned and nothing will be written to @outbuf.
602 	 *
603 	 * Returns: number of bytes written
604 	 */
605 	public static int unicharToUtf8(dchar c, out char[] outbuf)
606 	{
607 		// An `out` array is reset to null on entry, so passing `outbuf.ptr`
608 		// straight through always handed GLib a null buffer: it reported the
609 		// length but never wrote the encoded bytes. Allocate the 6 bytes the
610 		// C API asks for, encode into them, then trim to what was written.
611 		outbuf = new char[6];
612 		immutable written = g_unichar_to_utf8(c, outbuf.ptr);
613 		outbuf = outbuf[0 .. written];
614 		return written;
615 	}
609 
610 	/**
611 	 * Converts a character to lower case.
612 	 *
613 	 * Params:
614 	 *     c = a Unicode character.
615 	 *
616 	 * Returns: the result of converting @c to lower case.
617 	 *     If @c is not an uppercase or titlecase character,
618 	 *     or has no lowercase equivalent @c is returned unchanged.
619 	 */
620 	public static dchar unicharTolower(dchar c)
621 	{
622 		return g_unichar_tolower(c); // forwards to GLib; the character is returned as-is
623 	}
624 
625 	/**
626 	 * Converts a character to the titlecase.
627 	 *
628 	 * Params:
629 	 *     c = a Unicode character
630 	 *
631 	 * Returns: the result of converting @c to titlecase.
632 	 *     If @c is not an uppercase or lowercase character,
633 	 *     @c is returned unchanged.
634 	 */
635 	public static dchar unicharTotitle(dchar c)
636 	{
637 		return g_unichar_totitle(c); // forwards to GLib; the character is returned as-is
638 	}
639 
640 	/**
641 	 * Converts a character to uppercase.
642 	 *
643 	 * Params:
644 	 *     c = a Unicode character
645 	 *
646 	 * Returns: the result of converting @c to uppercase.
647 	 *     If @c is not a lowercase or titlecase character,
648 	 *     or has no upper case equivalent @c is returned unchanged.
649 	 */
650 	public static dchar unicharToupper(dchar c)
651 	{
652 		return g_unichar_toupper(c); // forwards to GLib; the character is returned as-is
653 	}
654 
655 	/**
656 	 * Classifies a Unicode character by type.
657 	 *
658 	 * Params:
659 	 *     c = a Unicode character
660 	 *
661 	 * Returns: the type of the character.
662 	 */
663 	public static GUnicodeType unicharType(dchar c)
664 	{
665 		return g_unichar_type(c); // forwards to GLib; the result is returned unchanged
666 	}
667 
668 	/**
669 	 * Checks whether @ch is a valid Unicode character. Some possible
670 	 * integer values of @ch will not be valid. 0 is considered a valid
671 	 * character, though it's normally a string terminator.
672 	 *
673 	 * Params:
674 	 *     ch = a Unicode character
675 	 *
676 	 * Returns: %TRUE if @ch is a valid Unicode character
677 	 */
678 	public static bool unicharValidate(dchar ch)
679 	{
680 		return cast(bool) g_unichar_validate(ch); // any non-zero GLib result means true
681 	}
682 
683 	/**
684 	 * Determines the numeric value of a character as a hexadecimal
685 	 * digit.
686 	 *
687 	 * Params:
688 	 *     c = a Unicode character
689 	 *
690 	 * Returns: If @c is a hex digit (according to
691 	 *     g_unichar_isxdigit()), its numeric value. Otherwise, -1.
692 	 */
693 	public static int unicharXdigitValue(dchar c)
694 	{
695 		return g_unichar_xdigit_value(c); // forwards to GLib; the int is returned as-is
696 	}
697 
698 	/**
699 	 * Computes the canonical decomposition of a Unicode character.
700 	 *
701 	 * Deprecated: Use the more flexible g_unichar_fully_decompose()
702 	 * instead.
703 	 *
704 	 * Params:
705 	 *     ch = a Unicode character.
706 	 *     resultLen = location to store the length of the return value.
707 	 *
708 	 * Returns: a newly allocated string of Unicode characters.
709 	 *     @result_len is set to the resulting length of the string.
710 	 */
711 	public static dchar* unicodeCanonicalDecomposition(dchar ch, size_t* resultLen)
712 	{
713 		return g_unicode_canonical_decomposition(ch, resultLen); // GLib's pointer is handed back untouched
714 	}
715 
716 	/**
717 	 * Computes the canonical ordering of a string in-place.
718 	 * This rearranges decomposed characters in the string
719 	 * according to their combining classes.  See the Unicode
720 	 * manual for more information.
721 	 *
722 	 * Params:
723 	 *     string_ = a UCS-4 encoded string.
724 	 *     len = the maximum length of @string to use.
725 	 */
726 	public static void unicodeCanonicalOrdering(dchar* string_, size_t len)
727 	{
728 		g_unicode_canonical_ordering(string_, len); // both arguments are forwarded unchanged; nothing is returned
729 	}
730 
731 	/**
732 	 * Looks up the Unicode script for @iso15924.  ISO 15924 assigns four-letter
733 	 * codes to scripts.  For example, the code for Arabic is 'Arab'.
734 	 * This function accepts four letter codes encoded as a @guint32 in a
735 	 * big-endian fashion.  That is, the code expected for Arabic is
736 	 * 0x41726162 (0x41 is ASCII code for 'A', 0x72 is ASCII code for 'r', etc).
737 	 *
738 	 * See
739 	 * [Codes for the representation of names of scripts](http://unicode.org/iso15924/codelists.html)
740 	 * for details.
741 	 *
742 	 * Params:
743 	 *     iso15924 = a Unicode script
744 	 *
745 	 * Returns: the Unicode script for @iso15924, or
746 	 *     %G_UNICODE_SCRIPT_INVALID_CODE if @iso15924 is zero and
747 	 *     %G_UNICODE_SCRIPT_UNKNOWN if @iso15924 is unknown.
748 	 *
749 	 * Since: 2.30
750 	 */
751 	public static GUnicodeScript unicodeScriptFromIso15924(uint iso15924)
752 	{
753 		return g_unicode_script_from_iso15924(iso15924); // forwards to GLib; the result is returned unchanged
754 	}
755 
756 	/**
757 	 * Looks up the ISO 15924 code for @script.  ISO 15924 assigns four-letter
758 	 * codes to scripts.  For example, the code for Arabic is 'Arab'.  The
759 	 * four letter codes are encoded as a @guint32 by this function in a
760 	 * big-endian fashion.  That is, the code returned for Arabic is
761 	 * 0x41726162 (0x41 is ASCII code for 'A', 0x72 is ASCII code for 'r', etc).
762 	 *
763 	 * See
764 	 * [Codes for the representation of names of scripts](http://unicode.org/iso15924/codelists.html)
765 	 * for details.
766 	 *
767 	 * Params:
768 	 *     script = a Unicode script
769 	 *
770 	 * Returns: the ISO 15924 code for @script, encoded as an integer,
771 	 *     or zero if @script is %G_UNICODE_SCRIPT_INVALID_CODE or
772 	 *     ISO 15924 code 'Zzzz' (script code for UNKNOWN) if @script is not understood.
773 	 *
774 	 * Since: 2.30
775 	 */
776 	public static uint unicodeScriptToIso15924(GUnicodeScript script)
777 	{
778 		return g_unicode_script_to_iso15924(script); // forwards to GLib; the code is returned as-is
779 	}
780 
781 	/**
782 	 * Convert a string from UTF-16 to UCS-4. The result will be
783 	 * nul-terminated.
784 	 *
785 	 * Params:
786 	 *     str = a UTF-16 encoded string
787 	 *     len = the maximum length (number of #gunichar2) of @str to use.
788 	 *         If @len < 0, then the string is nul-terminated.
789 	 *     itemsRead = location to store number of
790 	 *         words read, or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will
791 	 *         be returned in case @str contains a trailing partial character. If
792 	 *         an error occurs then the index of the invalid input is stored here.
793 	 *     itemsWritten = location to store number
794 	 *         of characters written, or %NULL. The value stored here does not include
795 	 *         the trailing 0 character.
796 	 *
797 	 * Returns: a pointer to a newly allocated UCS-4 string.
798 	 *     This value must be freed with g_free(). If an error occurs,
799 	 *     %NULL will be returned and @error set.
800 	 *
801 	 * Throws: GException on failure.
802 	 */
803 	public static dchar* utf16ToUcs4(wchar* str, glong len, out glong itemsRead, out glong itemsWritten)
804 	{
805 		// Handed to GLib as the error slot; a non-null value is treated as failure below.
806 		GError* conversionError;
807 		auto ucs4 = g_utf16_to_ucs4(str, len, &itemsRead, &itemsWritten, &conversionError);
808 
809 		// Turn a reported GLib error into a GException for the caller.
810 		if (conversionError)
811 		{
812 			throw new GException(new ErrorG(conversionError));
813 		}
814 		return ucs4;
815 	}
816 
817 	/**
818 	 * Convert a string from UTF-16 to UTF-8. The result will be
819 	 * terminated with a 0 byte.
820 	 *
821 	 * Note that the input is expected to be already in native endianness,
822 	 * an initial byte-order-mark character is not handled specially.
823 	 * g_convert() can be used to convert a byte buffer of UTF-16 data of
824 	 * ambiguous endianness.
825 	 *
826 	 * Further note that this function does not validate the result
827 	 * string; it may e.g. include embedded NUL characters. The only
828 	 * validation done by this function is to ensure that the input can
829 	 * be correctly interpreted as UTF-16, i.e. it doesn't contain
830 	 * things like unpaired surrogates.
831 	 *
832 	 * Params:
833 	 *     str = a UTF-16 encoded string
834 	 *     len = the maximum length (number of #gunichar2) of @str to use.
835 	 *         If @len < 0, then the string is nul-terminated.
836 	 *     itemsRead = location to store number of
837 	 *         words read, or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will
838 	 *         be returned in case @str contains a trailing partial character. If
839 	 *         an error occurs then the index of the invalid input is stored here.
840 	 *     itemsWritten = location to store number
841 	 *         of bytes written, or %NULL. The value stored here does not include the
842 	 *         trailing 0 byte.
843 	 *
844 	 * Returns: a pointer to a newly allocated UTF-8 string.
845 	 *     This value must be freed with g_free(). If an error occurs,
846 	 *     %NULL will be returned and @error set.
847 	 *
848 	 * Throws: GException on failure.
849 	 */
850 	public static string utf16ToUtf8(wchar* str, glong len, out glong itemsRead, out glong itemsWritten)
851 	{
852 		// Handed to GLib as the error slot; a non-null value is treated as failure below.
853 		GError* conversionError;
854 		auto rawUtf8 = g_utf16_to_utf8(str, len, &itemsRead, &itemsWritten, &conversionError);
855 		if (conversionError)
856 		{
857 			throw new GException(new ErrorG(conversionError));
858 		}
859 
860 		// The buffer is released on the way out, after the D string has been produced.
861 		scope(exit) Str.freeString(rawUtf8);
862 		return Str.toString(rawUtf8);
863 	}
864 
865 	/**
866 	 * Converts a string into a form that is independent of case. The
867 	 * result will not correspond to any particular case, but can be
868 	 * compared for equality or ordered with the results of calling
869 	 * g_utf8_casefold() on other strings.
870 	 *
871 	 * Note that calling g_utf8_casefold() followed by g_utf8_collate() is
872 	 * only an approximation to the correct linguistic case insensitive
873 	 * ordering, though it is a fairly good one. Getting this exactly
874 	 * right would require a more sophisticated collation function that
875 	 * takes case sensitivity into account. GLib does not currently
876 	 * provide such a function.
877 	 *
878 	 * Params:
879 	 *     str = a UTF-8 encoded string
880 	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
881 	 *
882 	 * Returns: a newly allocated string, that is a
883 	 *     case independent form of @str.
884 	 */
885 	public static string utf8Casefold(string str, ptrdiff_t len)
886 	{
887 		auto cInput = Str.toStringz(str);
888 		auto folded = g_utf8_casefold(cInput, len);
889 		scope(exit) Str.freeString(folded); // runs on exit, after the D string is produced
890 		return Str.toString(folded);
891 	}
892 
893 	/**
894 	 * Compares two strings for ordering using the linguistically
895 	 * correct rules for the [current locale][setlocale].
896 	 * When sorting a large number of strings, it will be significantly
897 	 * faster to obtain collation keys with g_utf8_collate_key() and
898 	 * compare the keys with strcmp() when sorting instead of sorting
899 	 * the original strings.
900 	 *
901 	 * Params:
902 	 *     str1 = a UTF-8 encoded string
903 	 *     str2 = a UTF-8 encoded string
904 	 *
905 	 * Returns: < 0 if @str1 compares before @str2,
906 	 *     0 if they compare equal, > 0 if @str1 compares after @str2.
907 	 */
908 	public static int utf8Collate(string str1, string str2)
909 	{
910 		// Prepare the operands in the order given, then let GLib compare them.
911 		auto first = Str.toStringz(str1);
912 		auto second = Str.toStringz(str2);
913 		return g_utf8_collate(first, second);
914 	}
912 
913 	/**
914 	 * Converts a string into a collation key that can be compared
915 	 * with other collation keys produced by the same function using
916 	 * strcmp().
917 	 *
918 	 * The results of comparing the collation keys of two strings
919 	 * with strcmp() will always be the same as comparing the two
920 	 * original strings with g_utf8_collate().
921 	 *
922 	 * Note that this function depends on the [current locale][setlocale].
923 	 *
924 	 * Params:
925 	 *     str = a UTF-8 encoded string.
926 	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
927 	 *
928 	 * Returns: a newly allocated string. This string should
929 	 *     be freed with g_free() when you are done with it.
930 	 */
931 	public static string utf8CollateKey(string str, ptrdiff_t len)
932 	{
933 		auto cInput = Str.toStringz(str);
934 		auto key = g_utf8_collate_key(cInput, len);
935 		scope(exit) Str.freeString(key); // runs on exit, after the D string is produced
936 		return Str.toString(key);
937 	}
938 
939 	/**
940 	 * Converts a string into a collation key that can be compared
941 	 * with other collation keys produced by the same function using strcmp().
942 	 *
943 	 * In order to sort filenames correctly, this function treats the dot '.'
944 	 * as a special case. Most dictionary orderings seem to consider it
945 	 * insignificant, thus producing the ordering "event.c" "eventgenerator.c"
946 	 * "event.h" instead of "event.c" "event.h" "eventgenerator.c". Also, we
947 	 * would like to treat numbers intelligently so that "file1" "file10" "file5"
948 	 * is sorted as "file1" "file5" "file10".
949 	 *
950 	 * Note that this function depends on the [current locale][setlocale].
951 	 *
952 	 * Params:
953 	 *     str = a UTF-8 encoded string.
954 	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
955 	 *
956 	 * Returns: a newly allocated string. This string should
957 	 *     be freed with g_free() when you are done with it.
958 	 *
959 	 * Since: 2.8
960 	 */
961 	public static string utf8CollateKeyForFilename(string str, ptrdiff_t len)
962 	{
963 		auto retStr = g_utf8_collate_key_for_filename(Str.toStringz(str), len);
964 
965 		scope(exit) Str.freeString(retStr);
966 		return Str.toString(retStr);
967 	}
968 
969 	/**
970 	 * Finds the start of the next UTF-8 character in the string after @p.
971 	 *
972 	 * @p does not have to be at the beginning of a UTF-8 character. No check
973 	 * is made to see if the character found is actually valid other than
974 	 * it starts with an appropriate byte.
975 	 *
976 	 * If @end is %NULL, the return value will never be %NULL: if the end of the
977 	 * string is reached, a pointer to the terminating nul byte is returned. If
978 	 * @end is non-%NULL, the return value will be %NULL if the end of the string
979 	 * is reached.
980 	 *
981 	 * Params:
982 	 *     p = a pointer to a position within a UTF-8 encoded string
983 	 *     end = a pointer to the byte following the end of the string,
984 	 *         or %NULL to indicate that the string is nul-terminated
985 	 *
986 	 * Returns: a pointer to the found character or %NULL if @end is
987 	 *     set and is reached
988 	 */
989 	public static string utf8FindNextChar(string p, string end)
990 	{
991 		return Str.toString(g_utf8_find_next_char(Str.toStringz(p), Str.toStringz(end)));
992 	}
993 
994 	/**
995 	 * Given a position @p with a UTF-8 encoded string @str, find the start
996 	 * of the previous UTF-8 character starting before @p. Returns %NULL if no
997 	 * UTF-8 characters are present in @str before @p.
998 	 *
999 	 * @p does not have to be at the beginning of a UTF-8 character. No check
1000 	 * is made to see if the character found is actually valid other than
1001 	 * it starts with an appropriate byte.
1002 	 *
1003 	 * Params:
1004 	 *     str = pointer to the beginning of a UTF-8 encoded string
1005 	 *     p = pointer to some position within @str
1006 	 *
1007 	 * Returns: a pointer to the found character or %NULL.
1008 	 */
1009 	public static string utf8FindPrevChar(string str, string p)
1010 	{
1011 		return Str.toString(g_utf8_find_prev_char(Str.toStringz(str), Str.toStringz(p)));
1012 	}
1013 
1014 	/**
1015 	 * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
1016 	 *
1017 	 * If @p does not point to a valid UTF-8 encoded character, results
1018 	 * are undefined. If you are not sure that the bytes are complete
1019 	 * valid Unicode characters, you should use g_utf8_get_char_validated()
1020 	 * instead.
1021 	 *
1022 	 * Params:
1023 	 *     p = a pointer to Unicode character encoded as UTF-8
1024 	 *
1025 	 * Returns: the resulting character
1026 	 */
1027 	public static dchar utf8GetChar(string p)
1028 	{
1029 		return g_utf8_get_char(Str.toStringz(p));
1030 	}
1031 
1032 	/**
1033 	 * Convert a sequence of bytes encoded as UTF-8 to a Unicode character.
1034 	 * This function checks for incomplete characters, for invalid characters
1035 	 * such as characters that are out of the range of Unicode, and for
1036 	 * overlong encodings of valid characters.
1037 	 *
1038 	 * Note that g_utf8_get_char_validated() returns (gunichar)-2 if
1039 	 * @max_len is positive and any of the bytes in the first UTF-8 character
1040 	 * sequence are nul.
1041 	 *
1042 	 * Params:
1043 	 *     p = a pointer to Unicode character encoded as UTF-8
1044 	 *     maxLen = the maximum number of bytes to read, or -1 if @p is nul-terminated
1045 	 *
1046 	 * Returns: the resulting character. If @p points to a partial
1047 	 *     sequence at the end of a string that could begin a valid
1048 	 *     character (or if @max_len is zero), returns (gunichar)-2;
1049 	 *     otherwise, if @p does not point to a valid UTF-8 encoded
1050 	 *     Unicode character, returns (gunichar)-1.
1051 	 */
1052 	public static dchar utf8GetCharValidated(string p, ptrdiff_t maxLen)
1053 	{
1054 		return g_utf8_get_char_validated(Str.toStringz(p), maxLen);
1055 	}
1056 
1057 	/**
1058 	 * Converts a string into canonical form, standardizing
1059 	 * such issues as whether a character with an accent
1060 	 * is represented as a base character and combining
1061 	 * accent or as a single precomposed character. The
1062 	 * string has to be valid UTF-8, otherwise %NULL is
1063 	 * returned. You should generally call g_utf8_normalize()
1064 	 * before comparing two Unicode strings.
1065 	 *
1066 	 * The normalization mode %G_NORMALIZE_DEFAULT only
1067 	 * standardizes differences that do not affect the
1068 	 * text content, such as the above-mentioned accent
1069 	 * representation. %G_NORMALIZE_ALL also standardizes
1070 	 * the "compatibility" characters in Unicode, such
1071 	 * as SUPERSCRIPT THREE to the standard forms
1072 	 * (in this case DIGIT THREE). Formatting information
1073 	 * may be lost but for most text operations such
1074 	 * characters should be considered the same.
1075 	 *
1076 	 * %G_NORMALIZE_DEFAULT_COMPOSE and %G_NORMALIZE_ALL_COMPOSE
1077 	 * are like %G_NORMALIZE_DEFAULT and %G_NORMALIZE_ALL,
1078 	 * but returned a result with composed forms rather
1079 	 * than a maximally decomposed form. This is often
1080 	 * useful if you intend to convert the string to
1081 	 * a legacy encoding or pass it to a system with
1082 	 * less capable Unicode handling.
1083 	 *
1084 	 * Params:
1085 	 *     str = a UTF-8 encoded string.
1086 	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
1087 	 *     mode = the type of normalization to perform.
1088 	 *
1089 	 * Returns: a newly allocated string, that
1090 	 *     is the normalized form of @str, or %NULL if @str
1091 	 *     is not valid UTF-8.
1092 	 */
1093 	public static string utf8Normalize(string str, ptrdiff_t len, GNormalizeMode mode)
1094 	{
1095 		auto retStr = g_utf8_normalize(Str.toStringz(str), len, mode);
1096 
1097 		scope(exit) Str.freeString(retStr);
1098 		return Str.toString(retStr);
1099 	}
1100 
1101 	/**
1102 	 * Converts from an integer character offset to a pointer to a position
1103 	 * within the string.
1104 	 *
1105 	 * Since 2.10, this function allows to pass a negative @offset to
1106 	 * step backwards. It is usually worth stepping backwards from the end
1107 	 * instead of forwards if @offset is in the last fourth of the string,
1108 	 * since moving forward is about 3 times faster than moving backward.
1109 	 *
1110 	 * Note that this function doesn't abort when reaching the end of @str.
1111 	 * Therefore you should be sure that @offset is within string boundaries
1112 	 * before calling that function. Call g_utf8_strlen() when unsure.
1113 	 * This limitation exists as this function is called frequently during
1114 	 * text rendering and therefore has to be as fast as possible.
1115 	 *
1116 	 * Params:
1117 	 *     str = a UTF-8 encoded string
1118 	 *     offset = a character offset within @str
1119 	 *
1120 	 * Returns: the resulting pointer
1121 	 */
1122 	public static string utf8OffsetToPointer(string str, glong offset)
1123 	{
1124 		return Str.toString(g_utf8_offset_to_pointer(Str.toStringz(str), offset));
1125 	}
1126 
1127 	/**
1128 	 * Converts from a pointer to position within a string to an integer
1129 	 * character offset.
1130 	 *
1131 	 * Since 2.10, this function allows @pos to be before @str, and returns
1132 	 * a negative offset in this case.
1133 	 *
1134 	 * Params:
1135 	 *     str = a UTF-8 encoded string
1136 	 *     pos = a pointer to a position within @str
1137 	 *
1138 	 * Returns: the resulting character offset
1139 	 */
1140 	public static glong utf8PointerToOffset(string str, string pos)
1141 	{
1142 		return g_utf8_pointer_to_offset(Str.toStringz(str), Str.toStringz(pos));
1143 	}
1144 
1145 	/**
1146 	 * Finds the previous UTF-8 character in the string before @p.
1147 	 *
1148 	 * @p does not have to be at the beginning of a UTF-8 character. No check
1149 	 * is made to see if the character found is actually valid other than
1150 	 * it starts with an appropriate byte. If @p might be the first
1151 	 * character of the string, you must use g_utf8_find_prev_char() instead.
1152 	 *
1153 	 * Params:
1154 	 *     p = a pointer to a position within a UTF-8 encoded string
1155 	 *
1156 	 * Returns: a pointer to the found character
1157 	 */
1158 	public static string utf8PrevChar(string p)
1159 	{
1160 		return Str.toString(g_utf8_prev_char(Str.toStringz(p)));
1161 	}
1162 
1163 	/**
1164 	 * Finds the leftmost occurrence of the given Unicode character
1165 	 * in a UTF-8 encoded string, while limiting the search to @len bytes.
1166 	 * If @len is -1, allow unbounded search.
1167 	 *
1168 	 * Params:
1169 	 *     p = a nul-terminated UTF-8 encoded string
1170 	 *     len = the maximum length of @p
1171 	 *     c = a Unicode character
1172 	 *
1173 	 * Returns: %NULL if the string does not contain the character,
1174 	 *     otherwise, a pointer to the start of the leftmost occurrence
1175 	 *     of the character in the string.
1176 	 */
1177 	public static string utf8Strchr(string p, ptrdiff_t len, dchar c)
1178 	{
1179 		return Str.toString(g_utf8_strchr(Str.toStringz(p), len, c));
1180 	}
1181 
1182 	/**
1183 	 * Converts all Unicode characters in the string that have a case
1184 	 * to lowercase. The exact manner that this is done depends
1185 	 * on the current locale, and may result in the number of
1186 	 * characters in the string changing.
1187 	 *
1188 	 * Params:
1189 	 *     str = a UTF-8 encoded string
1190 	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
1191 	 *
1192 	 * Returns: a newly allocated string, with all characters
1193 	 *     converted to lowercase.
1194 	 */
1195 	public static string utf8Strdown(string str, ptrdiff_t len)
1196 	{
1197 		auto retStr = g_utf8_strdown(Str.toStringz(str), len);
1198 
1199 		scope(exit) Str.freeString(retStr);
1200 		return Str.toString(retStr);
1201 	}
1202 
1203 	/**
1204 	 * Computes the length of the string in characters, not including
1205 	 * the terminating nul character. If the @max'th byte falls in the
1206 	 * middle of a character, the last (partial) character is not counted.
1207 	 *
1208 	 * Params:
1209 	 *     p = pointer to the start of a UTF-8 encoded string
1210 	 *     max = the maximum number of bytes to examine. If @max
1211 	 *         is less than 0, then the string is assumed to be
1212 	 *         nul-terminated. If @max is 0, @p will not be examined and
1213 	 *         may be %NULL. If @max is greater than 0, up to @max
1214 	 *         bytes are examined
1215 	 *
1216 	 * Returns: the length of the string in characters
1217 	 */
1218 	public static glong utf8Strlen(string p, ptrdiff_t max)
1219 	{
1220 		return g_utf8_strlen(Str.toStringz(p), max);
1221 	}
1222 
1223 	/**
1224 	 * Like the standard C strncpy() function, but copies a given number
1225 	 * of characters instead of a given number of bytes. The @src string
1226 	 * must be valid UTF-8 encoded text. (Use g_utf8_validate() on all
1227 	 * text before trying to use UTF-8 utility functions with it.)
1228 	 *
1229 	 * Note you must ensure @dest is at least 4 * @n to fit the
1230 	 * largest possible UTF-8 characters
1231 	 *
1232 	 * Params:
1233 	 *     dest = buffer to fill with characters from @src
1234 	 *     src = UTF-8 encoded string
1235 	 *     n = character count
1236 	 *
1237 	 * Returns: @dest
1238 	 */
1239 	public static string utf8Strncpy(string dest, string src, size_t n)
1240 	{
1241 		return Str.toString(g_utf8_strncpy(Str.toStringz(dest), Str.toStringz(src), n));
1242 	}
1243 
1244 	/**
1245 	 * Find the rightmost occurrence of the given Unicode character
1246 	 * in a UTF-8 encoded string, while limiting the search to @len bytes.
1247 	 * If @len is -1, allow unbounded search.
1248 	 *
1249 	 * Params:
1250 	 *     p = a nul-terminated UTF-8 encoded string
1251 	 *     len = the maximum length of @p
1252 	 *     c = a Unicode character
1253 	 *
1254 	 * Returns: %NULL if the string does not contain the character,
1255 	 *     otherwise, a pointer to the start of the rightmost occurrence
1256 	 *     of the character in the string.
1257 	 */
1258 	public static string utf8Strrchr(string p, ptrdiff_t len, dchar c)
1259 	{
1260 		return Str.toString(g_utf8_strrchr(Str.toStringz(p), len, c));
1261 	}
1262 
1263 	/**
1264 	 * Reverses a UTF-8 string. @str must be valid UTF-8 encoded text.
1265 	 * (Use g_utf8_validate() on all text before trying to use UTF-8
1266 	 * utility functions with it.)
1267 	 *
1268 	 * This function is intended for programmatic uses of reversed strings.
1269 	 * It pays no attention to decomposed characters, combining marks, byte
1270 	 * order marks, directional indicators (LRM, LRO, etc) and similar
1271 	 * characters which might need special handling when reversing a string
1272 	 * for display purposes.
1273 	 *
1274 	 * Note that unlike g_strreverse(), this function returns
1275 	 * newly-allocated memory, which should be freed with g_free() when
1276 	 * no longer needed.
1277 	 *
1278 	 * Params:
1279 	 *     str = a UTF-8 encoded string
1280 	 *     len = the maximum length of @str to use, in bytes. If @len < 0,
1281 	 *         then the string is nul-terminated.
1282 	 *
1283 	 * Returns: a newly-allocated string which is the reverse of @str
1284 	 *
1285 	 * Since: 2.2
1286 	 */
1287 	public static string utf8Strreverse(string str, ptrdiff_t len)
1288 	{
1289 		auto retStr = g_utf8_strreverse(Str.toStringz(str), len);
1290 
1291 		scope(exit) Str.freeString(retStr);
1292 		return Str.toString(retStr);
1293 	}
1294 
1295 	/**
1296 	 * Converts all Unicode characters in the string that have a case
1297 	 * to uppercase. The exact manner that this is done depends
1298 	 * on the current locale, and may result in the number of
1299 	 * characters in the string increasing. (For instance, the
1300 	 * German ess-zet will be changed to SS.)
1301 	 *
1302 	 * Params:
1303 	 *     str = a UTF-8 encoded string
1304 	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
1305 	 *
1306 	 * Returns: a newly allocated string, with all characters
1307 	 *     converted to uppercase.
1308 	 */
1309 	public static string utf8Strup(string str, ptrdiff_t len)
1310 	{
1311 		auto retStr = g_utf8_strup(Str.toStringz(str), len);
1312 
1313 		scope(exit) Str.freeString(retStr);
1314 		return Str.toString(retStr);
1315 	}
1316 
1317 	/**
1318 	 * Copies a substring out of a UTF-8 encoded string.
1319 	 * The substring will contain @end_pos - @start_pos characters.
1320 	 *
1321 	 * Params:
1322 	 *     str = a UTF-8 encoded string
1323 	 *     startPos = a character offset within @str
1324 	 *     endPos = another character offset within @str
1325 	 *
1326 	 * Returns: a newly allocated copy of the requested
1327 	 *     substring. Free with g_free() when no longer needed.
1328 	 *
1329 	 * Since: 2.30
1330 	 */
1331 	public static string utf8Substring(string str, glong startPos, glong endPos)
1332 	{
1333 		auto retStr = g_utf8_substring(Str.toStringz(str), startPos, endPos);
1334 
1335 		scope(exit) Str.freeString(retStr);
1336 		return Str.toString(retStr);
1337 	}
1338 
1339 	/**
1340 	 * Convert a string from UTF-8 to a 32-bit fixed width
1341 	 * representation as UCS-4. A trailing 0 character will be added to the
1342 	 * string after the converted text.
1343 	 *
1344 	 * Params:
1345 	 *     str = a UTF-8 encoded string
1346 	 *     len = the maximum length of @str to use, in bytes. If @len < 0,
1347 	 *         then the string is nul-terminated.
1348 	 *     itemsRead = location to store number of
1349 	 *         bytes read, or %NULL.
1350 	 *         If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
1351 	 *         returned in case @str contains a trailing partial
1352 	 *         character. If an error occurs then the index of the
1353 	 *         invalid input is stored here.
1354 	 *     itemsWritten = location to store number
1355 	 *         of characters written or %NULL. The value here stored does not include
1356 	 *         the trailing 0 character.
1357 	 *
1358 	 * Returns: a pointer to a newly allocated UCS-4 string.
1359 	 *     This value must be freed with g_free(). If an error occurs,
1360 	 *     %NULL will be returned and @error set.
1361 	 *
1362 	 * Throws: GException on failure.
1363 	 */
1364 	public static dchar* utf8ToUcs4(string str, glong len, out glong itemsRead, out glong itemsWritten)
1365 	{
1366 		GError* err = null;
1367 
1368 		auto __p = g_utf8_to_ucs4(Str.toStringz(str), len, &itemsRead, &itemsWritten, &err);
1369 
1370 		if (err !is null)
1371 		{
1372 			throw new GException( new ErrorG(err) );
1373 		}
1374 
1375 		return __p;
1376 	}
1377 
1378 	/**
1379 	 * Convert a string from UTF-8 to a 32-bit fixed width
1380 	 * representation as UCS-4, assuming valid UTF-8 input.
1381 	 * This function is roughly twice as fast as g_utf8_to_ucs4()
1382 	 * but does no error checking on the input. A trailing 0 character
1383 	 * will be added to the string after the converted text.
1384 	 *
1385 	 * Params:
1386 	 *     str = a UTF-8 encoded string
1387 	 *     len = the maximum length of @str to use, in bytes. If @len < 0,
1388 	 *         then the string is nul-terminated.
1389 	 *     itemsWritten = location to store the
1390 	 *         number of characters in the result, or %NULL.
1391 	 *
1392 	 * Returns: a pointer to a newly allocated UCS-4 string.
1393 	 *     This value must be freed with g_free().
1394 	 */
1395 	public static dchar* utf8ToUcs4Fast(string str, glong len, out glong itemsWritten)
1396 	{
1397 		return g_utf8_to_ucs4_fast(Str.toStringz(str), len, &itemsWritten);
1398 	}
1399 
1400 	/**
1401 	 * Convert a string from UTF-8 to UTF-16. A 0 character will be
1402 	 * added to the result after the converted text.
1403 	 *
1404 	 * Params:
1405 	 *     str = a UTF-8 encoded string
1406 	 *     len = the maximum length (number of bytes) of @str to use.
1407 	 *         If @len < 0, then the string is nul-terminated.
1408 	 *     itemsRead = location to store number of
1409 	 *         bytes read, or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will
1410 	 *         be returned in case @str contains a trailing partial character. If
1411 	 *         an error occurs then the index of the invalid input is stored here.
1412 	 *     itemsWritten = location to store number
1413 	 *         of #gunichar2 written, or %NULL. The value stored here does not include
1414 	 *         the trailing 0.
1415 	 *
1416 	 * Returns: a pointer to a newly allocated UTF-16 string.
1417 	 *     This value must be freed with g_free(). If an error occurs,
1418 	 *     %NULL will be returned and @error set.
1419 	 *
1420 	 * Throws: GException on failure.
1421 	 */
1422 	public static wchar* utf8ToUtf16(string str, glong len, out glong itemsRead, out glong itemsWritten)
1423 	{
1424 		GError* err = null;
1425 
1426 		auto __p = g_utf8_to_utf16(Str.toStringz(str), len, &itemsRead, &itemsWritten, &err);
1427 
1428 		if (err !is null)
1429 		{
1430 			throw new GException( new ErrorG(err) );
1431 		}
1432 
1433 		return __p;
1434 	}
1435 
1436 	/**
1437 	 * Validates UTF-8 encoded text. @str is the text to validate;
1438 	 * if @str is nul-terminated, then @max_len can be -1, otherwise
1439 	 * @max_len should be the number of bytes to validate.
1440 	 * If @end is non-%NULL, then the end of the valid range
1441 	 * will be stored there (i.e. the start of the first invalid
1442 	 * character if some bytes were invalid, or the end of the text
1443 	 * being validated otherwise).
1444 	 *
1445 	 * Note that g_utf8_validate() returns %FALSE if @max_len is
1446 	 * positive and any of the @max_len bytes are nul.
1447 	 *
1448 	 * Returns %TRUE if all of @str was valid. Many GLib and GTK+
1449 	 * routines require valid UTF-8 as input; so data read from a file
1450 	 * or the network should be checked with g_utf8_validate() before
1451 	 * doing anything else with it.
1452 	 *
1453 	 * Params:
1454 	 *     str = a pointer to character data
1455 	 *     end = return location for end of valid data
1456 	 *
1457 	 * Returns: %TRUE if the text was valid UTF-8
1458 	 */
1459 	public static bool utf8Validate(string str, out string end)
1460 	{
1461 		char* outend = null;
1462 
1463 		auto __p = g_utf8_validate(Str.toStringz(str), cast(ptrdiff_t)str.length, &outend) != 0;
1464 
1465 		end = Str.toString(outend);
1466 
1467 		return __p;
1468 	}
1469 
1470 	/**
1471 	 * If the provided string is valid UTF-8, return a copy of it. If not,
1472 	 * return a copy in which bytes that could not be interpreted as valid Unicode
1473 	 * are replaced with the Unicode replacement character (U+FFFD).
1474 	 *
1475 	 * For example, this is an appropriate function to use if you have received
1476 	 * a string that was incorrectly declared to be UTF-8, and you need a valid
1477 	 * UTF-8 version of it that can be logged or displayed to the user, with the
1478 	 * assumption that it is close enough to ASCII or UTF-8 to be mostly
1479 	 * readable as-is.
1480 	 *
1481 	 * Params:
1482 	 *     str = string to coerce into UTF-8
1483 	 *     len = the maximum length of @str to use, in bytes. If @len < 0,
1484 	 *         then the string is nul-terminated.
1485 	 *
1486 	 * Returns: a valid UTF-8 string whose content resembles @str
1487 	 *
1488 	 * Since: 2.52
1489 	 */
1490 	public static string utf8MakeValid(string str, ptrdiff_t len)
1491 	{
1492 		auto retStr = g_utf8_make_valid(Str.toStringz(str), len);
1493 
1494 		scope(exit) Str.freeString(retStr);
1495 		return Str.toString(retStr);
1496 	}
1497 
1498 	/**
1499 	 * Validates UTF-8 encoded text.
1500 	 *
1501 	 * As with g_utf8_validate(), but @max_len must be set, and hence this function
1502 	 * will always return %FALSE if any of the bytes of @str are nul.
1503 	 *
1504 	 * Params:
1505 	 *     str = a pointer to character data
1506 	 *     end = return location for end of valid data
1507 	 *
1508 	 * Returns: %TRUE if the text was valid UTF-8
1509 	 *
1510 	 * Since: 2.60
1511 	 */
1512 	public static bool utf8ValidateLen(string str, out string end)
1513 	{
1514 		char* outend = null;
1515 
1516 		auto __p = g_utf8_validate_len(Str.toStringz(str), cast(size_t)str.length, &outend) != 0;
1517 
1518 		end = Str.toString(outend);
1519 
1520 		return __p;
1521 	}
1522 }