1 /*
2 * This file is part of gtkD.
3 *
4 * gtkD is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3
7 * of the License, or (at your option) any later version, with
8 * some exceptions, please read the COPYING file.
9 *
10 * gtkD is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with gtkD; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
18 */19 20 // generated automatically - do not change21 // find conversion definition on APILookup.txt22 // implement new conversion functionalities on the wrap.utils pakage23 24 25 moduleglib.Unicode;
26 27 privateimportglib.ErrorG;
28 privateimportglib.GException;
29 privateimportglib.Str;
30 privateimportglib.c.functions;
31 publicimportglib.c.types;
32 33 34 /** */35 publicstructUnicode36 {
/**
 * Converts a UCS-4 string into UTF-16. A terminating 0 character
 * follows the converted text in the result.
 *
 * Params:
 *     str = a UCS-4 encoded string
 *     len = the maximum number of characters of str to use; when
 *         len < 0 the string is treated as nul-terminated
 *     itemsRead = receives how much of the input was read; if an
 *         error occurs, the index of the invalid input is stored here
 *     itemsWritten = receives the number of #gunichar2 written, not
 *         counting the trailing 0
 *
 * Returns: a pointer to a newly allocated UTF-16 string that must be
 *     released with g_free().
 *
 * Throws: GException on failure.
 */
public static wchar* ucs4ToUtf16(dchar* str, glong len, out glong itemsRead, out glong itemsWritten)
{
	GError* failure = null;
	auto utf16 = g_ucs4_to_utf16(str, len, &itemsRead, &itemsWritten, &failure);

	// A populated GError is the sole failure signal checked here.
	if (failure is null)
	{
		return utf16;
	}

	throw new GException(new ErrorG(failure));
}
/**
 * Converts a string held in the 32-bit fixed-width UCS-4
 * representation into UTF-8.
 *
 * Params:
 *     str = a UCS-4 encoded string
 *     len = the maximum number of characters of str to use; when
 *         len < 0 the string is treated as nul-terminated
 *     itemsRead = receives the number of characters read; on error it
 *         is set to the position of the first invalid input character
 *     itemsWritten = receives the number of bytes written, not
 *         counting the trailing 0 byte
 *
 * Returns: the converted text as a D string. The buffer allocated by
 *     the C call is released before this function returns.
 *
 * Throws: GException on failure.
 */
public static string ucs4ToUtf8(dchar* str, glong len, out glong itemsRead, out glong itemsWritten)
{
	GError* failure = null;
	auto utf8 = g_ucs4_to_utf8(str, len, &itemsRead, &itemsWritten, &failure);

	if (failure !is null)
	{
		throw new GException(new ErrorG(failure));
	}

	// Copy into a D string first, then always hand the C buffer back.
	try
	{
		return Str.toString(utf8);
	}
	finally
	{
		Str.freeString(utf8);
	}
}
/**
 * Reports the break type of c, which should be a Unicode character
 * (use g_utf8_get_char() to obtain one from UTF-8 text). Break types
 * are used to locate word and line breaks ("text boundaries"). Pango
 * implements the Unicode boundary resolution algorithms, so normally
 * a function such as pango_break() is used rather than inspecting
 * break types directly.
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: the break type of c
 */
public static GUnicodeBreakType unicharBreakType(dchar c)
{
	auto breakKind = g_unichar_break_type(c);
	return breakKind;
}
/**
 * Looks up the canonical combining class of a Unicode character.
 *
 * Params:
 *     uc = a Unicode character
 *
 * Returns: the combining class of the character
 *
 * Since: 2.14
 */
public static int unicharCombiningClass(dchar uc)
{
	auto combiningClass = g_unichar_combining_class(uc);
	return combiningClass;
}
/**
 * Carries out one composition step of the Unicode canonical
 * composition algorithm.
 *
 * Algorithmic Hangul Jamo composition is included, but this is not
 * exactly the inverse of g_unichar_decompose(). No composition can
 * have either a or b equal to zero. Precisely, the function composes
 * if and only if a Primary Composite P exists that is canonically
 * equivalent to the sequence <a, b>; see the Unicode Standard for the
 * definition of Primary Composite.
 *
 * When a and b do not compose to a new character, ch is set to zero.
 *
 * See [UAX#15](http://unicode.org/reports/tr15/) for details.
 *
 * Params:
 *     a = a Unicode character
 *     b = a Unicode character
 *     ch = receives the composed character
 *
 * Returns: true if the characters could be composed
 *
 * Since: 2.30
 */
public static bool unicharCompose(dchar a, dchar b, out dchar ch)
{
	const composed = g_unichar_compose(a, b, &ch);
	return composed != 0;
}
/**
 * Carries out one decomposition step of the Unicode canonical
 * decomposition algorithm.
 *
 * Compatibility decompositions are not included. Algorithmic Hangul
 * Jamo decomposition is included, as are 'singleton' decompositions
 * that replace a character with a single other character; for
 * singletons b is set to zero.
 *
 * When ch cannot be decomposed, a is set to ch and b is set to zero.
 *
 * Because of how Unicode decomposition pairs are defined, b is
 * guaranteed not to decompose further, whereas a may. To obtain the
 * full canonical decomposition of ch, call this function recursively
 * on a, or use g_unichar_fully_decompose().
 *
 * See [UAX#15](http://unicode.org/reports/tr15/) for details.
 *
 * Params:
 *     ch = a Unicode character
 *     a = receives the first component of ch
 *     b = receives the second component of ch
 *
 * Returns: true if the character could be decomposed
 *
 * Since: 2.30
 */
public static bool unicharDecompose(dchar ch, out dchar a, out dchar b)
{
	const decomposed = g_unichar_decompose(ch, &a, &b);
	return decomposed != 0;
}
/**
 * Gives the numeric value of a character interpreted as a decimal
 * digit.
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: the numeric value of c if it is a decimal digit (according
 *     to g_unichar_isdigit()); otherwise -1.
 */
public static int unicharDigitValue(dchar c)
{
	auto digit = g_unichar_digit_value(c);
	return digit;
}
/**
 * Computes the canonical or the compatibility decomposition of a
 * Unicode character. Pass true for compat to get the compatibility
 * decomposition and false to get the canonical one.
 *
 * The decomposed sequence is stored through result, and at most
 * resultLen characters are written. The return value is the length of
 * the full decomposition regardless of resultLen. Canonical
 * decompositions currently have length at most 4, although that could
 * change in the future (very unlikely). Unicode guarantees that a
 * buffer of length 18 always suffices for both kinds of
 * decomposition; that size is available as
 * %G_UNICHAR_MAX_DECOMPOSITION_LENGTH.
 *
 * NOTE(review): result is bound to a single dchar, yet the address of
 * that dchar is what the C function receives together with resultLen.
 * Callers passing resultLen greater than 1 should confirm that the
 * storage behind result really holds that many characters.
 *
 * See [UAX#15](http://unicode.org/reports/tr15/) for details.
 *
 * Params:
 *     ch = a Unicode character
 *     compat = true for compatibility decomposition, false for
 *         canonical decomposition
 *     result = receives the decomposed result
 *     resultLen = length of the storage behind result
 *
 * Returns: the length of the full decomposition.
 *
 * Since: 2.30
 */
public static size_t unicharFullyDecompose(dchar ch, bool compat, out dchar result, size_t resultLen)
{
	auto fullLength = g_unichar_fully_decompose(ch, compat, &result, resultLen);
	return fullLength;
}
/**
 * Some Unicode characters are "mirrored": their images are flipped
 * horizontally in text laid out from right to left. For example, "("
 * becomes its mirror image ")" in right-to-left text.
 *
 * If ch has the Unicode mirrored property, another Unicode character
 * typically has a glyph that is the mirror image of ch's glyph, and
 * mirroredCh is set, that character is stored at the address
 * mirroredCh points to. Otherwise the original character is stored.
 *
 * Params:
 *     ch = a Unicode character
 *     mirroredCh = location to store the mirrored character
 *
 * Returns: true if ch has a mirrored character, false otherwise
 *
 * Since: 2.4
 */
public static bool unicharGetMirrorChar(dchar ch, dchar* mirroredCh)
{
	const hasMirror = g_unichar_get_mirror_char(ch, mirroredCh);
	return hasMirror != 0;
}
/**
 * Finds the #GUnicodeScript of a character, as defined by Unicode
 * Standard Annex \#24. ch is not checked for being a valid Unicode
 * character; the result is undefined for an invalid character.
 *
 * This function is equivalent to pango_script_for_unichar(), and the
 * two can be used interchangeably.
 *
 * Params:
 *     ch = a Unicode character
 *
 * Returns: the #GUnicodeScript for the character.
 *
 * Since: 2.14
 */
public static GUnicodeScript unicharGetScript(dchar ch)
{
	auto script = g_unichar_get_script(ch);
	return script;
}
/**
 * Tests whether a character is alphanumeric. To obtain a character
 * value from UTF-8 text, use g_utf8_get_char().
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if c is an alphanumeric character
 */
public static bool unicharIsalnum(dchar c)
{
	const verdict = g_unichar_isalnum(c);
	return verdict != 0;
}
/**
 * Tests whether a character is alphabetic, i.e. a letter. To obtain a
 * character value from UTF-8 text, use g_utf8_get_char().
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if c is an alphabetic character
 */
public static bool unicharIsalpha(dchar c)
{
	const verdict = g_unichar_isalpha(c);
	return verdict != 0;
}
/**
 * Tests whether a character is a control character. To obtain a
 * character value from UTF-8 text, use g_utf8_get_char().
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if c is a control character
 */
public static bool unicharIscntrl(dchar c)
{
	const verdict = g_unichar_iscntrl(c);
	return verdict != 0;
}
/**
 * Tests whether the given character is assigned in the Unicode
 * standard.
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if the character has an assigned value
 */
public static bool unicharIsdefined(dchar c)
{
	const verdict = g_unichar_isdefined(c);
	return verdict != 0;
}
/**
 * Tests whether a character is numeric, i.e. a digit. This covers
 * ASCII 0-9 as well as digits in other languages/scripts. To obtain a
 * character value from UTF-8 text, use g_utf8_get_char().
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if c is a digit
 */
public static bool unicharIsdigit(dchar c)
{
	const verdict = g_unichar_isdigit(c);
	return verdict != 0;
}
/**
 * Tests whether a character is printable and not a space; the result
 * is false for control characters, format characters, and spaces.
 * g_unichar_isprint() is similar but returns true for spaces. To
 * obtain a character value from UTF-8 text, use g_utf8_get_char().
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if c is printable unless it's a space
 */
public static bool unicharIsgraph(dchar c)
{
	const verdict = g_unichar_isgraph(c);
	return verdict != 0;
}
/**
 * Tests whether a character is a lowercase letter. To obtain a
 * character value from UTF-8 text, use g_utf8_get_char().
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if c is a lowercase letter
 */
public static bool unicharIslower(dchar c)
{
	const verdict = g_unichar_islower(c);
	return verdict != 0;
}
/**
 * Tests whether a character is a mark (a non-spacing mark, combining
 * mark, or enclosing mark in Unicode terms). To obtain a character
 * value from UTF-8 text, use g_utf8_get_char().
 *
 * Note: in most places where isalpha characters are allowed, ismark
 * characters should be allowed too, since they are essential for
 * writing most European languages as well as many non-Latin scripts.
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if c is a mark character
 *
 * Since: 2.14
 */
public static bool unicharIsmark(dchar c)
{
	const verdict = g_unichar_ismark(c);
	return verdict != 0;
}
/**
 * Tests whether a character is printable. In contrast to
 * g_unichar_isgraph(), the result is true for spaces. To obtain a
 * character value from UTF-8 text, use g_utf8_get_char().
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if c is printable
 */
public static bool unicharIsprint(dchar c)
{
	const verdict = g_unichar_isprint(c);
	return verdict != 0;
}
/**
 * Tests whether a character is punctuation or a symbol. To obtain a
 * character value from UTF-8 text, use g_utf8_get_char().
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if c is a punctuation or symbol character
 */
public static bool unicharIspunct(dchar c)
{
	const verdict = g_unichar_ispunct(c);
	return verdict != 0;
}
/**
 * Tests whether a character is a space, tab, or line separator
 * (newline, carriage return, etc.). To obtain a character value from
 * UTF-8 text, use g_utf8_get_char().
 *
 * (Note: do not use this for word breaking; that requires Pango or an
 * equivalent, because the algorithm is fairly complex.)
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if c is a space character
 */
public static bool unicharIsspace(dchar c)
{
	const verdict = g_unichar_isspace(c);
	return verdict != 0;
}
/**
 * Tests whether a character is titlecase. Some Unicode characters
 * that are composites, such as the DZ digraph, have three case
 * variants rather than two. The titlecase form is used at the start
 * of a word where only the first letter is capitalized. The titlecase
 * form of the DZ digraph is U+01F2 LATIN CAPITAL LETTER D WITH SMALL
 * LETTER Z.
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if the character is titlecase
 */
public static bool unicharIstitle(dchar c)
{
	const verdict = g_unichar_istitle(c);
	return verdict != 0;
}
/**
 * Tests whether a character is uppercase.
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if c is an uppercase character
 */
public static bool unicharIsupper(dchar c)
{
	const verdict = g_unichar_isupper(c);
	return verdict != 0;
}
/**
 * Tests whether a character is typically rendered in a double-width
 * cell.
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if the character is wide
 */
public static bool unicharIswide(dchar c)
{
	const verdict = g_unichar_iswide(c);
	return verdict != 0;
}
/**
 * Tests whether a character is typically rendered in a double-width
 * cell under legacy East Asian locales. Every character that is wide
 * according to g_unichar_iswide() is also reported wide here, but the
 * converse does not necessarily hold. See
 * [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
 * for details.
 *
 * Note that some characters may pass both this test and
 * g_unichar_iszerowidth().
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if the character is wide in legacy East Asian locales
 *
 * Since: 2.12
 */
public static bool unicharIswideCjk(dchar c)
{
	const verdict = g_unichar_iswide_cjk(c);
	return verdict != 0;
}
/**
 * Tests whether a character is a hexadecimal digit.
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if the character is a hexadecimal digit
 */
public static bool unicharIsxdigit(dchar c)
{
	const verdict = g_unichar_isxdigit(c);
	return verdict != 0;
}
/**
 * Tests whether the given character typically takes zero width when
 * rendered. The result is true for all non-spacing and enclosing
 * marks (e.g. combining accents), format characters, and zero-width
 * space, but not for U+00AD SOFT HYPHEN.
 *
 * This is typically combined with g_unichar_iswide() or
 * g_unichar_iswide_cjk() to work out how many cells a string occupies
 * on a grid display (terminals). Note, however, that not every
 * terminal supports zero-width rendering of zero-width marks.
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: true if the character has zero width
 *
 * Since: 2.14
 */
public static bool unicharIszerowidth(dchar c)
{
	const verdict = g_unichar_iszerowidth(c);
	return verdict != 0;
}
/**
 * Converts a single character to UTF-8.
 *
 * Because outbuf is an `out` parameter it is reset to an empty array
 * on entry, so its pointer cannot serve as the destination buffer.
 * The character is therefore encoded into a local scratch buffer and
 * outbuf is assigned a freshly allocated copy of the bytes written.
 *
 * Params:
 *     c = a Unicode character code
 *     outbuf = receives the UTF-8 encoding of c; its length equals
 *         the return value
 *
 * Returns: number of bytes written
 */
public static int unicharToUtf8(dchar c, out char[] outbuf)
{
	// The C function requires at least 6 bytes of space in its
	// output buffer.
	char[6] scratch;

	const written = g_unichar_to_utf8(c, scratch.ptr);

	// Copy out of the stack buffer so the slice outlives this call.
	outbuf = scratch[0 .. written].dup;

	return written;
}
/**
 * Converts a character to lower case.
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: the lowercase form of c. If c is not an uppercase or
 *     titlecase character, or has no lowercase equivalent, c is
 *     returned unchanged.
 */
public static dchar unicharTolower(dchar c)
{
	auto lowered = g_unichar_tolower(c);
	return lowered;
}
/**
 * Converts a character to titlecase.
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: the titlecase form of c. If c is not an uppercase or
 *     lowercase character, c is returned unchanged.
 */
public static dchar unicharTotitle(dchar c)
{
	auto titled = g_unichar_totitle(c);
	return titled;
}
/**
 * Converts a character to uppercase.
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: the uppercase form of c. If c is not a lowercase or
 *     titlecase character, or has no uppercase equivalent, c is
 *     returned unchanged.
 */
public static dchar unicharToupper(dchar c)
{
	auto raised = g_unichar_toupper(c);
	return raised;
}
/**
 * Classifies a Unicode character by type.
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: the type of the character.
 */
public static GUnicodeType unicharType(dchar c)
{
	auto category = g_unichar_type(c);
	return category;
}
/**
 * Checks whether ch is a valid Unicode character. Not every possible
 * integer value of ch is valid. 0 counts as a valid character even
 * though it normally acts as a string terminator.
 *
 * Params:
 *     ch = a Unicode character
 *
 * Returns: true if ch is a valid Unicode character
 */
public static bool unicharValidate(dchar ch)
{
	const verdict = g_unichar_validate(ch);
	return verdict != 0;
}
/**
 * Gives the numeric value of a character interpreted as a
 * hexadecimal digit.
 *
 * Params:
 *     c = a Unicode character
 *
 * Returns: the numeric value of c if it is a hex digit (according to
 *     g_unichar_isxdigit()); otherwise -1.
 */
public static int unicharXdigitValue(dchar c)
{
	auto digit = g_unichar_xdigit_value(c);
	return digit;
}
/**
 * Computes the canonical decomposition of a Unicode character.
 *
 * Deprecated: Use the more flexible g_unichar_fully_decompose()
 * instead.
 *
 * Params:
 *     ch = a Unicode character
 *     resultLen = location that receives the length of the returned
 *         string
 *
 * Returns: a newly allocated string of Unicode characters, whose
 *     length is stored through resultLen.
 */
public static dchar* unicodeCanonicalDecomposition(dchar ch, size_t* resultLen)
{
	auto decomposition = g_unicode_canonical_decomposition(ch, resultLen);
	return decomposition;
}
/**
 * Puts a string into canonical ordering, in place. Decomposed
 * characters in the string are rearranged according to their
 * combining classes; see the Unicode manual for more information.
 *
 * Params:
 *     string_ = a UCS-4 encoded string
 *     len = the maximum length of string_ to use
 */
public static void unicodeCanonicalOrdering(dchar* string_, size_t len)
{
	// The reordering happens inside the caller's buffer.
	dchar* text = string_;
	g_unicode_canonical_ordering(text, len);
}
/**
 * Finds the Unicode script for iso15924. ISO 15924 assigns
 * four-letter codes to scripts; the code for Arabic, for instance, is
 * 'Arab'. This function takes the four letters encoded big-endian in
 * a guint32, so the value expected for Arabic is 0x41726162 (0x41 is
 * the ASCII code for 'A', 0x72 for 'r', etc).
 *
 * See
 * [Codes for the representation of names of scripts](http://unicode.org/iso15924/codelists.html)
 * for details.
 *
 * Params:
 *     iso15924 = a Unicode script
 *
 * Returns: the Unicode script for iso15924, or
 *     %G_UNICODE_SCRIPT_INVALID_CODE if iso15924 is zero and
 *     %G_UNICODE_SCRIPT_UNKNOWN if iso15924 is unknown.
 *
 * Since: 2.30
 */
public static GUnicodeScript unicodeScriptFromIso15924(uint iso15924)
{
	auto script = g_unicode_script_from_iso15924(iso15924);
	return script;
}
/**
 * Finds the ISO 15924 code for script. ISO 15924 assigns four-letter
 * codes to scripts; the code for Arabic, for instance, is 'Arab'.
 * This function encodes the four letters big-endian in a guint32, so
 * the value returned for Arabic is 0x41726162 (0x41 is the ASCII code
 * for 'A', 0x72 for 'r', etc).
 *
 * See
 * [Codes for the representation of names of scripts](http://unicode.org/iso15924/codelists.html)
 * for details.
 *
 * Params:
 *     script = a Unicode script
 *
 * Returns: the ISO 15924 code for script, encoded as an integer; zero
 *     if script is %G_UNICODE_SCRIPT_INVALID_CODE, or the ISO 15924
 *     code 'Zzzz' (script code for UNKNOWN) if script is not
 *     understood.
 *
 * Since: 2.30
 */
public static uint unicodeScriptToIso15924(GUnicodeScript script)
{
	auto isoCode = g_unicode_script_to_iso15924(script);
	return isoCode;
}
/**
 * Converts a UTF-16 string into UCS-4. The result is nul-terminated.
 *
 * Params:
 *     str = a UTF-16 encoded string
 *     len = the maximum number of #gunichar2 of str to use; when
 *         len < 0 the string is treated as nul-terminated
 *     itemsRead = receives the number of words read; if an error
 *         occurs, the index of the invalid input is stored here
 *     itemsWritten = receives the number of characters written, not
 *         counting the trailing 0 character
 *
 * Returns: a pointer to a newly allocated UCS-4 string that must be
 *     released with g_free().
 *
 * Throws: GException on failure.
 */
public static dchar* utf16ToUcs4(wchar* str, glong len, out glong itemsRead, out glong itemsWritten)
{
	GError* failure = null;
	auto ucs4 = g_utf16_to_ucs4(str, len, &itemsRead, &itemsWritten, &failure);

	// A populated GError is the sole failure signal checked here.
	if (failure is null)
	{
		return ucs4;
	}

	throw new GException(new ErrorG(failure));
}
/**
 * Converts a UTF-16 string into UTF-8.
 *
 * The input is expected to be in native endianness already; an
 * initial byte-order-mark character gets no special handling.
 * g_convert() can convert a byte buffer of UTF-16 data whose
 * endianness is ambiguous.
 *
 * The C conversion does not validate the result string, which may for
 * example include embedded NUL characters. The only validation is
 * that the input can be correctly interpreted as UTF-16, i.e. that it
 * contains no unpaired surrogates or partial character sequences.
 *
 * Params:
 *     str = a UTF-16 encoded string
 *     len = the maximum number of #gunichar2 of str to use; when
 *         len < 0 the string is treated as nul-terminated
 *     itemsRead = receives the number of words read; if an error
 *         occurs, the index of the invalid input is stored here
 *     itemsWritten = receives the number of bytes written, not
 *         counting the trailing 0 byte
 *
 * Returns: the converted text as a D string. The buffer allocated by
 *     the C call is released before this function returns.
 *
 * Throws: GException on failure.
 */
public static string utf16ToUtf8(wchar* str, glong len, out glong itemsRead, out glong itemsWritten)
{
	GError* failure = null;
	auto utf8 = g_utf16_to_utf8(str, len, &itemsRead, &itemsWritten, &failure);

	if (failure !is null)
	{
		throw new GException(new ErrorG(failure));
	}

	// Copy into a D string first, then always hand the C buffer back.
	try
	{
		return Str.toString(utf8);
	}
	finally
	{
		Str.freeString(utf8);
	}
}
/**
 * Converts a string into a form that does not depend on case. The
 * result corresponds to no particular case, but it can be compared
 * for equality, or ordered, against the results of g_utf8_casefold()
 * on other strings.
 *
 * Calling g_utf8_casefold() followed by g_utf8_collate() only
 * approximates the correct linguistic case-insensitive ordering,
 * though fairly well. An exact result would need a more sophisticated
 * collation function that takes case sensitivity into account, which
 * GLib does not currently provide.
 *
 * Params:
 *     str = a UTF-8 encoded string
 *     len = length of str in bytes, or -1 if str is nul-terminated
 *
 * Returns: a case-independent form of str, as a D string. The buffer
 *     allocated by the C call is released before this function
 *     returns.
 */
public static string utf8Casefold(string str, ptrdiff_t len)
{
	auto folded = g_utf8_casefold(Str.toStringz(str), len);

	// Copy into a D string first, then always hand the C buffer back.
	try
	{
		return Str.toString(folded);
	}
	finally
	{
		Str.freeString(folded);
	}
}
/**
 * Compares two strings for ordering, following the linguistically
 * correct rules of the [current locale][setlocale]. When many strings
 * are sorted, it is significantly faster to obtain collation keys
 * with g_utf8_collate_key() and compare those with strcmp() than to
 * sort the original strings.
 *
 * Params:
 *     str1 = a UTF-8 encoded string
 *     str2 = a UTF-8 encoded string
 *
 * Returns: < 0 if str1 compares before str2, 0 if they compare equal,
 *     > 0 if str1 compares after str2.
 */
public static int utf8Collate(string str1, string str2)
{
	auto lhs = Str.toStringz(str1);
	auto rhs = Str.toStringz(str2);

	return g_utf8_collate(lhs, rhs);
}
/**
 * Converts a string into a collation key that can be compared, using
 * strcmp(), with other collation keys produced by the same function.
 *
 * Comparing the collation keys of two strings with strcmp() always
 * gives the same result as comparing the two original strings with
 * g_utf8_collate().
 *
 * Note that this function depends on the [current locale][setlocale].
 *
 * Params:
 *     str = a UTF-8 encoded string
 *     len = length of str in bytes, or -1 if str is nul-terminated
 *
 * Returns: the collation key as a D string. The buffer allocated by
 *     the C call is released before this function returns.
 */
public static string utf8CollateKey(string str, ptrdiff_t len)
{
	auto key = g_utf8_collate_key(Str.toStringz(str), len);

	// Copy into a D string first, then always hand the C buffer back.
	try
	{
		return Str.toString(key);
	}
	finally
	{
		Str.freeString(key);
	}
}
/**
 * Converts a string into a collation key that can be compared, using
 * strcmp(), with other collation keys produced by the same function.
 *
 * To sort filenames correctly the dot '.' is treated as a special
 * case. Most dictionary orderings seem to consider it insignificant
 * and so produce "event.c" "eventgenerator.c" "event.h" rather than
 * "event.c" "event.h" "eventgenerator.c". Numbers are also handled
 * intelligently, so that "file1" "file10" "file5" sorts as "file1"
 * "file5" "file10".
 *
 * Note that this function depends on the [current locale][setlocale].
 *
 * Params:
 *     str = a UTF-8 encoded string
 *     len = length of str in bytes, or -1 if str is nul-terminated
 *
 * Returns: the collation key as a D string. The buffer allocated by
 *     the C call is released before this function returns.
 *
 * Since: 2.8
 */
public static string utf8CollateKeyForFilename(string str, ptrdiff_t len)
{
	auto key = g_utf8_collate_key_for_filename(Str.toStringz(str), len);

	// Copy into a D string first, then always hand the C buffer back.
	try
	{
		return Str.toString(key);
	}
	finally
	{
		Str.freeString(key);
	}
}
/**
 * Finds the start of the next UTF-8 character in the string after p.
 *
 * p need not be at the beginning of a UTF-8 character. The character
 * found is not checked for validity beyond starting with an
 * appropriate byte.
 *
 * For the underlying C function: if end is %NULL the result is never
 * %NULL, because a pointer to the terminating nul byte is returned
 * when the end of the string is reached. If end is non-%NULL, the
 * result is %NULL once the end of the string is reached.
 *
 * NOTE(review): p and end are converted to C strings independently,
 * while the C function expects end to point into the same buffer as
 * p. Whether the two conversions yield related pointers depends on
 * Str.toStringz - confirm before relying on a non-null end.
 *
 * Params:
 *     p = a position within a UTF-8 encoded string
 *     end = the byte following the end of the string, or null to
 *         indicate that the string is nul-terminated
 *
 * Returns: the text starting at the found character, converted with
 *     Str.toString.
 */
public static string utf8FindNextChar(string p, string end)
{
	auto start = Str.toStringz(p);
	auto limit = Str.toStringz(end);

	auto next = g_utf8_find_next_char(start, limit);
	return Str.toString(next);
}
/**
 * Given a position p within a UTF-8 encoded string str, finds the
 * start of the previous UTF-8 character that begins before p. The
 * underlying C function returns %NULL if str contains no UTF-8
 * characters before p.
 *
 * p need not be at the beginning of a UTF-8 character. The character
 * found is not checked for validity beyond starting with an
 * appropriate byte.
 *
 * NOTE(review): str and p are converted to C strings independently,
 * while the C function expects p to point into the buffer that str
 * begins. Whether the two conversions yield related pointers depends
 * on Str.toStringz - confirm against callers.
 *
 * Params:
 *     str = the beginning of a UTF-8 encoded string
 *     p = some position within str
 *
 * Returns: the text starting at the found character, converted with
 *     Str.toString.
 */
public static string utf8FindPrevChar(string str, string p)
{
	auto begin = Str.toStringz(str);
	auto position = Str.toStringz(p);

	auto previous = g_utf8_find_prev_char(begin, position);
	return Str.toString(previous);
}
/**
 * Decodes a sequence of UTF-8 encoded bytes into a Unicode character.
 *
 * The result is undefined when @p does not point to a valid UTF-8
 * encoded character. Use g_utf8_get_char_validated() instead when the
 * bytes are not known to be complete, valid Unicode characters.
 *
 * Params:
 *     p = a pointer to Unicode character encoded as UTF-8
 *
 * Returns: the resulting character
 */
public static dchar utf8GetChar(string p)
{
	auto encoded = Str.toStringz(p);
	return g_utf8_get_char(encoded);
}
/**
 * Decodes a sequence of UTF-8 encoded bytes into a Unicode character,
 * checking for incomplete characters, for invalid characters such as
 * characters outside the range of Unicode, and for overlong encodings
 * of valid characters.
 *
 * Note that g_utf8_get_char_validated() returns (gunichar)-2 if
 * @max_len is positive and any of the bytes in the first UTF-8
 * character sequence are nul.
 *
 * Params:
 *     p = a pointer to Unicode character encoded as UTF-8
 *     maxLen = the maximum number of bytes to read, or -1 if @p is
 *         nul-terminated
 *
 * Returns: the resulting character. If @p points to a partial
 *     sequence at the end of a string that could begin a valid
 *     character (or if @max_len is zero), returns (gunichar)-2;
 *     otherwise, if @p does not point to a valid UTF-8 encoded
 *     Unicode character, returns (gunichar)-1.
 */
public static dchar utf8GetCharValidated(string p, ptrdiff_t maxLen)
{
	auto encoded = Str.toStringz(p);
	return g_utf8_get_char_validated(encoded, maxLen);
}
/**
 * Brings a string into canonical form, standardizing such issues as
 * whether a character with an accent is represented as a base
 * character plus combining accent or as a single precomposed
 * character. The string has to be valid UTF-8, otherwise %NULL is
 * returned. You should generally call g_utf8_normalize() before
 * comparing two Unicode strings.
 *
 * The normalization mode %G_NORMALIZE_DEFAULT only standardizes
 * differences that do not affect the text content, such as the
 * above-mentioned accent representation. %G_NORMALIZE_ALL also
 * standardizes the "compatibility" characters in Unicode, such as
 * SUPERSCRIPT THREE to the standard forms (in this case DIGIT THREE).
 * Formatting information may be lost, but for most text operations
 * such characters should be considered the same.
 *
 * %G_NORMALIZE_DEFAULT_COMPOSE and %G_NORMALIZE_ALL_COMPOSE are like
 * %G_NORMALIZE_DEFAULT and %G_NORMALIZE_ALL, but return a result with
 * composed forms rather than a maximally decomposed form. This is
 * often useful if you intend to convert the string to a legacy
 * encoding or pass it to a system with less capable Unicode handling.
 *
 * The C string obtained from GLib is passed to Str.freeString before
 * this wrapper returns; the caller receives the result of Str.toString.
 *
 * Params:
 *     str = a UTF-8 encoded string.
 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
 *     mode = the type of normalization to perform.
 *
 * Returns: the normalized form of @str, or %NULL if @str
 *     is not valid UTF-8.
 */
public static string utf8Normalize(string str, ptrdiff_t len, GNormalizeMode mode)
{
	auto input = Str.toStringz(str);
	auto normalized = g_utf8_normalize(input, len, mode);

	scope(exit) Str.freeString(normalized);
	return Str.toString(normalized);
}
/**
 * Turns an integer character offset into a pointer to a position
 * within the string.
 *
 * Since 2.10, a negative @offset may be passed to step backwards. It
 * is usually worth stepping backwards from the end instead of forwards
 * if @offset is in the last fourth of the string, since moving forward
 * is about 3 times faster than moving backward.
 *
 * Note that this function doesn't abort when reaching the end of @str.
 * Therefore you should be sure that @offset is within string
 * boundaries before calling it. Call g_utf8_strlen() when unsure.
 * This limitation exists as this function is called frequently during
 * text rendering and therefore has to be as fast as possible.
 *
 * Params:
 *     str = a UTF-8 encoded string
 *     offset = a character offset within @str
 *
 * Returns: the resulting pointer
 */
public static string utf8OffsetToPointer(string str, glong offset)
{
	auto input = Str.toStringz(str);
	auto position = g_utf8_offset_to_pointer(input, offset);
	return Str.toString(position);
}
/**
 * Turns a pointer to a position within a string into an integer
 * character offset.
 *
 * Since 2.10, @pos is allowed to be before @str, in which case a
 * negative offset is returned.
 *
 * Params:
 *     str = a UTF-8 encoded string
 *     pos = a pointer to a position within @str
 *
 * Returns: the resulting character offset
 */
public static glong utf8PointerToOffset(string str, string pos)
{
	// NOTE(review): str and pos are each converted by their own
	// Str.toStringz call; whether the second pointer lies within the
	// first buffer depends on that helper -- confirm.
	auto start = Str.toStringz(str);
	auto position = Str.toStringz(pos);

	return g_utf8_pointer_to_offset(start, position);
}
/**
 * Locates the previous UTF-8 character in the string before @p.
 *
 * @p may point into the middle of a UTF-8 character. The character that
 * is found is not validated beyond checking that it starts with an
 * appropriate byte. If @p might be the first character of the string,
 * you must use g_utf8_find_prev_char() instead.
 *
 * Params:
 *     p = a pointer to a position within a UTF-8 encoded string
 *
 * Returns: a pointer to the found character
 */
public static string utf8PrevChar(string p)
{
	// NOTE(review): the pointer given to GLib is the one returned by
	// Str.toStringz(p); presumably that is the start of its buffer, in
	// which case the caveat above about the first character applies to
	// every call -- confirm.
	auto position = Str.toStringz(p);
	auto previous = g_utf8_prev_char(position);
	return Str.toString(previous);
}
/**
 * Searches a UTF-8 encoded string for the leftmost occurrence of the
 * given Unicode character, limiting the search to @len bytes.
 * If @len is -1, the search is unbounded.
 *
 * Params:
 *     p = a nul-terminated UTF-8 encoded string
 *     len = the maximum length of @p
 *     c = a Unicode character
 *
 * Returns: %NULL if the string does not contain the character,
 *     otherwise, a pointer to the start of the leftmost occurrence
 *     of the character in the string.
 */
public static string utf8Strchr(string p, ptrdiff_t len, dchar c)
{
	auto haystack = Str.toStringz(p);
	auto match = g_utf8_strchr(haystack, len, c);
	return Str.toString(match);
}
/**
 * Lowercases every Unicode character in the string that has a case.
 * The exact manner in which this is done depends on the current
 * locale, and may result in the number of characters in the string
 * changing.
 *
 * The C string obtained from GLib is passed to Str.freeString before
 * this wrapper returns; the caller receives the result of Str.toString.
 *
 * Params:
 *     str = a UTF-8 encoded string
 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
 *
 * Returns: a string with all characters converted to lowercase.
 */
public static string utf8Strdown(string str, ptrdiff_t len)
{
	auto input = Str.toStringz(str);
	auto lowered = g_utf8_strdown(input, len);

	scope(exit) Str.freeString(lowered);
	return Str.toString(lowered);
}
/**
 * Counts the characters in the string, not including the terminating
 * nul character. If the @max'th byte falls in the middle of a
 * character, the last (partial) character is not counted.
 *
 * Params:
 *     p = pointer to the start of a UTF-8 encoded string
 *     max = the maximum number of bytes to examine. If @max
 *         is less than 0, then the string is assumed to be
 *         nul-terminated. If @max is 0, @p will not be examined and
 *         may be %NULL. If @max is greater than 0, up to @max
 *         bytes are examined
 *
 * Returns: the length of the string in characters
 */
public static glong utf8Strlen(string p, ptrdiff_t max)
{
	auto input = Str.toStringz(p);
	return g_utf8_strlen(input, max);
}
/**
 * Like the standard C strncpy() function, but copies a given number
 * of characters rather than a given number of bytes. The @src string
 * must be valid UTF-8 encoded text. (Use g_utf8_validate() on all
 * text before trying to use UTF-8 utility functions with it.)
 *
 * Note you must ensure @dest is at least 4 * @n to fit the
 * largest possible UTF-8 characters.
 *
 * Params:
 *     dest = buffer to fill with characters from @src
 *     src = UTF-8 encoded string
 *     n = character count
 *
 * Returns: @dest
 */
public static string utf8Strncpy(string dest, string src, size_t n)
{
	// NOTE(review): the buffer GLib writes into is whatever
	// Str.toStringz(dest) returns, so the size requirement above
	// applies to that buffer -- confirm how the helper sizes it.
	auto destination = Str.toStringz(dest);
	auto source = Str.toStringz(src);

	auto filled = g_utf8_strncpy(destination, source, n);
	return Str.toString(filled);
}
/**
 * Searches a UTF-8 encoded string for the rightmost occurrence of the
 * given Unicode character, limiting the search to @len bytes.
 * If @len is -1, the search is unbounded.
 *
 * Params:
 *     p = a nul-terminated UTF-8 encoded string
 *     len = the maximum length of @p
 *     c = a Unicode character
 *
 * Returns: %NULL if the string does not contain the character,
 *     otherwise, a pointer to the start of the rightmost occurrence
 *     of the character in the string.
 */
public static string utf8Strrchr(string p, ptrdiff_t len, dchar c)
{
	auto haystack = Str.toStringz(p);
	auto match = g_utf8_strrchr(haystack, len, c);
	return Str.toString(match);
}
/**
 * Reverses a UTF-8 string. @str must be valid UTF-8 encoded text.
 * (Use g_utf8_validate() on all text before trying to use UTF-8
 * utility functions with it.)
 *
 * This function is intended for programmatic uses of reversed strings.
 * It pays no attention to decomposed characters, combining marks, byte
 * order marks, directional indicators (LRM, LRO, etc) and similar
 * characters which might need special handling when reversing a string
 * for display purposes.
 *
 * Unlike g_strreverse(), g_utf8_strreverse() returns newly-allocated
 * memory. This wrapper passes that C string to Str.freeString before
 * returning; the caller receives the result of Str.toString.
 *
 * Params:
 *     str = a UTF-8 encoded string
 *     len = the maximum length of @str to use, in bytes. If @len < 0,
 *         then the string is nul-terminated.
 *
 * Returns: a string which is the reverse of @str
 *
 * Since: 2.2
 */
public static string utf8Strreverse(string str, ptrdiff_t len)
{
	auto input = Str.toStringz(str);
	auto reversed = g_utf8_strreverse(input, len);

	scope(exit) Str.freeString(reversed);
	return Str.toString(reversed);
}
/**
 * Uppercases every Unicode character in the string that has a case.
 * The exact manner in which this is done depends on the current
 * locale, and may result in the number of characters in the string
 * increasing. (For instance, the German ess-zet will be changed to SS.)
 *
 * The C string obtained from GLib is passed to Str.freeString before
 * this wrapper returns; the caller receives the result of Str.toString.
 *
 * Params:
 *     str = a UTF-8 encoded string
 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
 *
 * Returns: a string with all characters converted to uppercase.
 */
public static string utf8Strup(string str, ptrdiff_t len)
{
	auto input = Str.toStringz(str);
	auto raised = g_utf8_strup(input, len);

	scope(exit) Str.freeString(raised);
	return Str.toString(raised);
}
/**
 * Copies a substring out of a UTF-8 encoded string.
 * The substring will contain @end_pos - @start_pos characters.
 *
 * The C string obtained from GLib is passed to Str.freeString before
 * this wrapper returns; the caller receives the result of Str.toString.
 *
 * Params:
 *     str = a UTF-8 encoded string
 *     startPos = a character offset within @str
 *     endPos = another character offset within @str
 *
 * Returns: a copy of the requested substring.
 *
 * Since: 2.30
 */
public static string utf8Substring(string str, glong startPos, glong endPos)
{
	auto input = Str.toStringz(str);
	auto piece = g_utf8_substring(input, startPos, endPos);

	scope(exit) Str.freeString(piece);
	return Str.toString(piece);
}
/**
 * Converts a string from UTF-8 to a 32-bit fixed width representation
 * as UCS-4. A trailing 0 character will be added to the string after
 * the converted text.
 *
 * Params:
 *     str = a UTF-8 encoded string
 *     len = the maximum length of @str to use, in bytes. If @len < 0,
 *         then the string is nul-terminated.
 *     itemsRead = location to store number of
 *         bytes read, or %NULL.
 *         If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
 *         returned in case @str contains a trailing partial
 *         character. If an error occurs then the index of the
 *         invalid input is stored here.
 *     itemsWritten = location to store number
 *         of characters written or %NULL. The value stored here does
 *         not include the trailing 0 character.
 *
 * Returns: a pointer to a newly allocated UCS-4 string.
 *     This value must be freed with g_free(). If an error occurs,
 *     %NULL will be returned and @error set.
 *
 * Throws: GException on failure.
 */
public static dchar* utf8ToUcs4(string str, glong len, out glong itemsRead, out glong itemsWritten)
{
	GError* failure = null;
	auto input = Str.toStringz(str);

	auto converted = g_utf8_to_ucs4(input, len, &itemsRead, &itemsWritten, &failure);

	// No error reported: hand the raw GLib pointer straight back.
	if (failure is null)
	{
		return converted;
	}

	throw new GException( new ErrorG(failure) );
}
/**
 * Converts a string from UTF-8 to a 32-bit fixed width representation
 * as UCS-4, assuming valid UTF-8 input. This function is roughly twice
 * as fast as g_utf8_to_ucs4() but does no error checking on the input.
 * A trailing 0 character will be added to the string after the
 * converted text.
 *
 * Params:
 *     str = a UTF-8 encoded string
 *     len = the maximum length of @str to use, in bytes. If @len < 0,
 *         then the string is nul-terminated.
 *     itemsWritten = location to store the
 *         number of characters in the result, or %NULL.
 *
 * Returns: a pointer to a newly allocated UCS-4 string.
 *     This value must be freed with g_free().
 */
public static dchar* utf8ToUcs4Fast(string str, glong len, out glong itemsWritten)
{
	auto input = Str.toStringz(str);
	return g_utf8_to_ucs4_fast(input, len, &itemsWritten);
}
/**
 * Converts a string from UTF-8 to UTF-16. A 0 character will be
 * added to the result after the converted text.
 *
 * Params:
 *     str = a UTF-8 encoded string
 *     len = the maximum length (number of bytes) of @str to use.
 *         If @len < 0, then the string is nul-terminated.
 *     itemsRead = location to store number of
 *         bytes read, or %NULL. If %NULL, then
 *         %G_CONVERT_ERROR_PARTIAL_INPUT will be returned in case @str
 *         contains a trailing partial character. If an error occurs
 *         then the index of the invalid input is stored here.
 *     itemsWritten = location to store number
 *         of #gunichar2 written, or %NULL. The value stored here does
 *         not include the trailing 0.
 *
 * Returns: a pointer to a newly allocated UTF-16 string.
 *     This value must be freed with g_free(). If an error occurs,
 *     %NULL will be returned and @error set.
 *
 * Throws: GException on failure.
 */
public static wchar* utf8ToUtf16(string str, glong len, out glong itemsRead, out glong itemsWritten)
{
	GError* failure = null;
	auto input = Str.toStringz(str);

	auto converted = g_utf8_to_utf16(input, len, &itemsRead, &itemsWritten, &failure);

	// No error reported: hand the raw GLib pointer straight back.
	if (failure is null)
	{
		return converted;
	}

	throw new GException( new ErrorG(failure) );
}
/**
 * Validates UTF-8 encoded text. @str is the text to validate; this
 * wrapper always passes the byte length of @str (str.length) as the
 * number of bytes to validate.
 *
 * The end of the valid range is reported through @end (i.e. the start
 * of the first invalid character if some bytes were invalid, or the
 * end of the text being validated otherwise).
 *
 * Note that g_utf8_validate() returns %FALSE if the length is
 * positive and any of the bytes within that length are nul.
 *
 * Returns %TRUE if all of @str was valid. Many GLib and GTK+
 * routines require valid UTF-8 as input; so data read from a file
 * or the network should be checked with g_utf8_validate() before
 * doing anything else with it.
 *
 * Params:
 *     str = a pointer to character data
 *     end = return location for end of valid data
 *
 * Returns: %TRUE if the text was valid UTF-8
 */
public static bool utf8Validate(string str, out string end)
{
	char* validEnd = null;
	auto input = Str.toStringz(str);
	const ptrdiff_t byteCount = cast(ptrdiff_t)str.length;

	// GLib reports success as a non-zero value.
	bool isValid = g_utf8_validate(input, byteCount, &validEnd) != 0;

	end = Str.toString(validEnd);
	return isValid;
}
/**
 * If the provided string is valid UTF-8, returns a copy of it. If not,
 * returns a copy in which bytes that could not be interpreted as valid
 * Unicode are replaced with the Unicode replacement character (U+FFFD).
 *
 * For example, this is an appropriate function to use if you have
 * received a string that was incorrectly declared to be UTF-8, and you
 * need a valid UTF-8 version of it that can be logged or displayed to
 * the user, with the assumption that it is close enough to ASCII or
 * UTF-8 to be mostly readable as-is.
 *
 * The C string obtained from GLib is passed to Str.freeString before
 * this wrapper returns; the caller receives the result of Str.toString.
 *
 * Params:
 *     str = string to coerce into UTF-8
 *     len = the maximum length of @str to use, in bytes. If @len < 0,
 *         then the string is nul-terminated.
 *
 * Returns: a valid UTF-8 string whose content resembles @str
 *
 * Since: 2.52
 */
public static string utf8MakeValid(string str, ptrdiff_t len)
{
	auto input = Str.toStringz(str);
	auto repaired = g_utf8_make_valid(input, len);

	scope(exit) Str.freeString(repaired);
	return Str.toString(repaired);
}
/**
 * Validates UTF-8 encoded text.
 *
 * As with g_utf8_validate(), but a length must always be given, and
 * hence this function will always return %FALSE if any of the bytes
 * of @str are nul. This wrapper passes the byte length of @str
 * (str.length) as that length.
 *
 * Params:
 *     str = a pointer to character data
 *     end = return location for end of valid data
 *
 * Returns: %TRUE if the text was valid UTF-8
 *
 * Since: 2.60
 */
public static bool utf8ValidateLen(string str, out string end)
{
	char* validEnd = null;
	auto input = Str.toStringz(str);
	const size_t byteCount = cast(size_t)str.length;

	// GLib reports success as a non-zero value.
	bool isValid = g_utf8_validate_len(input, byteCount, &validEnd) != 0;

	end = Str.toString(validEnd);
	return isValid;
}
1521 }