1 /* 2 * This file is part of gtkD. 3 * 4 * gtkD is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU Lesser General Public License 6 * as published by the Free Software Foundation; either version 3 7 * of the License, or (at your option) any later version, with 8 * some exceptions, please read the COPYING file. 9 * 10 * gtkD is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU Lesser General Public License for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public License 16 * along with gtkD; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA 18 */ 19 20 // generated automatically - do not change 21 // find conversion definition on APILookup.txt 22 // implement new conversion functionalities on the wrap.utils pakage 23 24 25 module glib.Unicode; 26 27 private import glib.ErrorG; 28 private import glib.GException; 29 private import glib.Str; 30 private import gtkc.glib; 31 public import gtkc.glibtypes; 32 33 34 /** */ 35 public struct Unicode 36 { 37 38 /** 39 * Convert a string from UCS-4 to UTF-16. A 0 character will be 40 * added to the result after the converted text. 41 * 42 * Params: 43 * str = a UCS-4 encoded string 44 * len = the maximum length (number of characters) of @str to use. 45 * If @len < 0, then the string is nul-terminated. 46 * itemsRead = location to store number of bytes read, 47 * or %NULL. If an error occurs then the index of the invalid input 48 * is stored here. 49 * itemsWritten = location to store number of #gunichar2 50 * written, or %NULL. The value stored here does not include the 51 * trailing 0. 52 * 53 * Return: a pointer to a newly allocated UTF-16 string. 54 * This value must be freed with g_free(). If an error occurs, 55 * %NULL will be returned and @error set. 
*
	 * Throws: GException on failure.
	 */
	public static wchar* ucs4ToUtf16(dchar* str, glong len, glong* itemsRead, glong* itemsWritten)
	{
		GError* failure = null;

		auto utf16 = g_ucs4_to_utf16(str, len, itemsRead, itemsWritten, &failure);

		// GLib reports conversion problems through the GError out-parameter.
		if (failure is null)
		{
			return utf16;
		}

		throw new GException( new ErrorG(failure) );
	}

	/**
	 * Converts a string from UCS-4 (a 32-bit fixed width representation)
	 * to UTF-8. The result will be terminated with a 0 byte.
	 *
	 * Params:
	 *     str = a UCS-4 encoded string
	 *     len = the maximum length (number of characters) of @str to use.
	 *         If @len < 0, then the string is nul-terminated.
	 *     itemsRead = location to store number of characters
	 *         read, or %NULL. If an error occurs it is set to the position
	 *         of the first invalid input character.
	 *     itemsWritten = location to store number of bytes
	 *         written or %NULL. The value stored here does not include the
	 *         trailing 0 byte.
	 *
	 * Return: the UTF-8 text obtained through Str.toString(); the buffer
	 *     returned by GLib is handed to Str.freeString() on exit.
	 *
	 * Throws: GException on failure.
	 */
	public static string ucs4ToUtf8(dchar* str, glong len, glong* itemsRead, glong* itemsWritten)
	{
		GError* failure = null;

		auto utf8 = g_ucs4_to_utf8(str, len, itemsRead, itemsWritten, &failure);

		if (failure !is null)
		{
			throw new GException( new ErrorG(failure) );
		}

		// Release the GLib buffer once Str.toString has produced the result.
		scope(exit) Str.freeString(utf8);
		return Str.toString(utf8);
	}

	/**
	 * Determines the break type of @c. @c should be a Unicode character
	 * (to derive a character from UTF-8 encoded text, use
	 * g_utf8_get_char()). The break type is used to find word and line
	 * breaks ("text boundaries"), Pango implements the Unicode boundary
	 * resolution algorithms and normally you would use a function such
	 * as pango_break() instead of caring about break types yourself.
*
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: the break type of @c
	 */
	public static GUnicodeBreakType unicharBreakType(dchar c)
	{
		auto breakType = g_unichar_break_type(c);
		return breakType;
	}

	/**
	 * Looks up the canonical combining class of a Unicode character.
	 *
	 * Params:
	 *     uc = a Unicode character
	 *
	 * Return: the combining class of the character
	 *
	 * Since: 2.14
	 */
	public static int unicharCombiningClass(dchar uc)
	{
		auto combiningClass = g_unichar_combining_class(uc);
		return combiningClass;
	}

	/**
	 * Performs a single composition step of the
	 * Unicode canonical composition algorithm.
	 *
	 * This function includes algorithmic Hangul Jamo composition,
	 * but it is not exactly the inverse of g_unichar_decompose().
	 * No composition can have either of @a or @b equal to zero.
	 * To be precise, this function composes if and only if
	 * there exists a Primary Composite P which is canonically
	 * equivalent to the sequence <@a,@b>. See the Unicode
	 * Standard for the definition of Primary Composite.
	 *
	 * If @a and @b do not compose a new character, @ch is set to zero.
	 *
	 * See
	 * [UAX#15](http://unicode.org/reports/tr15/)
	 * for details.
	 *
	 * Params:
	 *     a = a Unicode character
	 *     b = a Unicode character
	 *     ch = return location for the composed character
	 *
	 * Return: %TRUE if the characters could be composed
	 *
	 * Since: 2.30
	 */
	public static bool unicharCompose(dchar a, dchar b, dchar* ch)
	{
		// GLib answers with a gboolean; map any non-zero value to true.
		auto composed = g_unichar_compose(a, b, ch);
		return composed != 0;
	}

	/**
	 * Performs a single decomposition step of the
	 * Unicode canonical decomposition algorithm.
	 *
	 * This function does not include compatibility
	 * decompositions. It does, however, include algorithmic
	 * Hangul Jamo decomposition, as well as 'singleton'
	 * decompositions which replace a character by a single
	 * other character.
* In the case of singletons *@b will be set to zero.
	 *
	 * If @ch is not decomposable, *@a is set to @ch and *@b
	 * is set to zero.
	 *
	 * Note that the way Unicode decomposition pairs are
	 * defined, it is guaranteed that @b would not decompose
	 * further, but @a may itself decompose. To get the full
	 * canonical decomposition for @ch, one would need to
	 * recursively call this function on @a. Or use
	 * g_unichar_fully_decompose().
	 *
	 * See
	 * [UAX#15](http://unicode.org/reports/tr15/)
	 * for details.
	 *
	 * Params:
	 *     ch = a Unicode character
	 *     a = return location for the first component of @ch
	 *     b = return location for the second component of @ch
	 *
	 * Return: %TRUE if the character could be decomposed
	 *
	 * Since: 2.30
	 */
	public static bool unicharDecompose(dchar ch, dchar* a, dchar* b)
	{
		// GLib answers with a gboolean; map any non-zero value to true.
		auto decomposed = g_unichar_decompose(ch, a, b);
		return decomposed != 0;
	}

	/**
	 * Gives the numeric value of a character interpreted as a decimal
	 * digit.
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: If @c is a decimal digit (according to
	 *     g_unichar_isdigit()), its numeric value. Otherwise, -1.
	 */
	public static int unicharDigitValue(dchar c)
	{
		auto digit = g_unichar_digit_value(c);
		return digit;
	}

	/**
	 * Computes the canonical or compatibility decomposition of a
	 * Unicode character. For compatibility decomposition,
	 * pass %TRUE for @compat; for canonical decomposition
	 * pass %FALSE for @compat.
	 *
	 * The decomposed sequence is placed in @result. Only up to
	 * @result_len characters are written into @result. The length
	 * of the full decomposition (irrespective of @result_len) is
	 * returned by the function. For canonical decomposition,
	 * currently all decompositions are of length at most 4, but
	 * this may change in the future (very unlikely though).
* At any rate, Unicode does guarantee that a buffer of length
	 * 18 is always enough for both compatibility and canonical
	 * decompositions, so that is the size recommended. This is provided
	 * as %G_UNICHAR_MAX_DECOMPOSITION_LENGTH.
	 *
	 * See
	 * [UAX#15](http://unicode.org/reports/tr15/)
	 * for details.
	 *
	 * Params:
	 *     ch = a Unicode character.
	 *     compat = %TRUE to perform compatibility decomposition,
	 *         %FALSE to perform canonical decomposition
	 *     result = location to store decomposed result, or %NULL
	 *     resultLen = length of @result
	 *
	 * Return: the length of the full decomposition.
	 *
	 * Since: 2.30
	 */
	public static size_t unicharFullyDecompose(dchar ch, bool compat, dchar* result, size_t resultLen)
	{
		auto fullLength = g_unichar_fully_decompose(ch, compat, result, resultLen);
		return fullLength;
	}

	/**
	 * In Unicode, some characters are "mirrored". This means that their
	 * images are mirrored horizontally in text that is laid out from right
	 * to left. For instance, "(" would become its mirror image, ")", in
	 * right-to-left text.
	 *
	 * If @ch has the Unicode mirrored property and there is another unicode
	 * character that typically has a glyph that is the mirror image of @ch's
	 * glyph and @mirrored_ch is set, it puts that character in the address
	 * pointed to by @mirrored_ch. Otherwise the original character is put.
	 *
	 * Params:
	 *     ch = a Unicode character
	 *     mirroredCh = location to store the mirrored character
	 *
	 * Return: %TRUE if @ch has a mirrored character, %FALSE otherwise
	 *
	 * Since: 2.4
	 */
	public static bool unicharGetMirrorChar(dchar ch, dchar* mirroredCh)
	{
		// GLib answers with a gboolean; map any non-zero value to true.
		auto hasMirror = g_unichar_get_mirror_char(ch, mirroredCh);
		return hasMirror != 0;
	}

	/**
	 * Looks up the #GUnicodeScript for a particular character (as defined
	 * by Unicode Standard Annex \#24).
* No check is made for @ch being a
	 * valid Unicode character; if you pass in invalid character, the
	 * result is undefined.
	 *
	 * This function is equivalent to pango_script_for_unichar() and the
	 * two are interchangeable.
	 *
	 * Params:
	 *     ch = a Unicode character
	 *
	 * Return: the #GUnicodeScript for the character.
	 *
	 * Since: 2.14
	 */
	public static GUnicodeScript unicharGetScript(dchar ch)
	{
		auto script = g_unichar_get_script(ch);
		return script;
	}

	/**
	 * Tests whether a character is alphanumeric.
	 * Given some UTF-8 text, obtain a character value
	 * with g_utf8_get_char().
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if @c is an alphanumeric character
	 */
	public static bool unicharIsalnum(dchar c)
	{
		auto flag = g_unichar_isalnum(c);
		return flag != 0;
	}

	/**
	 * Tests whether a character is alphabetic (i.e. a letter).
	 * Given some UTF-8 text, obtain a character value with
	 * g_utf8_get_char().
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if @c is an alphabetic character
	 */
	public static bool unicharIsalpha(dchar c)
	{
		auto flag = g_unichar_isalpha(c);
		return flag != 0;
	}

	/**
	 * Tests whether a character is a control character.
	 * Given some UTF-8 text, obtain a character value with
	 * g_utf8_get_char().
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if @c is a control character
	 */
	public static bool unicharIscntrl(dchar c)
	{
		auto flag = g_unichar_iscntrl(c);
		return flag != 0;
	}

	/**
	 * Tests whether a given character is assigned in the Unicode
	 * standard.
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if the character has an assigned value
	 */
	public static bool unicharIsdefined(dchar c)
	{
		auto flag = g_unichar_isdefined(c);
		return flag != 0;
	}

	/**
	 * Determines whether a character is numeric (i.e. a digit).
* This
	 * covers ASCII 0-9 and also digits in other languages/scripts. Given
	 * some UTF-8 text, obtain a character value with g_utf8_get_char().
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if @c is a digit
	 */
	public static bool unicharIsdigit(dchar c)
	{
		auto flag = g_unichar_isdigit(c);
		return flag != 0;
	}

	/**
	 * Tests whether a character is printable and not a space
	 * (returns %FALSE for control characters, format characters, and
	 * spaces). g_unichar_isprint() is similar, but returns %TRUE for
	 * spaces. Given some UTF-8 text, obtain a character value with
	 * g_utf8_get_char().
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if @c is printable unless it's a space
	 */
	public static bool unicharIsgraph(dchar c)
	{
		auto flag = g_unichar_isgraph(c);
		return flag != 0;
	}

	/**
	 * Tests whether a character is a lowercase letter.
	 * Given some UTF-8 text, obtain a character value with
	 * g_utf8_get_char().
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if @c is a lowercase letter
	 */
	public static bool unicharIslower(dchar c)
	{
		auto flag = g_unichar_islower(c);
		return flag != 0;
	}

	/**
	 * Tests whether a character is a mark (non-spacing mark,
	 * combining mark, or enclosing mark in Unicode speak).
	 * Given some UTF-8 text, obtain a character value
	 * with g_utf8_get_char().
	 *
	 * Note: in most cases where isalpha characters are allowed,
	 * ismark characters should be allowed too, as they are essential
	 * for writing most European languages as well as many non-Latin
	 * scripts.
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if @c is a mark character
	 *
	 * Since: 2.14
	 */
	public static bool unicharIsmark(dchar c)
	{
		auto flag = g_unichar_ismark(c);
		return flag != 0;
	}

	/**
	 * Determines whether a character is printable.
* Unlike g_unichar_isgraph(), returns %TRUE for spaces.
	 * Given some UTF-8 text, obtain a character value with
	 * g_utf8_get_char().
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if @c is printable
	 */
	public static bool unicharIsprint(dchar c)
	{
		auto flag = g_unichar_isprint(c);
		return flag != 0;
	}

	/**
	 * Tests whether a character is punctuation or a symbol.
	 * Given some UTF-8 text, obtain a character value with
	 * g_utf8_get_char().
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if @c is a punctuation or symbol character
	 */
	public static bool unicharIspunct(dchar c)
	{
		auto flag = g_unichar_ispunct(c);
		return flag != 0;
	}

	/**
	 * Tests whether a character is a space, tab, or line separator
	 * (newline, carriage return, etc.). Given some UTF-8 text, obtain a
	 * character value with g_utf8_get_char().
	 *
	 * (Note: don't use this to do word breaking; you have to use
	 * Pango or equivalent to get word breaking right, the algorithm
	 * is fairly complex.)
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if @c is a space character
	 */
	public static bool unicharIsspace(dchar c)
	{
		auto flag = g_unichar_isspace(c);
		return flag != 0;
	}

	/**
	 * Tests whether a character is titlecase. Some characters in
	 * Unicode which are composites, such as the DZ digraph
	 * have three case variants instead of just two. The titlecase
	 * form is used at the beginning of a word where only the
	 * first letter is capitalized. The titlecase form of the DZ
	 * digraph is U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if the character is titlecase
	 */
	public static bool unicharIstitle(dchar c)
	{
		auto flag = g_unichar_istitle(c);
		return flag != 0;
	}

	/**
	 * Determines if a character is uppercase.
*
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if @c is an uppercase character
	 */
	public static bool unicharIsupper(dchar c)
	{
		auto flag = g_unichar_isupper(c);
		return flag != 0;
	}

	/**
	 * Tests whether a character is typically rendered in a double-width
	 * cell.
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if the character is wide
	 */
	public static bool unicharIswide(dchar c)
	{
		auto flag = g_unichar_iswide(c);
		return flag != 0;
	}

	/**
	 * Tests whether a character is typically rendered in a double-width
	 * cell under legacy East Asian locales. If a character is wide according to
	 * g_unichar_iswide(), then it is also reported wide with this function, but
	 * the converse is not necessarily true. See the
	 * [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
	 * for details.
	 *
	 * If a character passes the g_unichar_iswide() test then it will also pass
	 * this test, but not the other way around. Note that some characters may
	 * pass both this test and g_unichar_iszerowidth().
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if the character is wide in legacy East Asian locales
	 *
	 * Since: 2.12
	 */
	public static bool unicharIswideCjk(dchar c)
	{
		auto flag = g_unichar_iswide_cjk(c);
		return flag != 0;
	}

	/**
	 * Tests whether a character is a hexadecimal digit.
	 *
	 * Params:
	 *     c = a Unicode character.
	 *
	 * Return: %TRUE if the character is a hexadecimal digit
	 */
	public static bool unicharIsxdigit(dchar c)
	{
		auto flag = g_unichar_isxdigit(c);
		return flag != 0;
	}

	/**
	 * Determines if a given character typically takes zero width when rendered.
	 * The return value is %TRUE for all non-spacing and enclosing marks
	 * (e.g., combining accents), format characters, zero-width
	 * space, but not U+00AD SOFT HYPHEN.
*
	 * A typical use of this function is with one of g_unichar_iswide() or
	 * g_unichar_iswide_cjk() to determine the number of cells a string occupies
	 * when displayed on a grid display (terminals). However, note that not all
	 * terminals support zero-width rendering of zero-width marks.
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: %TRUE if the character has zero width
	 *
	 * Since: 2.14
	 */
	public static bool unicharIszerowidth(dchar c)
	{
		auto flag = g_unichar_iszerowidth(c);
		return flag != 0;
	}

	/**
	 * Computes the number of bytes needed to encode a single character
	 * as UTF-8.
	 *
	 * A D string is immutable, so the encoded bytes cannot be delivered
	 * through @outbuf; only the byte count is reported. When @outbuf is
	 * non-null the character is encoded into a private 6-byte scratch
	 * buffer, which is the size g_unichar_to_utf8() requires, so a short
	 * @outbuf can never cause a write past the end of a buffer.
	 *
	 * Params:
	 *     c = a Unicode character code
	 *     outbuf = if %NULL, only the length is computed and nothing is
	 *         written. Otherwise its contents are ignored and left untouched.
	 *
	 * Return: number of bytes the UTF-8 encoding of @c occupies
	 */
	public static int unicharToUtf8(dchar c, string outbuf)
	{
		// A null buffer asks GLib for the encoded length only.
		if (outbuf is null)
		{
			return g_unichar_to_utf8(c, null);
		}

		// GLib writes up to 6 bytes; never let it write into a buffer
		// whose size was dictated by the caller's (possibly shorter) string.
		char[6] scratch;
		return g_unichar_to_utf8(c, scratch.ptr);
	}

	/**
	 * Converts a character to lower case.
	 *
	 * Params:
	 *     c = a Unicode character.
	 *
	 * Return: the result of converting @c to lower case.
	 *     If @c is not an uppercase or titlecase character,
	 *     or has no lowercase equivalent @c is returned unchanged.
	 */
	public static dchar unicharTolower(dchar c)
	{
		auto lowered = g_unichar_tolower(c);
		return lowered;
	}

	/**
	 * Converts a character to the titlecase.
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: the result of converting @c to titlecase.
	 *     If @c is not an uppercase or lowercase character,
	 *     @c is returned unchanged.
	 */
	public static dchar unicharTotitle(dchar c)
	{
		auto titled = g_unichar_totitle(c);
		return titled;
	}

	/**
	 * Converts a character to uppercase.
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: the result of converting @c to uppercase.
*     If @c is not a lowercase or titlecase character,
	 *     or has no upper case equivalent @c is returned unchanged.
	 */
	public static dchar unicharToupper(dchar c)
	{
		auto raised = g_unichar_toupper(c);
		return raised;
	}

	/**
	 * Classifies a Unicode character by type.
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: the type of the character.
	 */
	public static GUnicodeType unicharType(dchar c)
	{
		auto category = g_unichar_type(c);
		return category;
	}

	/**
	 * Checks whether @ch is a valid Unicode character. Some possible
	 * integer values of @ch will not be valid. 0 is considered a valid
	 * character, though it's normally a string terminator.
	 *
	 * Params:
	 *     ch = a Unicode character
	 *
	 * Return: %TRUE if @ch is a valid Unicode character
	 */
	public static bool unicharValidate(dchar ch)
	{
		auto flag = g_unichar_validate(ch);
		return flag != 0;
	}

	/**
	 * Gives the numeric value of a character interpreted as a
	 * hexadecimal digit.
	 *
	 * Params:
	 *     c = a Unicode character
	 *
	 * Return: If @c is a hex digit (according to
	 *     g_unichar_isxdigit()), its numeric value. Otherwise, -1.
	 */
	public static int unicharXdigitValue(dchar c)
	{
		auto digit = g_unichar_xdigit_value(c);
		return digit;
	}

	/**
	 * Computes the canonical decomposition of a Unicode character.
	 *
	 * Deprecated: Use the more flexible g_unichar_fully_decompose()
	 * instead.
	 *
	 * Params:
	 *     ch = a Unicode character.
	 *     resultLen = location to store the length of the return value.
	 *
	 * Return: a newly allocated string of Unicode characters.
	 *     @result_len is set to the resulting length of the string.
	 */
	public static dchar* unicodeCanonicalDecomposition(dchar ch, size_t* resultLen)
	{
		auto decomposition = g_unicode_canonical_decomposition(ch, resultLen);
		return decomposition;
	}

	/**
	 * Computes the canonical ordering of a string in-place.
* This rearranges decomposed characters in the string
	 * according to their combining classes. See the Unicode
	 * manual for more information.
	 *
	 * Params:
	 *     str = a UCS-4 encoded string.
	 *     len = the maximum length of @str to use.
	 */
	public static void unicodeCanonicalOrdering(dchar* str, size_t len)
	{
		// The reordering happens inside the caller's buffer; nothing is returned.
		g_unicode_canonical_ordering(str, len);
	}

	/**
	 * Looks up the Unicode script for @iso15924. ISO 15924 assigns four-letter
	 * codes to scripts. For example, the code for Arabic is 'Arab'.
	 * This function accepts four letter codes encoded as a @guint32 in a
	 * big-endian fashion. That is, the code expected for Arabic is
	 * 0x41726162 (0x41 is ASCII code for 'A', 0x72 is ASCII code for 'r', etc).
	 *
	 * See
	 * [Codes for the representation of names of scripts](http://unicode.org/iso15924/codelists.html)
	 * for details.
	 *
	 * Params:
	 *     iso15924 = a Unicode script
	 *
	 * Return: the Unicode script for @iso15924, or
	 *     %G_UNICODE_SCRIPT_INVALID_CODE if @iso15924 is zero and
	 *     %G_UNICODE_SCRIPT_UNKNOWN if @iso15924 is unknown.
	 *
	 * Since: 2.30
	 */
	public static GUnicodeScript unicodeScriptFromIso15924(uint iso15924)
	{
		auto script = g_unicode_script_from_iso15924(iso15924);
		return script;
	}

	/**
	 * Looks up the ISO 15924 code for @script. ISO 15924 assigns four-letter
	 * codes to scripts. For example, the code for Arabic is 'Arab'. The
	 * four letter codes are encoded as a @guint32 by this function in a
	 * big-endian fashion. That is, the code returned for Arabic is
	 * 0x41726162 (0x41 is ASCII code for 'A', 0x72 is ASCII code for 'r', etc).
	 *
	 * See
	 * [Codes for the representation of names of scripts](http://unicode.org/iso15924/codelists.html)
	 * for details.
*
	 * Params:
	 *     script = a Unicode script
	 *
	 * Return: the ISO 15924 code for @script, encoded as an integer,
	 *     or zero if @script is %G_UNICODE_SCRIPT_INVALID_CODE or
	 *     ISO 15924 code 'Zzzz' (script code for UNKNOWN) if @script is not understood.
	 *
	 * Since: 2.30
	 */
	public static uint unicodeScriptToIso15924(GUnicodeScript script)
	{
		auto code = g_unicode_script_to_iso15924(script);
		return code;
	}

	/**
	 * Converts a string from UTF-16 to UCS-4. The result will be
	 * nul-terminated.
	 *
	 * Params:
	 *     str = a UTF-16 encoded string
	 *     len = the maximum length (number of #gunichar2) of @str to use.
	 *         If @len < 0, then the string is nul-terminated.
	 *     itemsRead = location to store number of words read,
	 *         or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
	 *         returned in case @str contains a trailing partial character. If
	 *         an error occurs then the index of the invalid input is stored here.
	 *     itemsWritten = location to store number of characters
	 *         written, or %NULL. The value stored here does not include the trailing
	 *         0 character.
	 *
	 * Return: a pointer to a newly allocated UCS-4 string.
	 *     This value must be freed with g_free().
	 *
	 * Throws: GException on failure.
	 */
	public static dchar* utf16ToUcs4(wchar* str, glong len, glong* itemsRead, glong* itemsWritten)
	{
		GError* failure = null;

		auto ucs4 = g_utf16_to_ucs4(str, len, itemsRead, itemsWritten, &failure);

		// GLib reports conversion problems through the GError out-parameter.
		if (failure is null)
		{
			return ucs4;
		}

		throw new GException( new ErrorG(failure) );
	}

	/**
	 * Convert a string from UTF-16 to UTF-8. The result will be
	 * terminated with a 0 byte.
	 *
	 * Note that the input is expected to be already in native endianness,
	 * an initial byte-order-mark character is not handled specially.
* g_convert() can be used to convert a byte buffer of UTF-16 data of
	 * ambiguous endianness.
	 *
	 * Further note that this function does not validate the result
	 * string; it may e.g. include embedded NUL characters. The only
	 * validation done by this function is to ensure that the input can
	 * be correctly interpreted as UTF-16, i.e. it doesn't contain
	 * things like unpaired surrogates.
	 *
	 * Params:
	 *     str = a UTF-16 encoded string
	 *     len = the maximum length (number of #gunichar2) of @str to use.
	 *         If @len < 0, then the string is nul-terminated.
	 *     itemsRead = location to store number of words read,
	 *         or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
	 *         returned in case @str contains a trailing partial character. If
	 *         an error occurs then the index of the invalid input is stored here.
	 *     itemsWritten = location to store number of bytes written,
	 *         or %NULL. The value stored here does not include the trailing 0 byte.
	 *
	 * Return: the UTF-8 text obtained through Str.toString(); the buffer
	 *     returned by GLib is handed to Str.freeString() on exit.
	 *
	 * Throws: GException on failure.
	 */
	public static string utf16ToUtf8(wchar* str, glong len, glong* itemsRead, glong* itemsWritten)
	{
		GError* failure = null;

		auto utf8 = g_utf16_to_utf8(str, len, itemsRead, itemsWritten, &failure);

		if (failure !is null)
		{
			throw new GException( new ErrorG(failure) );
		}

		// Release the GLib buffer once Str.toString has produced the result.
		scope(exit) Str.freeString(utf8);
		return Str.toString(utf8);
	}

	/**
	 * Converts a string into a form that is independent of case. The
	 * result will not correspond to any particular case, but can be
	 * compared for equality or ordered with the results of calling
	 * g_utf8_casefold() on other strings.
*
	 * Note that calling g_utf8_casefold() followed by g_utf8_collate() is
	 * only an approximation to the correct linguistic case insensitive
	 * ordering, though it is a fairly good one. Getting this exactly
	 * right would require a more sophisticated collation function that
	 * takes case sensitivity into account. GLib does not currently
	 * provide such a function.
	 *
	 * Params:
	 *     str = a UTF-8 encoded string
	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
	 *
	 * Return: the case independent form of @str, obtained through
	 *     Str.toString(); the buffer returned by GLib is handed to
	 *     Str.freeString() on exit.
	 */
	public static string utf8Casefold(string str, ptrdiff_t len)
	{
		auto folded = g_utf8_casefold(Str.toStringz(str), len);

		// Release the GLib buffer once Str.toString has produced the result.
		scope(exit) Str.freeString(folded);
		return Str.toString(folded);
	}

	/**
	 * Compares two strings for ordering using the linguistically
	 * correct rules for the [current locale][setlocale].
	 * When sorting a large number of strings, it will be significantly
	 * faster to obtain collation keys with g_utf8_collate_key() and
	 * compare the keys with strcmp() when sorting instead of sorting
	 * the original strings.
	 *
	 * Params:
	 *     str1 = a UTF-8 encoded string
	 *     str2 = a UTF-8 encoded string
	 *
	 * Return: < 0 if @str1 compares before @str2,
	 *     0 if they compare equal, > 0 if @str1 compares after @str2.
	 */
	public static int utf8Collate(string str1, string str2)
	{
		auto lhs = Str.toStringz(str1);
		auto rhs = Str.toStringz(str2);

		return g_utf8_collate(lhs, rhs);
	}

	/**
	 * Converts a string into a collation key that can be compared
	 * with other collation keys produced by the same function using
	 * strcmp().
	 *
	 * The results of comparing the collation keys of two strings
	 * with strcmp() will always be the same as comparing the two
	 * original keys with g_utf8_collate().
	 *
	 * Note that this function depends on the [current locale][setlocale].
*
	 * Params:
	 *     str = a UTF-8 encoded string.
	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
	 *
	 * Return: the collation key, obtained through Str.toString(); the
	 *     buffer returned by GLib is handed to Str.freeString() on exit.
	 */
	public static string utf8CollateKey(string str, ptrdiff_t len)
	{
		auto key = g_utf8_collate_key(Str.toStringz(str), len);

		// Release the GLib buffer once Str.toString has produced the result.
		scope(exit) Str.freeString(key);
		return Str.toString(key);
	}

	/**
	 * Converts a string into a collation key that can be compared
	 * with other collation keys produced by the same function using strcmp().
	 *
	 * In order to sort filenames correctly, this function treats the dot '.'
	 * as a special case. Most dictionary orderings seem to consider it
	 * insignificant, thus producing the ordering "event.c" "eventgenerator.c"
	 * "event.h" instead of "event.c" "event.h" "eventgenerator.c". Also, we
	 * would like to treat numbers intelligently so that "file1" "file10" "file5"
	 * is sorted as "file1" "file5" "file10".
	 *
	 * Note that this function depends on the [current locale][setlocale].
	 *
	 * Params:
	 *     str = a UTF-8 encoded string.
	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
	 *
	 * Return: the collation key, obtained through Str.toString(); the
	 *     buffer returned by GLib is handed to Str.freeString() on exit.
	 *
	 * Since: 2.8
	 */
	public static string utf8CollateKeyForFilename(string str, ptrdiff_t len)
	{
		auto key = g_utf8_collate_key_for_filename(Str.toStringz(str), len);

		// Release the GLib buffer once Str.toString has produced the result.
		scope(exit) Str.freeString(key);
		return Str.toString(key);
	}

	/**
	 * Finds the start of the next UTF-8 character in the string after @p.
	 *
	 * @p does not have to be at the beginning of a UTF-8 character. No check
	 * is made to see if the character found is actually valid other than
	 * it starts with an appropriate byte.
*
	 * The search runs on a single NUL-terminated copy of @p, because GLib
	 * compares the two pointers it is given and they must therefore refer
	 * to the same buffer.
	 *
	 * Params:
	 *     p = the text to search; the search starts at its first byte
	 *     end = a slice of @p whose start marks the byte following the end
	 *         of the region to search, or %NULL (or any string that is not
	 *         a slice of @p) to search up to the terminating NUL
	 *
	 * Return: the text starting at the found character, converted with
	 *     Str.toString(), or %NULL if @p is empty or no character was found
	 */
	public static string utf8FindNextChar(string p, string end)
	{
		// Nothing can follow an absent or empty string, and GLib would
		// step past the terminator when looking for it.
		if (p.length == 0)
		{
			return null;
		}

		auto start = Str.toStringz(p);

		// Translate `end` into the copy so both pointers share one buffer.
		typeof(start) limit = null;
		if (end.ptr !is null && end.ptr >= p.ptr && end.ptr <= p.ptr + p.length)
		{
			limit = start + (end.ptr - p.ptr);
		}

		// The result points into `start`: it is borrowed, not newly
		// allocated, so it must not be freed here.
		auto found = g_utf8_find_next_char(start, limit);
		return Str.toString(found);
	}

	/**
	 * Given a position @p within a UTF-8 encoded string @str, find the start
	 * of the previous UTF-8 character starting before @p. Returns %NULL if no
	 * UTF-8 characters are present in @str before @p.
	 *
	 * @p does not have to be at the beginning of a UTF-8 character. No check
	 * is made to see if the character found is actually valid other than
	 * it starts with an appropriate byte.
	 *
	 * The search runs on a single NUL-terminated copy of @str, because GLib
	 * compares the two pointers it is given and they must therefore refer
	 * to the same buffer.
	 *
	 * Params:
	 *     str = the UTF-8 encoded string to search in
	 *     p = a slice of @str whose start marks the position to search
	 *         backwards from
	 *
	 * Return: the text starting at the found character, converted with
	 *     Str.toString(), or %NULL if @str is empty, @p is not a slice of
	 *     @str, or no character was found.
	 */
	public static string utf8FindPrevChar(string str, string p)
	{
		// Without a position inside `str` there is nothing to search.
		if (str.length == 0 || p.ptr is null || p.ptr < str.ptr || p.ptr > str.ptr + str.length)
		{
			return null;
		}

		auto begin = Str.toStringz(str);

		// Translate `p` into the copy so both pointers share one buffer.
		auto position = begin + (p.ptr - str.ptr);

		// The result points into `begin`: it is borrowed, not newly
		// allocated, so it must not be freed here.
		auto found = g_utf8_find_prev_char(begin, position);
		return Str.toString(found);
	}

	/**
	 * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
	 *
	 * If @p does not point to a valid UTF-8 encoded character, results
	 * are undefined. If you are not sure that the bytes are complete
	 * valid Unicode characters, you should use g_utf8_get_char_validated()
	 * instead.
*
	 * Params:
	 *     p = a Unicode character encoded as UTF-8
	 *
	 * Return: the resulting character
	 */
	public static dchar utf8GetChar(string p)
	{
		auto encoded = Str.toStringz(p);

		return g_utf8_get_char(encoded);
	}

	/**
	 * Decodes a sequence of bytes encoded as UTF-8 into a Unicode
	 * character, with validation. Incomplete characters, invalid
	 * characters such as those outside the range of Unicode, and
	 * overlong encodings of valid characters are all checked for.
	 *
	 * Params:
	 *     p = a Unicode character encoded as UTF-8
	 *     maxLen = the maximum number of bytes to read, or -1, for no
	 *         maximum or if @p is nul-terminated
	 *
	 * Return: the resulting character. If @p points to a partial
	 *     sequence at the end of a string that could begin a valid
	 *     character (or if @max_len is zero), returns (gunichar)-2;
	 *     otherwise, if @p does not point to a valid UTF-8 encoded
	 *     Unicode character, returns (gunichar)-1.
	 */
	public static dchar utf8GetCharValidated(string p, ptrdiff_t maxLen)
	{
		auto encoded = Str.toStringz(p);

		return g_utf8_get_char_validated(encoded, maxLen);
	}

	/**
	 * Converts a string into canonical form, standardizing
	 * such issues as whether a character with an accent
	 * is represented as a base character and combining
	 * accent or as a single precomposed character. The
	 * string has to be valid UTF-8, otherwise %NULL is
	 * returned. You should generally call g_utf8_normalize()
	 * before comparing two Unicode strings.
	 *
	 * The normalization mode %G_NORMALIZE_DEFAULT only
	 * standardizes differences that do not affect the
	 * text content, such as the above-mentioned accent
	 * representation. %G_NORMALIZE_ALL also standardizes
	 * the "compatibility" characters in Unicode, such
	 * as SUPERSCRIPT THREE to the standard forms
	 * (in this case DIGIT THREE).
* Formatting information
	 * may be lost but for most text operations such
	 * characters should be considered the same.
	 *
	 * %G_NORMALIZE_DEFAULT_COMPOSE and %G_NORMALIZE_ALL_COMPOSE
	 * are like %G_NORMALIZE_DEFAULT and %G_NORMALIZE_ALL,
	 * but returned a result with composed forms rather
	 * than a maximally decomposed form. This is often
	 * useful if you intend to convert the string to
	 * a legacy encoding or pass it to a system with
	 * less capable Unicode handling.
	 *
	 * Params:
	 *     str = a UTF-8 encoded string.
	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
	 *     mode = the type of normalization to perform.
	 *
	 * Return: the normalized form of @str converted with Str.toString(),
	 *     or %NULL if @str is not valid UTF-8. The buffer returned by GLib
	 *     is handed to Str.freeString() when the function exits.
	 */
	public static string utf8Normalize(string str, ptrdiff_t len, GNormalizeMode mode)
	{
		auto text = Str.toStringz(str);
		auto normalized = g_utf8_normalize(text, len, mode);

		// GLib owns a fresh allocation here; release it once converted.
		scope(exit) Str.freeString(normalized);

		return Str.toString(normalized);
	}

	/**
	 * Converts from an integer character offset to a pointer to a position
	 * within the string.
	 *
	 * Since 2.10, this function allows to pass a negative @offset to
	 * step backwards. It is usually worth stepping backwards from the end
	 * instead of forwards if @offset is in the last fourth of the string,
	 * since moving forward is about 3 times faster than moving backward.
	 *
	 * Note that this function doesn't abort when reaching the end of @str.
	 * Therefore you should be sure that @offset is within string boundaries
	 * before calling that function. Call g_utf8_strlen() when unsure.
	 * This limitation exists as this function is called frequently during
	 * text rendering and therefore has to be as fast as possible.
*
	 * Params:
	 *     str = a UTF-8 encoded string
	 *     offset = a character offset within @str
	 *
	 * Return: the resulting position, converted with Str.toString()
	 */
	public static string utf8OffsetToPointer(string str, glong offset)
	{
		// The result is a position inside the input, not a new GLib
		// allocation, so it must not be passed to Str.freeString().
		return Str.toString(g_utf8_offset_to_pointer(Str.toStringz(str), offset));
	}

	/**
	 * Converts from a pointer to position within a string to a integer
	 * character offset.
	 *
	 * Since 2.10, this function allows @pos to be before @str, and returns
	 * a negative offset in this case.
	 *
	 * Params:
	 *     str = a UTF-8 encoded string
	 *     pos = a slice of @str whose start marks the position to measure
	 *
	 * Return: the resulting character offset
	 */
	public static glong utf8PointerToOffset(string str, string pos)
	{
		auto base = Str.toStringz(str);

		// GLib counts the characters between the two pointers, so both
		// have to address the same buffer. When `pos` is a slice of `str`,
		// re-express it as an offset into the buffer obtained for `str`
		// instead of converting it on its own.
		immutable bool posInsideStr = pos !is null
			&& pos.ptr >= str.ptr
			&& pos.ptr <= str.ptr + str.length;

		// NOTE(review): when `pos` is not a slice of `str` the previous
		// behaviour is kept (independent conversion). Each Str.toStringz()
		// call presumably yields its own buffer, so the offset computed by
		// GLib is not meaningful in that case -- confirm and consider
		// rejecting such input.
		auto position = posInsideStr ? base + (pos.ptr - str.ptr) : Str.toStringz(pos);

		return g_utf8_pointer_to_offset(base, position);
	}

	/**
	 * Finds the previous UTF-8 character in the string before @p.
	 *
	 * @p does not have to be at the beginning of a UTF-8 character. No check
	 * is made to see if the character found is actually valid other than
	 * it starts with an appropriate byte. If @p might be the first
	 * character of the string, you must use g_utf8_find_prev_char() instead.
	 *
	 * Params:
	 *     p = a position within a UTF-8 encoded string
	 *
	 * Return: the found character position, converted with Str.toString()
	 */
	public static string utf8PrevChar(string p)
	{
		// NOTE(review): GLib steps backwards from the pointer it is given.
		// The pointer passed here is whatever Str.toStringz() returns for
		// `p`, which presumably is the first byte of its buffer -- confirm
		// whether this wrapper can be used safely at all, or prefer
		// utf8FindPrevChar().
		//
		// The result is a position relative to the input, not a new GLib
		// allocation, so it must not be passed to Str.freeString().
		return Str.toString(g_utf8_prev_char(Str.toStringz(p)));
	}

	/**
	 * Finds the leftmost occurrence of the given Unicode character
	 * in a UTF-8 encoded string, while limiting the search to @len bytes.
	 * If @len is -1, allow unbounded search.
*
	 * Params:
	 *     p = a nul-terminated UTF-8 encoded string
	 *     len = the maximum length of @p
	 *     c = a Unicode character
	 *
	 * Return: %NULL if the string does not contain the character,
	 *     otherwise the start of the leftmost occurrence of the character
	 *     in the string, converted with Str.toString().
	 */
	public static string utf8Strchr(string p, ptrdiff_t len, dchar c)
	{
		// The result is a position inside the searched string, not a new
		// GLib allocation, so it must not be passed to Str.freeString().
		return Str.toString(g_utf8_strchr(Str.toStringz(p), len, c));
	}

	/**
	 * Converts all Unicode characters in the string that have a case
	 * to lowercase. The exact manner that this is done depends
	 * on the current locale, and may result in the number of
	 * characters in the string changing.
	 *
	 * Params:
	 *     str = a UTF-8 encoded string
	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
	 *
	 * Return: a string with all characters converted to lowercase. The
	 *     buffer allocated by GLib is handed to Str.freeString() when the
	 *     function exits.
	 */
	public static string utf8Strdown(string str, ptrdiff_t len)
	{
		auto retStr = g_utf8_strdown(Str.toStringz(str), len);

		// GLib owns a fresh allocation here; release it once converted.
		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Computes the length of the string in characters, not including
	 * the terminating nul character. If the @max'th byte falls in the
	 * middle of a character, the last (partial) character is not counted.
	 *
	 * Params:
	 *     p = pointer to the start of a UTF-8 encoded string
	 *     max = the maximum number of bytes to examine. If @max
	 *         is less than 0, then the string is assumed to be
	 *         nul-terminated. If @max is 0, @p will not be examined and
	 *         may be %NULL.
* If @max is greater than 0, up to @max
	 *         bytes are examined
	 *
	 * Return: the length of the string in characters
	 */
	public static glong utf8Strlen(string p, ptrdiff_t max)
	{
		return g_utf8_strlen(Str.toStringz(p), max);
	}

	/**
	 * Like the standard C strncpy() function, but copies a given number
	 * of characters instead of a given number of bytes. The @src string
	 * must be valid UTF-8 encoded text. (Use g_utf8_validate() on all
	 * text before trying to use UTF-8 utility functions with it.)
	 *
	 * Params:
	 *     dest = kept for API compatibility. The copy is made into a
	 *         scratch buffer owned by this function, so the contents and
	 *         length of @dest do not influence the result.
	 *     src = UTF-8 encoded string
	 *     n = character count
	 *
	 * Return: the first @n characters of @src (or all of @src when it is
	 *     shorter), converted with Str.toString()
	 */
	public static string utf8Strncpy(string dest, string src, size_t n)
	{
		// Copying into a buffer sized after `dest` can overflow when `dest`
		// is shorter than the copied prefix. For valid UTF-8 input the
		// prefix never exceeds src.length bytes, plus one for the
		// terminating nul that GLib appends.
		char[] buffer = new char[src.length + 1];

		// g_utf8_strncpy() returns its destination argument. That buffer
		// was not allocated by GLib, so it must not be passed to
		// Str.freeString().
		return Str.toString(g_utf8_strncpy(buffer.ptr, Str.toStringz(src), n));
	}

	/**
	 * Find the rightmost occurrence of the given Unicode character
	 * in a UTF-8 encoded string, while limiting the search to @len bytes.
	 * If @len is -1, allow unbounded search.
	 *
	 * Params:
	 *     p = a nul-terminated UTF-8 encoded string
	 *     len = the maximum length of @p
	 *     c = a Unicode character
	 *
	 * Return: %NULL if the string does not contain the character,
	 *     otherwise the start of the rightmost occurrence of the character
	 *     in the string, converted with Str.toString().
	 */
	public static string utf8Strrchr(string p, ptrdiff_t len, dchar c)
	{
		// The result is a position inside the searched string, not a new
		// GLib allocation, so it must not be passed to Str.freeString().
		return Str.toString(g_utf8_strrchr(Str.toStringz(p), len, c));
	}

	/**
	 * Reverses a UTF-8 string. @str must be valid UTF-8 encoded text.
	 * (Use g_utf8_validate() on all text before trying to use UTF-8
	 * utility functions with it.)
*
	 * This function is intended for programmatic uses of reversed strings.
	 * It pays no attention to decomposed characters, combining marks, byte
	 * order marks, directional indicators (LRM, LRO, etc) and similar
	 * characters which might need special handling when reversing a string
	 * for display purposes.
	 *
	 * Note that unlike g_strreverse(), the underlying GLib function
	 * returns newly-allocated memory. This wrapper hands that buffer to
	 * Str.freeString() when it exits.
	 *
	 * Params:
	 *     str = a UTF-8 encoded string
	 *     len = the maximum length of @str to use, in bytes. If @len < 0,
	 *         then the string is nul-terminated.
	 *
	 * Return: the reverse of @str, converted with Str.toString()
	 *
	 * Since: 2.2
	 */
	public static string utf8Strreverse(string str, ptrdiff_t len)
	{
		auto text = Str.toStringz(str);
		auto reversed = g_utf8_strreverse(text, len);

		// GLib owns a fresh allocation here; release it once converted.
		scope(exit) Str.freeString(reversed);

		return Str.toString(reversed);
	}

	/**
	 * Converts every Unicode character in the string that has a case
	 * to uppercase. How exactly this is done depends on the current
	 * locale, and the number of characters in the string may increase.
	 * (For instance, the German ess-zet will be changed to SS.)
	 *
	 * Params:
	 *     str = a UTF-8 encoded string
	 *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
	 *
	 * Return: a string with all characters converted to uppercase. The
	 *     buffer allocated by GLib is handed to Str.freeString() when the
	 *     function exits.
	 */
	public static string utf8Strup(string str, ptrdiff_t len)
	{
		auto text = Str.toStringz(str);
		auto upperCased = g_utf8_strup(text, len);

		// GLib owns a fresh allocation here; release it once converted.
		scope(exit) Str.freeString(upperCased);

		return Str.toString(upperCased);
	}

	/**
	 * Copies a substring out of a UTF-8 encoded string.
	 * The substring will contain @end_pos - @start_pos characters.
*
	 * Params:
	 *     str = a UTF-8 encoded string
	 *     startPos = a character offset within @str
	 *     endPos = another character offset within @str
	 *
	 * Return: a copy of the requested substring, converted with
	 *     Str.toString(). The buffer allocated by GLib is handed to
	 *     Str.freeString() when the function exits.
	 *
	 * Since: 2.30
	 */
	public static string utf8Substring(string str, glong startPos, glong endPos)
	{
		auto text = Str.toStringz(str);
		auto piece = g_utf8_substring(text, startPos, endPos);

		// GLib owns a fresh allocation here; release it once converted.
		scope(exit) Str.freeString(piece);

		return Str.toString(piece);
	}

	/**
	 * Convert a string from UTF-8 to a 32-bit fixed width
	 * representation as UCS-4. A trailing 0 character will be added to the
	 * string after the converted text.
	 *
	 * Params:
	 *     str = a UTF-8 encoded string
	 *     len = the maximum length of @str to use, in bytes. If @len < 0,
	 *         then the string is nul-terminated.
	 *     itemsRead = location to store number of bytes read, or %NULL.
	 *         If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
	 *         returned in case @str contains a trailing partial
	 *         character. If an error occurs then the index of the
	 *         invalid input is stored here.
	 *     itemsWritten = location to store number of characters
	 *         written or %NULL. The value here stored does not include the
	 *         trailing 0 character.
	 *
	 * Return: a pointer to a newly allocated UCS-4 string.
	 *     This value must be freed with g_free(). If an error occurs,
	 *     %NULL will be returned and @error set.
	 *
	 * Throws: GException on failure.
*/
	public static dchar* utf8ToUcs4(string str, glong len, glong* itemsRead, glong* itemsWritten)
	{
		GError* failure = null;
		auto converted = g_utf8_to_ucs4(Str.toStringz(str), len, itemsRead, itemsWritten, &failure);

		// No error reported: hand the GLib-allocated buffer to the caller.
		if (failure is null)
		{
			return converted;
		}

		throw new GException( new ErrorG(failure) );
	}

	/**
	 * Convert a string from UTF-8 to a 32-bit fixed width
	 * representation as UCS-4, assuming valid UTF-8 input.
	 * This function is roughly twice as fast as g_utf8_to_ucs4()
	 * but does no error checking on the input. A trailing 0 character
	 * will be added to the string after the converted text.
	 *
	 * Params:
	 *     str = a UTF-8 encoded string
	 *     len = the maximum length of @str to use, in bytes. If @len < 0,
	 *         then the string is nul-terminated.
	 *     itemsWritten = location to store the number of
	 *         characters in the result, or %NULL.
	 *
	 * Return: a pointer to a newly allocated UCS-4 string.
	 *     This value must be freed with g_free().
	 */
	public static dchar* utf8ToUcs4Fast(string str, glong len, glong* itemsWritten)
	{
		auto text = Str.toStringz(str);

		return g_utf8_to_ucs4_fast(text, len, itemsWritten);
	}

	/**
	 * Convert a string from UTF-8 to UTF-16. A 0 character will be
	 * added to the result after the converted text.
	 *
	 * Params:
	 *     str = a UTF-8 encoded string
	 *     len = the maximum length (number of bytes) of @str to use.
	 *         If @len < 0, then the string is nul-terminated.
	 *     itemsRead = location to store number of bytes read,
	 *         or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
	 *         returned in case @str contains a trailing partial character. If
	 *         an error occurs then the index of the invalid input is stored here.
	 *     itemsWritten = location to store number of #gunichar2
	 *         written, or %NULL. The value stored here does not include the
	 *         trailing 0.
*
	 * Return: a pointer to a newly allocated UTF-16 string.
	 *     This value must be freed with g_free(). If an error occurs,
	 *     %NULL will be returned and @error set.
	 *
	 * Throws: GException on failure.
	 */
	public static wchar* utf8ToUtf16(string str, glong len, glong* itemsRead, glong* itemsWritten)
	{
		GError* failure = null;
		auto converted = g_utf8_to_utf16(Str.toStringz(str), len, itemsRead, itemsWritten, &failure);

		// No error reported: hand the GLib-allocated buffer to the caller.
		if (failure is null)
		{
			return converted;
		}

		throw new GException( new ErrorG(failure) );
	}

	/**
	 * Validates UTF-8 encoded text. The whole of @str is checked: its
	 * length in bytes (str.length) is passed to g_utf8_validate() as the
	 * number of bytes to validate.
	 *
	 * The end of the valid range is reported through @end (i.e. the start
	 * of the first invalid character if some bytes were invalid, or the
	 * end of the text being validated otherwise).
	 *
	 * Note that g_utf8_validate() returns %FALSE if the number of bytes
	 * to validate is positive and any of those bytes are nul.
	 *
	 * Returns %TRUE if all of @str was valid. Many GLib and GTK+
	 * routines require valid UTF-8 as input; so data read from a file
	 * or the network should be checked with g_utf8_validate() before
	 * doing anything else with it.
	 *
	 * Params:
	 *     str = the character data to validate
	 *     end = receives the end of the valid data, converted with
	 *         Str.toString()
	 *
	 * Return: %TRUE if the text was valid UTF-8
	 */
	public static bool utf8Validate(string str, out string end)
	{
		char* validEnd = null;
		immutable byteCount = cast(ptrdiff_t)str.length;

		immutable bool isValid = g_utf8_validate(Str.toStringz(str), byteCount, &validEnd) != 0;

		// validEnd addresses the validated buffer; it is only converted,
		// never freed.
		end = Str.toString(validEnd);

		return isValid;
	}
}