1 /* 2 * This file is part of gtkD. 3 * 4 * gtkD is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU Lesser General Public License 6 * as published by the Free Software Foundation; either version 3 7 * of the License, or (at your option) any later version, with 8 * some exceptions, please read the COPYING file. 9 * 10 * gtkD is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU Lesser General Public License for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public License 16 * along with gtkD; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA 18 */ 19 20 // generated automatically - do not change 21 // find conversion definition on APILookup.txt 22 // implement new conversion functionalities on the wrap.utils pakage 23 24 /* 25 * Conversion parameters: 26 * inFile = glib-Unicode-Manipulation.html 27 * outPack = glib 28 * outFile = Unicode 29 * strct = 30 * realStrct= 31 * ctorStrct= 32 * clss = Unicode 33 * interf = 34 * class Code: No 35 * interface Code: No 36 * template for: 37 * extend = 38 * implements: 39 * prefixes: 40 * - g_ 41 * omit structs: 42 * omit prefixes: 43 * omit code: 44 * omit signals: 45 * imports: 46 * - glib.Str 47 * - glib.ErrorG 48 * - glib.GException 49 * structWrap: 50 * module aliases: 51 * local aliases: 52 * overrides: 53 */ 54 55 module glib.Unicode; 56 57 public import gtkc.glibtypes; 58 59 private import gtkc.glib; 60 private import glib.ConstructionException; 61 62 63 private import glib.Str; 64 private import glib.ErrorG; 65 private import glib.GException; 66 67 68 69 70 /** 71 * Description 72 * This section describes a number of functions for dealing with 73 * Unicode characters and strings. 
There are analogues of the traditional ctype.h character classification
 * and case conversion functions, UTF-8 analogues of some string utility
 * functions, functions to perform normalization, case conversion and
 * collation on UTF-8 strings and finally functions to convert between
 * the UTF-8, UTF-16 and UCS-4 encodings of Unicode.
 * The implementations of the Unicode functions in GLib are based
 * on the Unicode Character Data tables, which are available from
 * www.unicode.org.
 * GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1,
 * GLib 2.12 supports Unicode 5.0, GLib 2.16.3 supports Unicode 5.1.
 */
public class Unicode
{
	/**
	 * Reports whether ch is a valid Unicode character. Not every
	 * integer value is one. 0 counts as valid even though it normally
	 * acts as a string terminator.
	 * Params:
	 * ch = a Unicode character
	 * Returns: TRUE if ch is a valid Unicode character
	 */
	public static int unicharValidate(gunichar ch)
	{
		// C: gboolean g_unichar_validate (gunichar ch);
		auto valid = g_unichar_validate(ch);
		return valid;
	}

	/**
	 * Tells whether a character is alphanumeric.
	 * For UTF-8 text, fetch the character value first with
	 * g_utf8_get_char().
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is an alphanumeric character
	 */
	public static int unicharIsalnum(gunichar c)
	{
		// C: gboolean g_unichar_isalnum (gunichar c);
		auto alnum = g_unichar_isalnum(c);
		return alnum;
	}

	/**
	 * Determines whether a character is alphabetic (i.e. a letter).
	 * Given some UTF-8 text, obtain a character value with
	 * g_utf8_get_char().
* Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is an alphabetic character
	 */
	public static int unicharIsalpha(gunichar c)
	{
		// C: gboolean g_unichar_isalpha (gunichar c);
		auto alpha = g_unichar_isalpha(c);
		return alpha;
	}

	/**
	 * Tells whether a character is a control character.
	 * For UTF-8 text, fetch the character value first with
	 * g_utf8_get_char().
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is a control character
	 */
	public static int unicharIscntrl(gunichar c)
	{
		// C: gboolean g_unichar_iscntrl (gunichar c);
		auto control = g_unichar_iscntrl(c);
		return control;
	}

	/**
	 * Tells whether a given character is assigned in the Unicode
	 * standard.
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if the character has an assigned value
	 */
	public static int unicharIsdefined(gunichar c)
	{
		// C: gboolean g_unichar_isdefined (gunichar c);
		auto assigned = g_unichar_isdefined(c);
		return assigned;
	}

	/**
	 * Tells whether a character is numeric (i.e. a digit). This covers
	 * ASCII 0-9 as well as digits in other languages/scripts. For UTF-8
	 * text, fetch the character value first with g_utf8_get_char().
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is a digit
	 */
	public static int unicharIsdigit(gunichar c)
	{
		// C: gboolean g_unichar_isdigit (gunichar c);
		auto digit = g_unichar_isdigit(c);
		return digit;
	}

	/**
	 * Determines whether a character is printable and not a space
	 * (returns FALSE for control characters, format characters, and
	 * spaces). g_unichar_isprint() is similar, but returns TRUE for
	 * spaces. Given some UTF-8 text, obtain a character value with
	 * g_utf8_get_char().
* Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is printable unless it's a space
	 */
	public static int unicharIsgraph(gunichar c)
	{
		// C: gboolean g_unichar_isgraph (gunichar c);
		auto graph = g_unichar_isgraph(c);
		return graph;
	}

	/**
	 * Tells whether a character is a lowercase letter.
	 * For UTF-8 text, fetch the character value first with
	 * g_utf8_get_char().
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is a lowercase letter
	 */
	public static int unicharIslower(gunichar c)
	{
		// C: gboolean g_unichar_islower (gunichar c);
		auto lower = g_unichar_islower(c);
		return lower;
	}

	/**
	 * Tells whether a character is a mark (non-spacing mark,
	 * combining mark, or enclosing mark in Unicode speak).
	 * For UTF-8 text, fetch the character value first with
	 * g_utf8_get_char().
	 * Note: in most cases where isalpha characters are allowed,
	 * ismark characters should be allowed too, as they are essential
	 * for writing most European languages as well as many non-Latin
	 * scripts.
	 * Since 2.14
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is a mark character
	 */
	public static int unicharIsmark(gunichar c)
	{
		// C: gboolean g_unichar_ismark (gunichar c);
		auto mark = g_unichar_ismark(c);
		return mark;
	}

	/**
	 * Tells whether a character is printable.
	 * Unlike g_unichar_isgraph(), this returns TRUE for spaces.
	 * For UTF-8 text, fetch the character value first with
	 * g_utf8_get_char().
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is printable
	 */
	public static int unicharIsprint(gunichar c)
	{
		// C: gboolean g_unichar_isprint (gunichar c);
		auto printable = g_unichar_isprint(c);
		return printable;
	}

	/**
	 * Determines whether a character is punctuation or a symbol.
	 * Given some UTF-8 text, obtain a character value with
	 * g_utf8_get_char().
* Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is a punctuation or symbol character
	 */
	public static int unicharIspunct(gunichar c)
	{
		// C: gboolean g_unichar_ispunct (gunichar c);
		auto punct = g_unichar_ispunct(c);
		return punct;
	}

	/**
	 * Tells whether a character is a space, tab, or line separator
	 * (newline, carriage return, etc.). For UTF-8 text, fetch the
	 * character value first with g_utf8_get_char().
	 * (Note: don't use this to do word breaking; you have to use
	 * Pango or equivalent to get word breaking right, the algorithm
	 * is fairly complex.)
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is a space character
	 */
	public static int unicharIsspace(gunichar c)
	{
		// C: gboolean g_unichar_isspace (gunichar c);
		auto space = g_unichar_isspace(c);
		return space;
	}

	/**
	 * Tells whether a character is titlecase. Some Unicode characters
	 * which are composites, such as the DZ digraph, have three case
	 * variants instead of just two. The titlecase form is used at the
	 * beginning of a word where only the first letter is capitalized.
	 * The titlecase form of the DZ digraph is
	 * U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if the character is titlecase
	 */
	public static int unicharIstitle(gunichar c)
	{
		// C: gboolean g_unichar_istitle (gunichar c);
		auto title = g_unichar_istitle(c);
		return title;
	}

	/**
	 * Tells whether a character is uppercase.
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is an uppercase character
	 */
	public static int unicharIsupper(gunichar c)
	{
		// C: gboolean g_unichar_isupper (gunichar c);
		auto upper = g_unichar_isupper(c);
		return upper;
	}

	/**
	 * Determines if a character is a hexadecimal digit.
	 * Params:
	 * c = a Unicode character.
* Returns: TRUE if the character is a hexadecimal digit
	 */
	public static int unicharIsxdigit(gunichar c)
	{
		// C: gboolean g_unichar_isxdigit (gunichar c);
		auto hexDigit = g_unichar_isxdigit(c);
		return hexDigit;
	}

	/**
	 * Tells whether a character is typically rendered in a
	 * double-width cell.
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if the character is wide
	 */
	public static int unicharIswide(gunichar c)
	{
		// C: gboolean g_unichar_iswide (gunichar c);
		auto wide = g_unichar_iswide(c);
		return wide;
	}

	/**
	 * Tells whether a character is typically rendered in a double-width
	 * cell under legacy East Asian locales. A character that is wide
	 * according to g_unichar_iswide() is also reported wide here, but
	 * the converse is not necessarily true. See the Unicode Standard
	 * Annex #11 for details.
	 * Note that some characters may pass both this test and
	 * g_unichar_iszerowidth().
	 * Since 2.12
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if the character is wide in legacy East Asian locales
	 */
	public static int unicharIswideCjk(gunichar c)
	{
		// C: gboolean g_unichar_iswide_cjk (gunichar c);
		auto wideCjk = g_unichar_iswide_cjk(c);
		return wideCjk;
	}

	/**
	 * Determines if a given character typically takes zero width when rendered.
	 * The return value is TRUE for all non-spacing and enclosing marks
	 * (e.g., combining accents), format characters, zero-width
	 * space, but not U+00AD SOFT HYPHEN.
	 * A typical use of this function is with one of g_unichar_iswide() or
	 * g_unichar_iswide_cjk() to determine the number of cells a string occupies
	 * when displayed on a grid display (terminals). However, note that not all
	 * terminals support zero-width rendering of zero-width marks.
* Since 2.14
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if the character has zero width
	 */
	public static int unicharIszerowidth(gunichar c)
	{
		// C: gboolean g_unichar_iszerowidth (gunichar c);
		auto zeroWidth = g_unichar_iszerowidth(c);
		return zeroWidth;
	}

	/**
	 * Maps a character to its uppercase form.
	 * Params:
	 * c = a Unicode character
	 * Returns: the result of converting c to uppercase. If c is not a lowercase or titlecase character, or has no upper case equivalent, c is returned unchanged.
	 */
	public static gunichar unicharToupper(gunichar c)
	{
		// C: gunichar g_unichar_toupper (gunichar c);
		auto upper = g_unichar_toupper(c);
		return upper;
	}

	/**
	 * Maps a character to its lower case form.
	 * Params:
	 * c = a Unicode character.
	 * Returns: the result of converting c to lower case. If c is not an uppercase or titlecase character, or has no lowercase equivalent, c is returned unchanged.
	 */
	public static gunichar unicharTolower(gunichar c)
	{
		// C: gunichar g_unichar_tolower (gunichar c);
		auto lower = g_unichar_tolower(c);
		return lower;
	}

	/**
	 * Maps a character to its titlecase form.
	 * Params:
	 * c = a Unicode character
	 * Returns: the result of converting c to titlecase. If c is not an uppercase or lowercase character, c is returned unchanged.
	 */
	public static gunichar unicharTotitle(gunichar c)
	{
		// C: gunichar g_unichar_totitle (gunichar c);
		auto title = g_unichar_totitle(c);
		return title;
	}

	/**
	 * Gives the numeric value of a character as a decimal digit.
	 * Params:
	 * c = a Unicode character
	 * Returns: If c is a decimal digit (according to g_unichar_isdigit()), its numeric value. Otherwise, -1.
	 */
	public static int unicharDigitValue(gunichar c)
	{
		// C: gint g_unichar_digit_value (gunichar c);
		auto value = g_unichar_digit_value(c);
		return value;
	}

	/**
	 * Determines the numeric value of a character as a hexadecimal
	 * digit.
* Params:
	 * c = a Unicode character
	 * Returns: If c is a hex digit (according to g_unichar_isxdigit()), its numeric value. Otherwise, -1.
	 */
	public static int unicharXdigitValue(gunichar c)
	{
		// C: gint g_unichar_xdigit_value (gunichar c);
		auto value = g_unichar_xdigit_value(c);
		return value;
	}

	/**
	 * Classifies a Unicode character by type.
	 * Params:
	 * c = a Unicode character
	 * Returns: the type of the character.
	 */
	public static GUnicodeType unicharType(gunichar c)
	{
		// C: GUnicodeType g_unichar_type (gunichar c);
		auto category = g_unichar_type(c);
		return category;
	}

	/**
	 * Gives the break type of c. c should be a Unicode character
	 * (to derive a character from UTF-8 encoded text, use
	 * g_utf8_get_char()). The break type is used to find word and line
	 * breaks ("text boundaries"). Pango implements the Unicode boundary
	 * resolution algorithms, and normally you would use a function such
	 * as pango_break() instead of caring about break types yourself.
	 * Params:
	 * c = a Unicode character
	 * Returns: the break type of c
	 */
	public static GUnicodeBreakType unicharBreakType(gunichar c)
	{
		// C: GUnicodeBreakType g_unichar_break_type (gunichar c);
		auto breakType = g_unichar_break_type(c);
		return breakType;
	}

	/**
	 * Gives the canonical combining class of a Unicode character.
	 * Since 2.14
	 * Params:
	 * uc = a Unicode character
	 * Returns: the combining class of the character
	 */
	public static int unicharCombiningClass(gunichar uc)
	{
		// C: gint g_unichar_combining_class (gunichar uc);
		auto combiningClass = g_unichar_combining_class(uc);
		return combiningClass;
	}

	/**
	 * Computes the canonical ordering of a string in-place.
	 * This rearranges decomposed characters in the string
	 * according to their combining classes. See the Unicode
	 * manual for more information.
	 * Params:
	 * string = a UCS-4 encoded string.
	 * len = the maximum length of string to use.
*/
	public static void unicodeCanonicalOrdering(gunichar* string, gsize len)
	{
		// C: void g_unicode_canonical_ordering (gunichar *string, gsize len);
		g_unicode_canonical_ordering(string, len);
	}

	/**
	 * Computes the canonical decomposition of a Unicode character.
	 * Params:
	 * ch = a Unicode character.
	 * resultLen = receives the length of the return value.
	 * Returns: a newly allocated string of Unicode characters. resultLen is set to the resulting length of the string.
	 */
	public static gunichar* unicodeCanonicalDecomposition(gunichar ch, out gsize resultLen)
	{
		// C: gunichar * g_unicode_canonical_decomposition (gunichar ch, gsize *result_len);
		auto decomposed = g_unicode_canonical_decomposition(ch, &resultLen);
		return decomposed;
	}

	/**
	 * In Unicode, some characters are mirrored: their images are
	 * mirrored horizontally in text that is laid out from right to
	 * left. For instance, "(" would become its mirror image, ")", in
	 * right-to-left text.
	 * If ch has the Unicode mirrored property and there is another unicode
	 * character that typically has a glyph that is the mirror image of ch's
	 * glyph and mirroredCh is set, that character is stored at the address
	 * pointed to by mirroredCh. Otherwise the original character is stored.
	 * Since 2.4
	 * Params:
	 * ch = a Unicode character
	 * mirroredCh = location to store the mirrored character
	 * Returns: TRUE if ch has a mirrored character, FALSE otherwise
	 */
	public static int unicharGetMirrorChar(gunichar ch, gunichar* mirroredCh)
	{
		// C: gboolean g_unichar_get_mirror_char (gunichar ch, gunichar *mirrored_ch);
		auto hasMirror = g_unichar_get_mirror_char(ch, mirroredCh);
		return hasMirror;
	}

	/**
	 * Looks up the GUnicodeScript for a particular character (as defined
	 * by Unicode Standard Annex 24).
No check is made for ch being a
	 * valid Unicode character; if you pass in an invalid character, the
	 * result is undefined.
	 * This function is equivalent to pango_script_for_unichar() and the
	 * two are interchangeable.
	 * Since 2.14
	 * Params:
	 * ch = a Unicode character
	 * Returns: the GUnicodeScript for the character.
	 */
	public static GUnicodeScript unicharGetScript(gunichar ch)
	{
		// C: GUnicodeScript g_unichar_get_script (gunichar ch);
		auto script = g_unichar_get_script(ch);
		return script;
	}

	/**
	 * Decodes a sequence of UTF-8 bytes into a Unicode character.
	 * If p does not hold a valid UTF-8 encoded character, results are
	 * undefined. If you are not sure that the bytes are complete
	 * valid Unicode characters, use g_utf8_get_char_validated()
	 * instead.
	 * Params:
	 * p = a Unicode character encoded as UTF-8
	 * Returns: the resulting character
	 */
	public static gunichar utf8_GetChar(string p)
	{
		// C: gunichar g_utf8_get_char (const gchar *p);
		auto encoded = Str.toStringz(p);
		return g_utf8_get_char(encoded);
	}

	/**
	 * Convert a sequence of bytes encoded as UTF-8 to a Unicode character.
	 * This function checks for incomplete characters, for invalid characters
	 * such as characters that are out of the range of Unicode, and for
	 * overlong encodings of valid characters.
	 * Params:
	 * p = a pointer to Unicode character encoded as UTF-8
	 * maxLen = the maximum number of bytes to read, or -1, for no maximum or
	 * if p is nul-terminated
	 * Returns: the resulting character. If p points to a partial sequence at the end of a string that could begin a valid character (or if max_len is zero), returns (gunichar)-2; otherwise, if p does not point to a valid UTF-8 encoded Unicode character, returns (gunichar)-1.
*/
	public static gunichar utf8_GetCharValidated(string p, gssize maxLen)
	{
		// C: gunichar g_utf8_get_char_validated (const gchar *p, gssize max_len);
		auto encoded = Str.toStringz(p);
		return g_utf8_get_char_validated(encoded, maxLen);
	}

	/**
	 * Converts an integer character offset into a position within the
	 * string.
	 * Since 2.10, the C function accepts a negative offset to step
	 * backwards. It is usually worth stepping backwards from the end
	 * instead of forwards if offset is in the last fourth of the string,
	 * since moving forward is about 3 times faster than moving backward.
	 * Note
	 * The C function doesn't abort when reaching the end of str, so make
	 * sure that offset is within string boundaries before calling it.
	 * Call g_utf8_strlen() when unsure.
	 * This limitation exists as the function is called frequently during
	 * text rendering and therefore has to be as fast as possible.
	 * NOTE(review): the pointer returned by C is passed straight to
	 * Str.toString, so callers get a D string rather than a position
	 * inside str - presumably the text starting at that offset; confirm
	 * against glib.Str. A negative offset would step before the start of
	 * the converted buffer.
	 * Params:
	 * str = a UTF-8 encoded string
	 * offset = a character offset within str
	 * Returns: the resulting pointer, converted with Str.toString
	 */
	public static string utf8_OffsetToPointer(string str, glong offset)
	{
		// C: gchar * g_utf8_offset_to_pointer (const gchar *str, glong offset);
		auto start = Str.toStringz(str);
		auto position = g_utf8_offset_to_pointer(start, offset);
		return Str.toString(position);
	}

	/**
	 * Converts from a pointer to position within a string to an integer
	 * character offset.
	 * Since 2.10, this function allows pos to be before str, and returns
	 * a negative offset in this case.
* Params:
	 * str = a UTF-8 encoded string
	 * pos = a pointer to a position within str
	 * Returns: the resulting character offset
	 */
	public static glong utf8_PointerToOffset(string str, string pos)
	{
		// C: glong g_utf8_pointer_to_offset (const gchar *str, const gchar *pos);
		// NOTE(review): str and pos go through two separate Str.toStringz
		// calls; if each call yields its own buffer, the two C pointers are
		// unrelated and the offset is meaningless - verify against glib.Str.
		auto start = Str.toStringz(str);
		auto position = Str.toStringz(pos);
		return g_utf8_pointer_to_offset(start, position);
	}

	/**
	 * Finds the previous UTF-8 character in the string before p.
	 * p does not have to be at the beginning of a UTF-8 character. No check
	 * is made to see if the character found is actually valid other than
	 * it starts with an appropriate byte. If p might be the first
	 * character of the string, you must use g_utf8_find_prev_char() instead.
	 * NOTE(review): the C function steps backwards from the pointer it is
	 * given, and here that pointer is the start of the converted p - this
	 * looks like it reads before the buffer; confirm against glib.Str.
	 * Params:
	 * p = a position within a UTF-8 encoded string
	 * Returns: the found character position, converted with Str.toString.
	 */
	public static string utf8_PrevChar(string p)
	{
		// C: gchar * g_utf8_prev_char (const gchar *p);
		auto position = Str.toStringz(p);
		auto previous = g_utf8_prev_char(position);
		return Str.toString(previous);
	}

	/**
	 * Finds the start of the next UTF-8 character in the string after p.
	 * p does not have to be at the beginning of a UTF-8 character. No check
	 * is made to see if the character found is actually valid other than
	 * it starts with an appropriate byte.
	 * NOTE(review): p and end are converted by separate Str.toStringz
	 * calls, so a non-null end is presumably not a position within the
	 * same buffer as p - verify against glib.Str.
	 * Params:
	 * p = a position within a UTF-8 encoded string
	 * end = the byte following the end of the string,
	 * or NULL to indicate that the string is nul-terminated.
	 * Returns: the found character position (converted with Str.toString) or NULL
	 */
	public static string utf8_FindNextChar(string p, string end)
	{
		// C: gchar * g_utf8_find_next_char (const gchar *p, const gchar *end);
		auto position = Str.toStringz(p);
		auto limit = Str.toStringz(end);
		auto next = g_utf8_find_next_char(position, limit);
		return Str.toString(next);
	}

	/**
	 * Given a position p with a UTF-8 encoded string str, find the start
	 * of the previous UTF-8 character starting before p.
Returns NULL if no
	 * UTF-8 characters are present in str before p.
	 * p does not have to be at the beginning of a UTF-8 character. No check
	 * is made to see if the character found is actually valid other than
	 * it starts with an appropriate byte.
	 * Params:
	 * str = pointer to the beginning of a UTF-8 encoded string
	 * p = pointer to some position within str
	 * Returns: a pointer to the found character or NULL.
	 */
	public static string utf8_FindPrevChar(string str, string p)
	{
		// C: gchar * g_utf8_find_prev_char (const gchar *str, const gchar *p);
		// NOTE(review): str and p go through two separate Str.toStringz
		// calls; if each call yields its own buffer, p is not a position
		// inside str as the C function expects - verify against glib.Str.
		auto start = Str.toStringz(str);
		auto position = Str.toStringz(p);
		auto previous = g_utf8_find_prev_char(start, position);
		return Str.toString(previous);
	}

	/**
	 * Counts the characters in the string, not including the
	 * terminating nul character.
	 * Params:
	 * p = the start of a UTF-8 encoded string
	 * max = the maximum number of bytes to examine. If max
	 * is less than 0, then the string is assumed to be
	 * nul-terminated. If max is 0, p will not be examined and
	 * may be NULL.
	 * Returns: the length of the string in characters
	 */
	public static glong utf8_Strlen(string p, gssize max)
	{
		// C: glong g_utf8_strlen (const gchar *p, gssize max);
		auto text = Str.toStringz(p);
		return g_utf8_strlen(text, max);
	}

	/**
	 * Like the standard C strncpy() function, but
	 * copies a given number of characters instead of a given number of
	 * bytes. The src string must be valid UTF-8 encoded text.
	 * (Use g_utf8_validate() on all text before trying to use UTF-8
	 * utility functions with it.)
* Params:
	 * dest = buffer to fill with characters from src
	 * src = UTF-8 encoded string
	 * n = character count
	 * Returns: dest
	 */
	public static string utf8_Strncpy(string dest, string src, gsize n)
	{
		// C: gchar * g_utf8_strncpy (gchar *dest, const gchar *src, gsize n);
		// A D string cannot act as a writable C output buffer: whatever
		// Str.toStringz(dest) hands to C is presumably no larger than dest,
		// so copying a longer prefix of src would write past its end. The
		// copy therefore goes into a local buffer big enough for all of src
		// plus the terminating nul (src must be valid UTF-8, as stated in
		// the description). dest stays in the signature for compatibility;
		// the copied characters are delivered through the return value, as
		// before.
		char[] buffer = new char[src.length + 1];
		auto copied = g_utf8_strncpy(buffer.ptr, Str.toStringz(src), n);
		return Str.toString(copied);
	}

	/**
	 * Finds the leftmost occurrence of the given Unicode character
	 * in a UTF-8 encoded string, while limiting the search to len bytes.
	 * If len is -1, allow unbounded search.
	 * Params:
	 * p = a nul-terminated UTF-8 encoded string
	 * len = the maximum length of p
	 * c = a Unicode character
	 * Returns: NULL if the string does not contain the character, otherwise the position of the leftmost occurrence of the character, converted with Str.toString.
	 */
	public static string utf8_Strchr(string p, gssize len, gunichar c)
	{
		// C: gchar * g_utf8_strchr (const gchar *p, gssize len, gunichar c);
		auto text = Str.toStringz(p);
		auto found = g_utf8_strchr(text, len, c);
		return Str.toString(found);
	}

	/**
	 * Finds the rightmost occurrence of the given Unicode character
	 * in a UTF-8 encoded string, while limiting the search to len bytes.
	 * If len is -1, allow unbounded search.
	 * Params:
	 * p = a nul-terminated UTF-8 encoded string
	 * len = the maximum length of p
	 * c = a Unicode character
	 * Returns: NULL if the string does not contain the character, otherwise the position of the rightmost occurrence of the character, converted with Str.toString.
	 */
	public static string utf8_Strrchr(string p, gssize len, gunichar c)
	{
		// C: gchar * g_utf8_strrchr (const gchar *p, gssize len, gunichar c);
		auto text = Str.toStringz(p);
		auto found = g_utf8_strrchr(text, len, c);
		return Str.toString(found);
	}

	/**
	 * Reverses a UTF-8 string. str must be valid UTF-8 encoded text.
* (Use g_utf8_validate() on all text before trying to use UTF-8
	 * utility functions with it.)
	 * This function is intended for programmatic uses of reversed strings.
	 * It pays no attention to decomposed characters, combining marks, byte
	 * order marks, directional indicators (LRM, LRO, etc) and similar
	 * characters which might need special handling when reversing a string
	 * for display purposes.
	 * Note that unlike g_strreverse(), the C function returns
	 * newly-allocated memory; this wrapper converts it to a D string and
	 * releases the C buffer with g_free() itself.
	 * Since 2.2
	 * Params:
	 * str = a UTF-8 encoded string
	 * len = the maximum length of str to use, in bytes. If len < 0,
	 * then the string is nul-terminated.
	 * Returns: a string which is the reverse of str.
	 */
	public static string utf8_Strreverse(string str, gssize len)
	{
		// C: gchar * g_utf8_strreverse (const gchar *str, gssize len);
		auto reversed = g_utf8_strreverse(Str.toStringz(str), len);

		// The C result is owned by us and was previously never released.
		// NOTE(review): assumes Str.toString copies the bytes into
		// D-managed memory, and that g_free is exported by gtkc.glib - confirm.
		string result = Str.toString(reversed);
		g_free(reversed);
		return result;
	}

	/**
	 * Validates UTF-8 encoded text. str is the text to validate;
	 * if str is nul-terminated, then max_len can be -1, otherwise
	 * max_len should be the number of bytes to validate.
	 * If end is non-NULL, then the end of the valid range
	 * will be stored there (i.e. the start of the first invalid
	 * character if some bytes were invalid, or the end of the text
	 * being validated otherwise).
	 * Note that g_utf8_validate() returns FALSE if max_len is
	 * positive and NUL is met before max_len bytes have been read.
	 * Returns TRUE if all of str was valid. Many GLib and GTK+
	 * routines require valid UTF-8 as input;
	 * so data read from a file or the network should be checked
	 * with g_utf8_validate() before doing anything else with it.
* Params:
	 * str = a pointer to character data
	 * maxLen = max bytes to validate, or -1 to go until NUL
	 * end = return location for end of valid data
	 * Returns: TRUE if the text was valid UTF-8
	 */
	public static int utf8_Validate(string str, gssize maxLen, out string end)
	{
		// C: gboolean g_utf8_validate (const gchar *str, gssize max_len, const gchar **end);
		char* validEnd = null;

		auto valid = g_utf8_validate(Str.toStringz(str), maxLen, &validEnd);

		// validEnd points into the buffer passed to C; it is handed back
		// to the caller after conversion with Str.toString.
		end = Str.toString(validEnd);
		return valid;
	}

	/**
	 * Converts all Unicode characters in the string that have a case
	 * to uppercase. The exact manner that this is done depends
	 * on the current locale, and may result in the number of
	 * characters in the string increasing. (For instance, the
	 * German ess-zet will be changed to SS.)
	 * The C function returns newly allocated memory; this wrapper
	 * converts it to a D string and releases the C buffer with g_free().
	 * Params:
	 * str = a UTF-8 encoded string
	 * len = length of str, in bytes, or -1 if str is nul-terminated.
	 * Returns: a string with all characters converted to uppercase.
	 */
	public static string utf8_Strup(string str, gssize len)
	{
		// C: gchar * g_utf8_strup (const gchar *str, gssize len);
		auto upper = g_utf8_strup(Str.toStringz(str), len);

		// The C result is owned by us and was previously never released.
		// NOTE(review): assumes Str.toString copies the bytes into
		// D-managed memory, and that g_free is exported by gtkc.glib - confirm.
		string result = Str.toString(upper);
		g_free(upper);
		return result;
	}

	/**
	 * Converts all Unicode characters in the string that have a case
	 * to lowercase. The exact manner that this is done depends
	 * on the current locale, and may result in the number of
	 * characters in the string changing.
	 * The C function returns newly allocated memory; this wrapper
	 * converts it to a D string and releases the C buffer with g_free().
	 * Params:
	 * str = a UTF-8 encoded string
	 * len = length of str, in bytes, or -1 if str is nul-terminated.
	 * Returns: a string with all characters converted to lowercase.
	 */
	public static string utf8_Strdown(string str, gssize len)
	{
		// C: gchar * g_utf8_strdown (const gchar *str, gssize len);
		auto lower = g_utf8_strdown(Str.toStringz(str), len);

		// The C result is owned by us and was previously never released.
		// NOTE(review): assumes Str.toString copies the bytes into
		// D-managed memory, and that g_free is exported by gtkc.glib - confirm.
		string result = Str.toString(lower);
		g_free(lower);
		return result;
	}

	/**
	 * Converts a string into a form that is independent of case.
The result will not correspond to any particular case, but can be
	 * compared for equality or ordered with the results of calling
	 * g_utf8_casefold() on other strings.
	 * Note that calling g_utf8_casefold() followed by g_utf8_collate() is
	 * only an approximation to the correct linguistic case insensitive
	 * ordering, though it is a fairly good one. Getting this exactly
	 * right would require a more sophisticated collation function that
	 * takes case sensitivity into account. GLib does not currently
	 * provide such a function.
	 * The C function returns newly allocated memory; this wrapper
	 * converts it to a D string and releases the C buffer with g_free().
	 * Params:
	 * str = a UTF-8 encoded string
	 * len = length of str, in bytes, or -1 if str is nul-terminated.
	 * Returns: a string that is a case independent form of str.
	 */
	public static string utf8_Casefold(string str, gssize len)
	{
		// C: gchar * g_utf8_casefold (const gchar *str, gssize len);
		auto folded = g_utf8_casefold(Str.toStringz(str), len);

		// The C result is owned by us and was previously never released.
		// NOTE(review): assumes Str.toString copies the bytes into
		// D-managed memory, and that g_free is exported by gtkc.glib - confirm.
		string result = Str.toString(folded);
		g_free(folded);
		return result;
	}

	/**
	 * Converts a string into canonical form, standardizing
	 * such issues as whether a character with an accent
	 * is represented as a base character and combining
	 * accent or as a single precomposed character. The
	 * string has to be valid UTF-8, otherwise NULL is
	 * returned. You should generally call g_utf8_normalize()
	 * before comparing two Unicode strings.
	 * The normalization mode G_NORMALIZE_DEFAULT only
	 * standardizes differences that do not affect the
	 * text content, such as the above-mentioned accent
	 * representation. G_NORMALIZE_ALL also standardizes
	 * the "compatibility" characters in Unicode, such
	 * as SUPERSCRIPT THREE to the standard forms
	 * (in this case DIGIT THREE). Formatting information
	 * may be lost but for most text operations such
	 * characters should be considered the same.
* G_NORMALIZE_DEFAULT_COMPOSE and G_NORMALIZE_ALL_COMPOSE
	 * are like G_NORMALIZE_DEFAULT and G_NORMALIZE_ALL,
	 * but returned a result with composed forms rather
	 * than a maximally decomposed form. This is often
	 * useful if you intend to convert the string to
	 * a legacy encoding or pass it to a system with
	 * less capable Unicode handling.
	 * The C function returns newly allocated memory; this wrapper
	 * converts it to a D string and releases the C buffer with g_free().
	 * Params:
	 * str = a UTF-8 encoded string.
	 * len = length of str, in bytes, or -1 if str is nul-terminated.
	 * mode = the type of normalization to perform.
	 * Returns: a string that is the normalized form of str, or NULL if str is not valid UTF-8.
	 */
	public static string utf8_Normalize(string str, gssize len, GNormalizeMode mode)
	{
		// C: gchar * g_utf8_normalize (const gchar *str, gssize len, GNormalizeMode mode);
		auto normalized = g_utf8_normalize(Str.toStringz(str), len, mode);

		// The C result is owned by us and was previously never released;
		// g_free() accepts the NULL returned for invalid input.
		// NOTE(review): assumes Str.toString copies the bytes into
		// D-managed memory, and that g_free is exported by gtkc.glib - confirm.
		string result = Str.toString(normalized);
		g_free(normalized);
		return result;
	}

	/**
	 * Compares two strings for ordering using the linguistically
	 * correct rules for the current locale.
	 * When sorting a large number of strings, it will be significantly
	 * faster to obtain collation keys with g_utf8_collate_key() and
	 * compare the keys with strcmp() when sorting instead of sorting
	 * the original strings.
	 * Params:
	 * str1 = a UTF-8 encoded string
	 * str2 = a UTF-8 encoded string
	 * Returns: < 0 if str1 compares before str2, 0 if they compare equal, > 0 if str1 compares after str2.
	 */
	public static int utf8_Collate(string str1, string str2)
	{
		// C: gint g_utf8_collate (const gchar *str1, const gchar *str2);
		auto left = Str.toStringz(str1);
		auto right = Str.toStringz(str2);
		return g_utf8_collate(left, right);
	}

	/**
	 * Converts a string into a collation key that can be compared
	 * with other collation keys produced by the same function using
	 * strcmp().
* The results of comparing the collation keys of two strings
	 * with strcmp() will always be the same as comparing the two
	 * original strings with g_utf8_collate().
	 * Note that this function depends on the
	 * current locale.
	 * Params:
	 * str = a UTF-8 encoded string.
	 * len = length of str, in bytes, or -1 if str is nul-terminated.
	 * Returns: the collation key as a D string. The buffer allocated by
	 * GLib is released by this wrapper, so the caller must not g_free() it.
	 */
	public static string utf8_CollateKey(string str, gssize len)
	{
		// gchar * g_utf8_collate_key (const gchar *str, gssize len);
		auto key = g_utf8_collate_key(Str.toStringz(str), len);

		// GLib hands over ownership of the key buffer; release it once it
		// has been converted, otherwise every call leaks it.
		// NOTE(review): relies on Str.toString() returning a copy of the
		// C string, and on g_free being exposed by the gtkc.glib import -
		// confirm both.
		scope(exit) g_free(key);

		return Str.toString(key);
	}

	/**
	 * Converts a string into a collation key that can be compared
	 * with other collation keys produced by the same function using strcmp().
	 * In order to sort filenames correctly, this function treats the dot '.'
	 * as a special case. Most dictionary orderings seem to consider it
	 * insignificant, thus producing the ordering "event.c" "eventgenerator.c"
	 * "event.h" instead of "event.c" "event.h" "eventgenerator.c". Also, we
	 * would like to treat numbers intelligently so that "file1" "file10" "file5"
	 * is sorted as "file1" "file5" "file10".
	 * Note that this function depends on the
	 * current locale.
	 * Since 2.8
	 * Params:
	 * str = a UTF-8 encoded string.
	 * len = length of str, in bytes, or -1 if str is nul-terminated.
	 * Returns: the collation key as a D string. The buffer allocated by
	 * GLib is released by this wrapper, so the caller must not g_free() it.
	 */
	public static string utf8_CollateKeyForFilename(string str, gssize len)
	{
		// gchar * g_utf8_collate_key_for_filename (const gchar *str, gssize len);
		auto key = g_utf8_collate_key_for_filename(Str.toStringz(str), len);

		// Same ownership rule as utf8_CollateKey: the C buffer belongs to
		// us and has to be freed after conversion.
		// NOTE(review): relies on Str.toString() returning a copy of the
		// C string, and on g_free being exposed by the gtkc.glib import -
		// confirm both.
		scope(exit) g_free(key);

		return Str.toString(key);
	}

	/**
	 * Convert a string from UTF-8 to UTF-16. A 0 character will be
	 * added to the result after the converted text.
* Params:
	 * str = a UTF-8 encoded string
	 * len = the maximum length (number of bytes) of str to use.
	 * If len < 0, then the string is nul-terminated.
	 * itemsRead = location to store number of bytes read, or NULL.
	 * If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be
	 * returned in case str contains a trailing partial
	 * character. If an error occurs then the index of the
	 * invalid input is stored here.
	 * (This wrapper always passes the address of the out parameter.)
	 * itemsWritten = location to store number of gunichar2 written,
	 * or NULL.
	 * The value stored here does not include the trailing 0.
	 * (This wrapper always passes the address of the out parameter.)
	 * Returns: a pointer to a newly allocated UTF-16 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set.
	 * Throws: GException on failure.
	 */
	public static gunichar2* utf8_ToUtf16(string str, glong len, out glong itemsRead, out glong itemsWritten)
	{
		// gunichar2 * g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error);
		GError* gerror = null;
		auto utf16 = g_utf8_to_utf16(Str.toStringz(str), len, &itemsRead, &itemsWritten, &gerror);

		// No error reported: hand the raw buffer to the caller.
		if ( gerror is null )
		{
			return utf16;
		}

		// GLib filled in an error; surface it as a D exception.
		throw new GException( new ErrorG(gerror) );
	}

	/**
	 * Convert a string from UTF-8 to a 32-bit fixed width
	 * representation as UCS-4. A trailing 0 will be added to the
	 * string after the converted text.
	 * Params:
	 * str = a UTF-8 encoded string
	 * len = the maximum length of str to use, in bytes. If len < 0,
	 * then the string is nul-terminated.
	 * itemsRead = location to store number of bytes read, or NULL.
	 * If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be
	 * returned in case str contains a trailing partial
	 * character. If an error occurs then the index of the
	 * invalid input is stored here.
	 * itemsWritten = location to store number of characters written or NULL.
* The value here stored does not include the trailing 0
	 * character.
	 * Returns: a pointer to a newly allocated UCS-4 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set.
	 * Throws: GException on failure.
	 */
	public static gunichar* utf8_ToUcs4(string str, glong len, out glong itemsRead, out glong itemsWritten)
	{
		// gunichar * g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error);
		GError* gerror = null;
		auto ucs4 = g_utf8_to_ucs4(Str.toStringz(str), len, &itemsRead, &itemsWritten, &gerror);

		// No error reported: hand the raw buffer to the caller.
		if ( gerror is null )
		{
			return ucs4;
		}

		// GLib filled in an error; surface it as a D exception.
		throw new GException( new ErrorG(gerror) );
	}

	/**
	 * Convert a string from UTF-8 to a 32-bit fixed width
	 * representation as UCS-4, assuming valid UTF-8 input.
	 * This function is roughly twice as fast as g_utf8_to_ucs4()
	 * but does no error checking on the input.
	 * Params:
	 * str = a UTF-8 encoded string
	 * len = the maximum length of str to use, in bytes. If len < 0,
	 * then the string is nul-terminated.
	 * itemsWritten = location to store the number of characters in the
	 * result, or NULL.
	 * (This wrapper always passes the address of the out parameter.)
	 * Returns: a pointer to a newly allocated UCS-4 string. This value must be freed with g_free().
	 */
	public static gunichar* utf8_ToUcs4_Fast(string str, glong len, out glong itemsWritten)
	{
		// gunichar * g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written);
		auto ucs4 = g_utf8_to_ucs4_fast(Str.toStringz(str), len, &itemsWritten);
		return ucs4;
	}

	/**
	 * Convert a string from UTF-16 to UCS-4. The result will be
	 * nul-terminated.
	 * Params:
	 * str = a UTF-16 encoded string
	 * len = the maximum length (number of gunichar2) of str to use.
	 * If len < 0, then the string is nul-terminated.
	 * itemsRead = location to store number of words read, or NULL.
* If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be
	 * returned in case str contains a trailing partial
	 * character. If an error occurs then the index of the
	 * invalid input is stored here.
	 * itemsWritten = location to store number of characters written, or NULL.
	 * The value stored here does not include the trailing
	 * 0 character.
	 * Returns: a pointer to a newly allocated UCS-4 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set.
	 * Throws: GException on failure.
	 */
	public static gunichar* utf16_ToUcs4(gunichar2* str, glong len, out glong itemsRead, out glong itemsWritten)
	{
		// gunichar * g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error);
		GError* gerror = null;
		auto ucs4 = g_utf16_to_ucs4(str, len, &itemsRead, &itemsWritten, &gerror);

		// No error reported: hand the raw buffer to the caller.
		if ( gerror is null )
		{
			return ucs4;
		}

		// GLib filled in an error; surface it as a D exception.
		throw new GException( new ErrorG(gerror) );
	}

	/**
	 * Convert a string from UTF-16 to UTF-8. The result will be
	 * terminated with a 0 byte.
	 * Note that the input is expected to be already in native endianness,
	 * an initial byte-order-mark character is not handled specially.
	 * g_convert() can be used to convert a byte buffer of UTF-16 data of
	 * ambiguous endianness.
	 * Further note that this function does not validate the result
	 * string; it may e.g. include embedded NUL characters. The only
	 * validation done by this function is to ensure that the input can
	 * be correctly interpreted as UTF-16, i.e. it doesn't contain
	 * things like unpaired surrogates.
	 * Params:
	 * str = a UTF-16 encoded string
	 * len = the maximum length (number of gunichar2) of str to use.
	 * If len < 0, then the string is nul-terminated.
	 * itemsRead = location to store number of words read, or NULL.
* If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be
	 * returned in case str contains a trailing partial
	 * character. If an error occurs then the index of the
	 * invalid input is stored here.
	 * itemsWritten = location to store number of bytes written, or NULL.
	 * The value stored here does not include the trailing
	 * 0 byte.
	 * Returns: the converted text as a D string. The UTF-8 buffer allocated
	 * by GLib is released by this wrapper, so the caller must not g_free() it.
	 * Throws: GException on failure.
	 */
	public static string utf16_ToUtf8(gunichar2* str, glong len, out glong itemsRead, out glong itemsWritten)
	{
		// gchar * g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error);
		GError* err = null;

		auto utf8 = g_utf16_to_utf8(str, len, &itemsRead, &itemsWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		// GLib hands over ownership of the UTF-8 buffer; release it once it
		// has been converted, otherwise every successful call leaks it.
		// NOTE(review): relies on Str.toString() returning a copy of the
		// C string, and on g_free being exposed by the gtkc.glib import -
		// confirm both.
		scope(exit) g_free(utf8);

		return Str.toString(utf8);
	}

	/**
	 * Convert a string from UCS-4 to UTF-16. A 0 character will be
	 * added to the result after the converted text.
	 * Params:
	 * str = a UCS-4 encoded string
	 * len = the maximum length (number of characters) of str to use.
	 * If len < 0, then the string is nul-terminated.
	 * itemsRead = location to store number of bytes read, or NULL.
	 * If an error occurs then the index of the invalid input
	 * is stored here.
	 * itemsWritten = location to store number of gunichar2
	 * written, or NULL. The value stored here does not
	 * include the trailing 0.
	 * Returns: a pointer to a newly allocated UTF-16 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set.
	 * Throws: GException on failure.
*/
	public static gunichar2* ucs4_ToUtf16(gunichar* str, glong len, out glong itemsRead, out glong itemsWritten)
	{
		// gunichar2 * g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error);
		GError* err = null;

		auto p = g_ucs4_to_utf16(str, len, &itemsRead, &itemsWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		// The raw buffer is returned as-is; the caller owns it.
		return p;
	}

	/**
	 * Convert a string from a 32-bit fixed width representation as UCS-4
	 * to UTF-8. The result will be terminated with a 0 byte.
	 * Params:
	 * str = a UCS-4 encoded string
	 * len = the maximum length (number of characters) of str to use.
	 * If len < 0, then the string is nul-terminated.
	 * itemsRead = location to store number of characters read, or NULL.
	 * itemsWritten = location to store number of bytes written or NULL.
	 * The value here stored does not include the trailing 0
	 * byte.
	 * Returns: the converted text as a D string. The UTF-8 buffer allocated
	 * by GLib is released by this wrapper, so the caller must not g_free() it.
	 * If an error occurs, itemsRead will be set to the position of the
	 * first invalid input character.
	 * Throws: GException on failure.
	 */
	public static string ucs4_ToUtf8(gunichar* str, glong len, out glong itemsRead, out glong itemsWritten)
	{
		// gchar * g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error);
		GError* err = null;

		auto utf8 = g_ucs4_to_utf8(str, len, &itemsRead, &itemsWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		// GLib hands over ownership of the UTF-8 buffer; release it once it
		// has been converted, otherwise every successful call leaks it.
		// NOTE(review): relies on Str.toString() returning a copy of the
		// C string, and on g_free being exposed by the gtkc.glib import -
		// confirm both.
		scope(exit) g_free(utf8);

		return Str.toString(utf8);
	}

	/**
	 * Converts a single character to UTF-8.
	 * Params:
	 * c = a Unicode character code
	 * outbuf = output buffer, must have at least 6 bytes of space.
* If NULL, the length will be computed and returned
	 * and nothing will be written to outbuf.
	 * NOTE(review): outbuf is passed by value as a string, so this
	 * wrapper has no way to hand the encoded bytes back to the caller;
	 * only the returned length is usable.
	 * Returns: number of bytes written
	 */
	public static int unicharToUtf8(gunichar c, string outbuf)
	{
		// gint g_unichar_to_utf8 (gunichar c, gchar *outbuf);
		if ( outbuf is null )
		{
			// Length-only query: GLib writes nothing when given NULL.
			return g_unichar_to_utf8(c, null);
		}

		// GLib may write up to 6 bytes no matter how long outbuf is, so
		// encode into a local buffer of exactly that size. The previous
		// code passed Str.toStringz(outbuf), whose size follows
		// outbuf.length and could be overrun for short strings.
		// NOTE(review): assumes Str.toStringz() returned a temporary copy,
		// so callers never observed the encoded bytes - confirm.
		char[6] scratch;
		return g_unichar_to_utf8(c, scratch.ptr);
	}
}