1 /* 2 * This file is part of gtkD. 3 * 4 * gtkD is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU Lesser General Public License 6 * as published by the Free Software Foundation; either version 3 7 * of the License, or (at your option) any later version, with 8 * some exceptions, please read the COPYING file. 9 * 10 * gtkD is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU Lesser General Public License for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public License 16 * along with gtkD; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA 18 */ 19 20 // generated automatically - do not change 21 // find conversion definition on APILookup.txt 22 // implement new conversion functionalities on the wrap.utils pakage 23 24 /* 25 * Conversion parameters: 26 * inFile = glib-Unicode-Manipulation.html 27 * outPack = glib 28 * outFile = Unicode 29 * strct = 30 * realStrct= 31 * ctorStrct= 32 * clss = Unicode 33 * interf = 34 * class Code: No 35 * interface Code: No 36 * template for: 37 * extend = 38 * implements: 39 * prefixes: 40 * - g_ 41 * omit structs: 42 * omit prefixes: 43 * omit code: 44 * omit signals: 45 * imports: 46 * - glib.Str 47 * - glib.ErrorG 48 * - glib.GException 49 * structWrap: 50 * module aliases: 51 * local aliases: 52 * overrides: 53 */ 54 55 module glib.Unicode; 56 57 public import gtkc.glibtypes; 58 59 private import gtkc.glib; 60 private import glib.ConstructionException; 61 62 63 private import glib.Str; 64 private import glib.ErrorG; 65 private import glib.GException; 66 67 68 69 70 /** 71 * This section describes a number of functions for dealing with 72 * Unicode characters and strings. 
There are analogues of the
 * traditional ctype.h character classification
 * and case conversion functions, UTF-8 analogues of some string utility
 * functions, functions to perform normalization, case conversion and
 * collation on UTF-8 strings and finally functions to convert between
 * the UTF-8, UTF-16 and UCS-4 encodings of Unicode.
 *
 * The implementations of the Unicode functions in GLib are based
 * on the Unicode Character Data tables, which are available from
 * www.unicode.org.
 * GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1,
 * GLib 2.12 supports Unicode 5.0, GLib 2.16.3 supports Unicode 5.1,
 * GLib 2.30 supports Unicode 6.0.
 */
public class Unicode
{
	
	/**
	 */
	
	/**
	 * Tests whether ch is a valid Unicode character. Not every
	 * integer value is a valid character. 0 counts as valid even
	 * though it normally terminates a string.
	 * Params:
	 * ch = a Unicode character
	 * Returns: TRUE if ch is a valid Unicode character
	 */
	public static int unicharValidate(gunichar ch)
	{
		// C prototype: gboolean g_unichar_validate (gunichar ch);
		int valid = g_unichar_validate(ch);
		return valid;
	}
	
	/**
	 * Tests whether a character is alphanumeric.
	 * To get a character value out of UTF-8 text, use
	 * g_utf8_get_char().
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is an alphanumeric character
	 */
	public static int unicharIsalnum(gunichar c)
	{
		// C prototype: gboolean g_unichar_isalnum (gunichar c);
		int alnum = g_unichar_isalnum(c);
		return alnum;
	}
	
	/**
	 * Determines whether a character is alphabetic (i.e. a letter).
	 * Given some UTF-8 text, obtain a character value with
	 * g_utf8_get_char().
* Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is an alphabetic character
	 */
	public static int unicharIsalpha(gunichar c)
	{
		// C prototype: gboolean g_unichar_isalpha (gunichar c);
		int alpha = g_unichar_isalpha(c);
		return alpha;
	}
	
	/**
	 * Tests whether a character is a control character.
	 * To get a character value out of UTF-8 text, use
	 * g_utf8_get_char().
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is a control character
	 */
	public static int unicharIscntrl(gunichar c)
	{
		// C prototype: gboolean g_unichar_iscntrl (gunichar c);
		int control = g_unichar_iscntrl(c);
		return control;
	}
	
	/**
	 * Tests whether the given character is assigned in the
	 * Unicode standard.
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if the character has an assigned value
	 */
	public static int unicharIsdefined(gunichar c)
	{
		// C prototype: gboolean g_unichar_isdefined (gunichar c);
		int assigned = g_unichar_isdefined(c);
		return assigned;
	}
	
	/**
	 * Tests whether a character is numeric (i.e. a digit). Besides
	 * ASCII 0-9 this also covers digits of other languages/scripts.
	 * To get a character value out of UTF-8 text, use g_utf8_get_char().
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is a digit
	 */
	public static int unicharIsdigit(gunichar c)
	{
		// C prototype: gboolean g_unichar_isdigit (gunichar c);
		int digit = g_unichar_isdigit(c);
		return digit;
	}
	
	/**
	 * Determines whether a character is printable and not a space
	 * (returns FALSE for control characters, format characters, and
	 * spaces). g_unichar_isprint() is similar, but returns TRUE for
	 * spaces. Given some UTF-8 text, obtain a character value with
	 * g_utf8_get_char().
* Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is printable unless it's a space
	 */
	public static int unicharIsgraph(gunichar c)
	{
		// C prototype: gboolean g_unichar_isgraph (gunichar c);
		int graphic = g_unichar_isgraph(c);
		return graphic;
	}
	
	/**
	 * Tests whether a character is a lowercase letter.
	 * To get a character value out of UTF-8 text, use
	 * g_utf8_get_char().
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is a lowercase letter
	 */
	public static int unicharIslower(gunichar c)
	{
		// C prototype: gboolean g_unichar_islower (gunichar c);
		int lower = g_unichar_islower(c);
		return lower;
	}
	
	/**
	 * Tests whether a character is a mark (non-spacing mark,
	 * combining mark, or enclosing mark in Unicode terms).
	 * To get a character value out of UTF-8 text, use
	 * g_utf8_get_char().
	 * Note: wherever isalpha characters are accepted, ismark
	 * characters should usually be accepted too, since they are
	 * essential for writing most European languages as well as
	 * many non-Latin scripts.
	 * Since 2.14
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is a mark character
	 */
	public static int unicharIsmark(gunichar c)
	{
		// C prototype: gboolean g_unichar_ismark (gunichar c);
		int mark = g_unichar_ismark(c);
		return mark;
	}
	
	/**
	 * Tests whether a character is printable.
	 * In contrast to g_unichar_isgraph(), spaces yield TRUE.
	 * To get a character value out of UTF-8 text, use
	 * g_utf8_get_char().
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is printable
	 */
	public static int unicharIsprint(gunichar c)
	{
		// C prototype: gboolean g_unichar_isprint (gunichar c);
		int printable = g_unichar_isprint(c);
		return printable;
	}
	
	/**
	 * Determines whether a character is punctuation or a symbol.
	 * Given some UTF-8 text, obtain a character value with
	 * g_utf8_get_char().
* Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is a punctuation or symbol character
	 */
	public static int unicharIspunct(gunichar c)
	{
		// C prototype: gboolean g_unichar_ispunct (gunichar c);
		int punct = g_unichar_ispunct(c);
		return punct;
	}
	
	/**
	 * Tests whether a character is a space, tab, or line separator
	 * (newline, carriage return, etc.). To get a character value out
	 * of UTF-8 text, use g_utf8_get_char().
	 * (Note: this is not suitable for word breaking; use Pango or
	 * an equivalent for that, as the algorithm is fairly complex.)
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is a space character
	 */
	public static int unicharIsspace(gunichar c)
	{
		// C prototype: gboolean g_unichar_isspace (gunichar c);
		int space = g_unichar_isspace(c);
		return space;
	}
	
	/**
	 * Tests whether a character is titlecase. Some composite
	 * characters in Unicode, such as the DZ digraph, have three
	 * case variants rather than two. The titlecase form is used
	 * at the start of a word in which only the first letter is
	 * capitalized. The titlecase form of the DZ digraph is
	 * U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if the character is titlecase
	 */
	public static int unicharIstitle(gunichar c)
	{
		// C prototype: gboolean g_unichar_istitle (gunichar c);
		int title = g_unichar_istitle(c);
		return title;
	}
	
	/**
	 * Tests whether a character is uppercase.
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if c is an uppercase character
	 */
	public static int unicharIsupper(gunichar c)
	{
		// C prototype: gboolean g_unichar_isupper (gunichar c);
		int upper = g_unichar_isupper(c);
		return upper;
	}
	
	/**
	 * Determines if a character is a hexadecimal digit.
	 * Params:
	 * c = a Unicode character.
* Returns: TRUE if the character is a hexadecimal digit
	 */
	public static int unicharIsxdigit(gunichar c)
	{
		// C prototype: gboolean g_unichar_isxdigit (gunichar c);
		int hexDigit = g_unichar_isxdigit(c);
		return hexDigit;
	}
	
	/**
	 * Tests whether a character is typically rendered in a
	 * double-width cell.
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if the character is wide
	 */
	public static int unicharIswide(gunichar c)
	{
		// C prototype: gboolean g_unichar_iswide (gunichar c);
		int wide = g_unichar_iswide(c);
		return wide;
	}
	
	/**
	 * Tests whether a character is typically rendered in a double-width
	 * cell under legacy East Asian locales. Every character that is wide
	 * according to g_unichar_iswide() is also reported wide here, but the
	 * converse does not necessarily hold. See the Unicode Standard
	 * Annex #11 for details.
	 * Note that some characters may pass both this test and
	 * g_unichar_iszerowidth().
	 * Since 2.12
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if the character is wide in legacy East Asian locales
	 */
	public static int unicharIswideCjk(gunichar c)
	{
		// C prototype: gboolean g_unichar_iswide_cjk (gunichar c);
		int wideCjk = g_unichar_iswide_cjk(c);
		return wideCjk;
	}
	
	/**
	 * Determines if a given character typically takes zero width when rendered.
	 * The return value is TRUE for all non-spacing and enclosing marks
	 * (e.g., combining accents), format characters, zero-width
	 * space, but not U+00AD SOFT HYPHEN.
	 * A typical use of this function is with one of g_unichar_iswide() or
	 * g_unichar_iswide_cjk() to determine the number of cells a string occupies
	 * when displayed on a grid display (terminals). However, note that not all
	 * terminals support zero-width rendering of zero-width marks.
* Since 2.14
	 * Params:
	 * c = a Unicode character
	 * Returns: TRUE if the character has zero width
	 */
	public static int unicharIszerowidth(gunichar c)
	{
		// C prototype: gboolean g_unichar_iszerowidth (gunichar c);
		int zeroWidth = g_unichar_iszerowidth(c);
		return zeroWidth;
	}
	
	/**
	 * Maps a character to its uppercase form.
	 * Params:
	 * c = a Unicode character
	 * Returns: c converted to uppercase. If c is not a lowercase or titlecase character, or has no uppercase equivalent, c is returned unchanged.
	 */
	public static gunichar unicharToupper(gunichar c)
	{
		// C prototype: gunichar g_unichar_toupper (gunichar c);
		gunichar upper = g_unichar_toupper(c);
		return upper;
	}
	
	/**
	 * Maps a character to its lowercase form.
	 * Params:
	 * c = a Unicode character.
	 * Returns: c converted to lowercase. If c is not an uppercase or titlecase character, or has no lowercase equivalent, c is returned unchanged.
	 */
	public static gunichar unicharTolower(gunichar c)
	{
		// C prototype: gunichar g_unichar_tolower (gunichar c);
		gunichar lower = g_unichar_tolower(c);
		return lower;
	}
	
	/**
	 * Maps a character to its titlecase form.
	 * Params:
	 * c = a Unicode character
	 * Returns: c converted to titlecase. If c is not an uppercase or lowercase character, c is returned unchanged.
	 */
	public static gunichar unicharTotitle(gunichar c)
	{
		// C prototype: gunichar g_unichar_totitle (gunichar c);
		gunichar title = g_unichar_totitle(c);
		return title;
	}
	
	/**
	 * Yields the numeric value of a character interpreted as a
	 * decimal digit.
	 * Params:
	 * c = a Unicode character
	 * Returns: If c is a decimal digit (according to g_unichar_isdigit()), its numeric value. Otherwise, -1.
	 */
	public static int unicharDigitValue(gunichar c)
	{
		// C prototype: gint g_unichar_digit_value (gunichar c);
		int value = g_unichar_digit_value(c);
		return value;
	}
	
	/**
	 * Determines the numeric value of a character as a hexadecimal
	 * digit.
* Params:
	 * c = a Unicode character
	 * Returns: If c is a hex digit (according to g_unichar_isxdigit()), its numeric value. Otherwise, -1.
	 */
	public static int unicharXdigitValue(gunichar c)
	{
		// C prototype: gint g_unichar_xdigit_value (gunichar c);
		int value = g_unichar_xdigit_value(c);
		return value;
	}
	
	/**
	 * Carries out one composition step of the Unicode canonical
	 * composition algorithm.
	 * Algorithmic Hangul Jamo composition is included, but the
	 * function is not exactly the inverse of g_unichar_decompose().
	 * No composition can have either of a or b equal to zero.
	 * To be precise, a and b are composed if and only if there is
	 * a Primary Composite P which is canonically equivalent to the
	 * sequence <a,b>. See the Unicode Standard for the definition
	 * of Primary Composite.
	 * If a and b do not compose a new character, ch is set to zero.
	 * See UAX#15 for details.
	 * Since 2.30
	 * Params:
	 * a = a Unicode character
	 * b = a Unicode character
	 * ch = return location for the composed character
	 * Returns: TRUE if the characters could be composed
	 */
	public static int unicharCompose(gunichar a, gunichar b, out gunichar ch)
	{
		// C prototype: gboolean g_unichar_compose (gunichar a,  gunichar b,  gunichar *ch);
		int composed = g_unichar_compose(a, b, &ch);
		return composed;
	}
	
	/**
	 * Performs a single decomposition step of the
	 * Unicode canonical decomposition algorithm.
	 * This function does not include compatibility
	 * decompositions. It does, however, include algorithmic
	 * Hangul Jamo decomposition, as well as 'singleton'
	 * decompositions which replace a character by a single
	 * other character. In the case of singletons *b will
	 * be set to zero.
	 * If ch is not decomposable, *a is set to ch and *b
	 * is set to zero.
* Note that the way Unicode decomposition pairs are
	 * defined, it is guaranteed that b would not decompose
	 * further, but a may itself decompose. To get the full
	 * canonical decomposition for ch, one would need to
	 * recursively call this function on a. Or use
	 * g_unichar_fully_decompose().
	 * See UAX#15
	 * for details.
	 * Since 2.30
	 * Params:
	 * ch = a Unicode character
	 * a = return location for the first component of ch
	 * b = return location for the second component of ch
	 * Returns: TRUE if the character could be decomposed
	 */
	public static int unicharDecompose(gunichar ch, out gunichar a, out gunichar b)
	{
		// C prototype: gboolean g_unichar_decompose (gunichar ch,  gunichar *a,  gunichar *b);
		// Both components are written by the C function through the out parameters.
		int decomposed = g_unichar_decompose(ch, &a, &b);
		return decomposed;
	}
	
	/**
	 * Computes the canonical or compatibility decomposition of a
	 * Unicode character. For compatibility decomposition,
	 * pass TRUE for compat; for canonical decomposition
	 * pass FALSE for compat.
	 * The decomposed sequence is placed in result. Only up to
	 * result_len characters are written into result. The length
	 * of the full decomposition (irrespective of result_len) is
	 * returned by the function. For canonical decomposition,
	 * currently all decompositions are of length at most 4, but
	 * this may change in the future (very unlikely though).
	 * At any rate, Unicode does guarantee that a buffer of length
	 * 18 is always enough for both compatibility and canonical
	 * decompositions, so that is the size recommended. This is provided
	 * as G_UNICHAR_MAX_DECOMPOSITION_LENGTH.
	 * See UAX#15
	 * for details.
	 * Since 2.30
	 * Params:
	 * ch = a Unicode character.
	 * compat = whether perform canonical or compatibility decomposition
	 * result = location to store decomposed result, or NULL. [allow-none]
	 * Returns: the length of the full decomposition.
*/
	public static gsize unicharFullyDecompose(gunichar ch, int compat, gunichar[] result)
	{
		// C prototype: gsize g_unichar_fully_decompose (gunichar ch,  gboolean compat,  gunichar *result,  gsize result_len);
		// The capacity argument is a gsize, so the slice length is converted
		// straight to that type. Narrowing through int first would wrap
		// lengths above int.max and report a bogus capacity to the C side.
		gsize capacity = cast(gsize) result.length;
		return g_unichar_fully_decompose(ch, compat, result.ptr, capacity);
	}
	
	/**
	 * Classifies a Unicode character by type.
	 * Params:
	 * c = a Unicode character
	 * Returns: the type of the character.
	 */
	public static GUnicodeType unicharType(gunichar c)
	{
		// C prototype: GUnicodeType g_unichar_type (gunichar c);
		GUnicodeType type = g_unichar_type(c);
		return type;
	}
	
	/**
	 * Reports the break type of c. c should be a Unicode character
	 * (use g_utf8_get_char() to derive one from UTF-8 encoded text).
	 * Break types are used to find word and line breaks ("text
	 * boundaries"). Pango implements the Unicode boundary resolution
	 * algorithms, so normally a function such as pango_break() is
	 * used instead of dealing with break types directly.
	 * Params:
	 * c = a Unicode character
	 * Returns: the break type of c
	 */
	public static GUnicodeBreakType unicharBreakType(gunichar c)
	{
		// C prototype: GUnicodeBreakType g_unichar_break_type (gunichar c);
		GUnicodeBreakType breakType = g_unichar_break_type(c);
		return breakType;
	}
	
	/**
	 * Reports the canonical combining class of a Unicode character.
	 * Since 2.14
	 * Params:
	 * uc = a Unicode character
	 * Returns: the combining class of the character
	 */
	public static int unicharCombiningClass(gunichar uc)
	{
		// C prototype: gint g_unichar_combining_class (gunichar uc);
		int combiningClass = g_unichar_combining_class(uc);
		return combiningClass;
	}
	
	/**
	 * Computes the canonical ordering of a string in-place.
	 * This rearranges decomposed characters in the string
	 * according to their combining classes. See the Unicode
	 * manual for more information.
	 * Params:
	 * string = a UCS-4 encoded string.
*/
	public static void unicodeCanonicalOrdering(gunichar[] string)
	{
		// C prototype: void g_unicode_canonical_ordering (gunichar *string,  gsize len);
		// len is a gsize: convert the slice length to that type directly
		// instead of narrowing through int, which wraps above int.max.
		g_unicode_canonical_ordering(string.ptr, cast(gsize) string.length);
	}
	
	/**
	 * Warning
	 * g_unicode_canonical_decomposition has been deprecated since version 2.30
	 * and should not be used in newly-written code. Use the more flexible
	 * g_unichar_fully_decompose() instead.
	 * Computes the canonical decomposition of a Unicode character.
	 * Params:
	 * ch = a Unicode character.
	 * Returns: the decomposition as an array of Unicode characters, or null
	 * when the C function returns NULL. The array is a slice over the buffer
	 * the C function returned (described there as newly allocated); it is
	 * not copied or freed here.
	 */
	public static gunichar[] unicodeCanonicalDecomposition(gunichar ch)
	{
		// C prototype: gunichar * g_unicode_canonical_decomposition (gunichar ch,  gsize *result_len);
		gsize count;
		auto decomposed = g_unicode_canonical_decomposition(ch, &count);
		
		if ( decomposed !is null )
		{
			// View the C buffer as a D slice of the reported length.
			return decomposed[0 .. count];
		}
		
		return null;
	}
	
	/**
	 * In Unicode, some characters are mirrored. This
	 * means that their images are mirrored horizontally in text that is laid
	 * out from right to left. For instance, "(" would become its mirror image,
	 * ")", in right-to-left text.
	 * If ch has the Unicode mirrored property and there is another unicode
	 * character that typically has a glyph that is the mirror image of ch's
	 * glyph and mirrored_ch is set, it puts that character in the address
	 * pointed to by mirrored_ch. Otherwise the original character is put.
* Since 2.4
	 * Params:
	 * ch = a Unicode character
	 * mirroredCh = location to store the mirrored character
	 * Returns: TRUE if ch has a mirrored character, FALSE otherwise
	 */
	public static int unicharGetMirrorChar(gunichar ch, out gunichar mirroredCh)
	{
		// C prototype: gboolean g_unichar_get_mirror_char (gunichar ch,  gunichar *mirrored_ch);
		int hasMirror = g_unichar_get_mirror_char(ch, &mirroredCh);
		return hasMirror;
	}
	
	/**
	 * Finds the GUnicodeScript of a character (as defined by
	 * Unicode Standard Annex #24). ch is not checked for being a
	 * valid Unicode character; for an invalid character the result
	 * is undefined.
	 * This function is equivalent to pango_script_for_unichar() and
	 * the two are interchangeable.
	 * Since 2.14
	 * Params:
	 * ch = a Unicode character
	 * Returns: the GUnicodeScript for the character.
	 */
	public static GUnicodeScript unicharGetScript(gunichar ch)
	{
		// C prototype: GUnicodeScript g_unichar_get_script (gunichar ch);
		GUnicodeScript script = g_unichar_get_script(ch);
		return script;
	}
	
	/**
	 * Looks up the Unicode script for iso15924. ISO 15924 assigns four-letter
	 * codes to scripts. For example, the code for Arabic is 'Arab'.
	 * This function accepts four letter codes encoded as a guint32 in a
	 * big-endian fashion. That is, the code expected for Arabic is
	 * 0x41726162 (0x41 is ASCII code for 'A', 0x72 is ASCII code for 'r', etc).
	 * See Codes for the
	 * representation of names of scripts for details.
	 * Since 2.30
	 * Params:
	 * iso15924 = a Unicode script
	 * Returns: the Unicode script for iso15924, or of G_UNICODE_SCRIPT_INVALID_CODE if iso15924 is zero and G_UNICODE_SCRIPT_UNKNOWN if iso15924 is unknown.
*/
	public static GUnicodeScript unicodeScriptFromIso15924(uint iso15924)
	{
		// C prototype: GUnicodeScript g_unicode_script_from_iso15924 (guint32 iso15924);
		GUnicodeScript script = g_unicode_script_from_iso15924(iso15924);
		return script;
	}
	
	/**
	 * Finds the ISO 15924 code for script. ISO 15924 assigns
	 * four-letter codes to scripts; the code for Arabic, for
	 * example, is 'Arab'. This function packs the four letters
	 * into a guint32 in big-endian fashion, so the code returned
	 * for Arabic is 0x41726162 (0x41 is the ASCII code for 'A',
	 * 0x72 the ASCII code for 'r', etc).
	 * See Codes for the representation of names of scripts for details.
	 * Since 2.30
	 * Params:
	 * script = a Unicode script
	 * Returns: the ISO 15924 code for script, encoded as an integer, or zero if script is G_UNICODE_SCRIPT_INVALID_CODE, or the ISO 15924 code 'Zzzz' (script code for UNKNOWN) if script is not understood.
	 */
	public static uint unicodeScriptToIso15924(GUnicodeScript script)
	{
		// C prototype: guint32 g_unicode_script_to_iso15924 (GUnicodeScript script);
		uint code = g_unicode_script_to_iso15924(script);
		return code;
	}
	
	/**
	 * Decodes a UTF-8 encoded byte sequence into a Unicode character.
	 * Results are undefined when p does not point to a valid UTF-8
	 * encoded character. When it is not certain that the bytes form
	 * complete, valid Unicode characters, use
	 * g_utf8_get_char_validated() instead.
	 * Params:
	 * p = a pointer to Unicode character encoded as UTF-8
	 * Returns: the resulting character
	 */
	public static gunichar utf8_GetChar(string p)
	{
		// C prototype: gunichar g_utf8_get_char (const gchar *p);
		auto encoded = Str.toStringz(p);
		return g_utf8_get_char(encoded);
	}
	
	/**
	 * Convert a sequence of bytes encoded as UTF-8 to a Unicode character.
* This function checks for incomplete characters, for invalid characters
	 * such as characters that are out of the range of Unicode, and for
	 * overlong encodings of valid characters.
	 * Params:
	 * p = a pointer to Unicode character encoded as UTF-8
	 * Returns: the resulting character. If p points to a partial sequence at the end of a string that could begin a valid character (or if max_len is zero), returns (gunichar)-2; otherwise, if p does not point to a valid UTF-8 encoded Unicode character, returns (gunichar)-1.
	 */
	public static gunichar utf8_GetCharValidated(string p)
	{
		// C prototype: gunichar g_utf8_get_char_validated (const gchar *p,  gssize max_len);
		// max_len is a gssize. The slice length is converted to that type
		// directly: narrowing through int could turn a very large length
		// into a negative max_len.
		gssize maxLen = cast(gssize) p.length;
		return g_utf8_get_char_validated(cast(char*) p.ptr, maxLen);
	}
	
	/**
	 * Converts from an integer character offset to a pointer to a position
	 * within the string.
	 * Since 2.10, this function allows to pass a negative offset to
	 * step backwards. It is usually worth stepping backwards from the end
	 * instead of forwards if offset is in the last fourth of the string,
	 * since moving forward is about 3 times faster than moving backward.
	 * Note
	 * This function doesn't abort when reaching the end of str. Therefore
	 * you should be sure that offset is within string boundaries before
	 * calling that function. Call g_utf8_strlen() when unsure.
	 * This limitation exists as this function is called frequently during
	 * text rendering and therefore has to be as fast as possible.
* Params:
	 * str = a UTF-8 encoded string
	 * offset = a character offset within str
	 * Returns: the resulting pointer
	 */
	public static string utf8_OffsetToPointer(string str, glong offset)
	{
		// C prototype: gchar * g_utf8_offset_to_pointer (const gchar *str,  glong offset);
		auto target = g_utf8_offset_to_pointer(Str.toStringz(str), offset);
		return Str.toString(target);
	}
	
	/**
	 * Converts a position within a string into an integer character
	 * offset.
	 * Since 2.10, pos may lie before str, in which case the returned
	 * offset is negative.
	 * pos must refer to the same memory as str (typically a slice of
	 * it, e.g. str[i .. $]): the offset is computed from the two start
	 * addresses.
	 * Params:
	 * str = a UTF-8 encoded string
	 * pos = a position within str
	 * Returns: the resulting character offset
	 */
	public static glong utf8_PointerToOffset(string str, string pos)
	{
		// C prototype: glong g_utf8_pointer_to_offset (const gchar *str,  const gchar *pos);
		// The C function measures the distance between the two pointers, so
		// both must point into the same buffer. The slices' own start
		// addresses are passed for that reason; converting each argument
		// separately with Str.toStringz does not preserve that relationship.
		// NOTE(review): presumably Str.toStringz returns a separate
		// NUL-terminated copy - confirm against glib.Str.
		char* base = cast(char*) str.ptr;
		char* position = cast(char*) pos.ptr;
		return g_utf8_pointer_to_offset(base, position);
	}
	
	/**
	 * Finds the previous UTF-8 character in the string before p.
	 * p need not be at the beginning of a UTF-8 character. The
	 * character found is not validated beyond starting with an
	 * appropriate byte. If p might be the first character of the
	 * string, g_utf8_find_prev_char() must be used instead.
	 * p must be a position inside a larger string (e.g. a slice
	 * buf[i .. $] with i > 0), because the bytes before p are read.
	 * Params:
	 * p = a position within a UTF-8 encoded string
	 * Returns: the text starting at the found character and running
	 * to the end of p, or null if p is null.
	 */
	public static string utf8_PrevChar(string p)
	{
		// C prototype: gchar * g_utf8_prev_char (const gchar *p);
		if ( p.ptr is null )
		{
			return null;
		}
		
		// Step backwards from the slice's real address; the bytes before
		// it belong to the caller's enclosing string.
		char* here = cast(char*) p.ptr;
		char* found = g_utf8_prev_char(here);
		
		if ( found is null )
		{
			return null;
		}
		
		// Return the previous character together with the rest of p.
		return cast(string) found[0 .. cast(size_t)((here + p.length) - found)];
	}
	
	/**
	 * Finds the start of the next UTF-8 character in the string after p.
	 * p does not have to be at the beginning of a UTF-8 character. No check
	 * is made to see if the character found is actually valid other than
	 * it starts with an appropriate byte.
* Params:
	 * p = a pointer to a position within a UTF-8 encoded string
	 * end = a pointer to the byte following the end of the string,
	 * or NULL to indicate that the string is nul-terminated.
	 * Returns: a pointer to the found character or NULL
	 */
	public static string utf8_FindNextChar(string p, string end)
	{
		// C prototype: gchar * g_utf8_find_next_char (const gchar *p,  const gchar *end);
		if ( end.ptr is null )
		{
			// Unbounded mode: the C function scans up to the terminating
			// NUL, so hand it a NUL-terminated version of p.
			return Str.toString(g_utf8_find_next_char(Str.toStringz(p), null));
		}
		
		// Bounded mode: the C function compares p against end, so both
		// must be addresses inside the same buffer. The slices' own
		// start addresses are passed instead of separately converted
		// strings. end is expected to be a slice into the same string
		// as p (e.g. buf[$ .. $]).
		char* limit = cast(char*) end.ptr;
		char* found = g_utf8_find_next_char(cast(char*) p.ptr, limit);
		
		if ( found is null )
		{
			return null;
		}
		
		// Return the text from the found character up to end.
		return cast(string) found[0 .. cast(size_t)(limit - found)];
	}
	
	/**
	 * Given a position p within a UTF-8 encoded string str, finds the
	 * start of the previous UTF-8 character starting before p. Yields
	 * NULL if str holds no UTF-8 character before p.
	 * p need not be at the beginning of a UTF-8 character. The
	 * character found is not validated beyond starting with an
	 * appropriate byte.
	 * p must refer to the same memory as str (typically a slice of it,
	 * e.g. str[i .. $]).
	 * Params:
	 * str = the beginning of a UTF-8 encoded string
	 * p = some position within str
	 * Returns: the part of str starting at the found character, or NULL.
	 */
	public static string utf8_FindPrevChar(string str, string p)
	{
		// C prototype: gchar * g_utf8_find_prev_char (const gchar *str,  const gchar *p);
		// The C function walks backwards from p until it reaches str, so
		// the two pointers must share one buffer. The slices' own start
		// addresses are passed to keep that relationship intact.
		char* base = cast(char*) str.ptr;
		char* found = g_utf8_find_prev_char(base, cast(char*) p.ptr);
		
		if ( found is null )
		{
			return null;
		}
		
		return str[cast(size_t)(found - base) .. $];
	}
	
	/**
	 * Computes the length of the string in characters, not including
	 * the terminating nul character. If the max'th byte falls in the
	 * middle of a character, the last (partial) character is not counted.
* Params:
	 * p = pointer to the start of a UTF-8 encoded string
	 * Returns: the length of the string in characters
	 */
	public static glong utf8_Strlen(string p)
	{
		// C prototype: glong g_utf8_strlen (const gchar *p,  gssize max);
		// max is a gssize. Converting the length to that type directly
		// avoids the wrap-around that narrowing through int causes for
		// very long strings.
		gssize maxBytes = cast(gssize) p.length;
		return g_utf8_strlen(cast(char*) p.ptr, maxBytes);
	}
	
	/**
	 * Like the standard C strncpy() function, but copies a given
	 * number of characters rather than a given number of bytes.
	 * src must be valid UTF-8 encoded text.
	 * (Use g_utf8_validate() on all text before trying to use UTF-8
	 * utility functions with it.)
	 * Params:
	 * dest = not written to; the copy is made into an internal buffer
	 * src = UTF-8 encoded string
	 * n = character count
	 * Returns: the first n characters of src (all of src if it is shorter)
	 */
	public static string utf8_Strncpy(string dest, string src, gsize n)
	{
		// C prototype: gchar * g_utf8_strncpy (gchar *dest,  const gchar *src,  gsize n);
		// The C function writes up to n characters plus a terminating NUL
		// into its destination. For valid UTF-8 that is never more than
		// src.length + 1 bytes, so a scratch buffer of that size is always
		// large enough. Sizing the buffer from dest, as before, could
		// overflow whenever dest was shorter than the copied text.
		char[] buffer = new char[src.length + 1];
		auto copied = g_utf8_strncpy(buffer.ptr, Str.toStringz(src), n);
		return Str.toString(copied);
	}
	
	/**
	 * Locates the leftmost occurrence of the given Unicode character
	 * in a UTF-8 encoded string, searching at most len bytes.
	 * A len of -1 allows an unbounded search.
	 * Params:
	 * p = a nul-terminated UTF-8 encoded string
	 * len = the maximum length of p
	 * c = a Unicode character
	 * Returns: NULL if the string does not contain the character, otherwise the text starting at the leftmost occurrence of the character in the string.
	 */
	public static string utf8_Strchr(string p, gssize len, gunichar c)
	{
		// C prototype: gchar * g_utf8_strchr (const gchar *p,  gssize len,  gunichar c);
		auto hit = g_utf8_strchr(Str.toStringz(p), len, c);
		return Str.toString(hit);
	}
	
	/**
	 * Find the rightmost occurrence of the given Unicode character
	 * in a UTF-8 encoded string, while limiting the search to len bytes.
	 * If len is -1, allow unbounded search.
* Params:
	 * p = a nul-terminated UTF-8 encoded string
	 * len = the maximum length of p
	 * c = a Unicode character
	 * Returns: NULL if the string does not contain the character, otherwise, a pointer to the start of the rightmost occurrence of the character in the string.
	 */
	public static string utf8_Strrchr(string p, gssize len, gunichar c)
	{
		// C prototype: gchar * g_utf8_strrchr (const gchar *p,  gssize len,  gunichar c);
		auto hit = g_utf8_strrchr(Str.toStringz(p), len, c);
		return Str.toString(hit);
	}
	
	/**
	 * Reverses a UTF-8 string. str must be valid UTF-8 encoded text.
	 * (Use g_utf8_validate() on all text before trying to use UTF-8
	 * utility functions with it.)
	 * The function is meant for programmatic uses of reversed strings.
	 * Decomposed characters, combining marks, byte order marks,
	 * directional indicators (LRM, LRO, etc) and similar characters,
	 * which may need special handling when a string is reversed for
	 * display, get no special treatment.
	 * Since 2.2
	 * Params:
	 * str = a UTF-8 encoded string
	 * Returns: a string which is the reverse of str.
	 */
	public static string utf8_Strreverse(string str)
	{
		// C prototype: gchar * g_utf8_strreverse (const gchar *str,  gssize len);
		// len is a gssize. The slice length is converted to that type
		// directly: narrowing through int could make a very large length
		// negative.
		gssize len = cast(gssize) str.length;
		auto reversed = g_utf8_strreverse(cast(char*) str.ptr, len);
		// NOTE(review): the C documentation says the result is newly
		// allocated and should be released with g_free(); nothing here
		// frees it - confirm whether Str.toString copies or takes ownership.
		return Str.toString(reversed);
	}
	
	/**
	 * Copies a substring out of a UTF-8 encoded string.
	 * The substring will contain end_pos - start_pos
	 * characters.
	 * Since 2.30
	 * Params:
	 * str = a UTF-8 encoded string
	 * startPos = a character offset within str
	 * endPos = another character offset within str
	 * Returns: a newly allocated copy of the requested substring. Free with g_free() when no longer needed.
*/
	public static string utf8_Substring(string str, glong startPos, glong endPos)
	{
		// gchar * g_utf8_substring (const gchar *str, glong start_pos, glong end_pos);
		auto sub = g_utf8_substring(Str.toStringz(str), startPos, endPos);

		// NOTE(review): relies on Str.toString returning its own copy of the
		// bytes - confirm against glib.Str before relying on the g_free below.
		string result = Str.toString(sub);

		// The C result is newly allocated; release it so it is not leaked.
		g_free(sub);
		return result;
	}

	/**
	 * Validates UTF-8 encoded text. The whole byte length of str is
	 * passed to GLib as max_len, so str does not have to be NUL-terminated.
	 * The end of the valid range is reported through end (i.e. the start
	 * of the first invalid character if some bytes were invalid, or the
	 * end of the text being validated otherwise).
	 * Note that g_utf8_validate() returns FALSE if max_len is
	 * positive and any of the max_len bytes are NUL.
	 * Returns TRUE if all of str was valid. Many GLib and GTK+
	 * routines require valid UTF-8 as input;
	 * so data read from a file or the network should be checked
	 * with g_utf8_validate() before doing anything else with it.
	 * Params:
	 * str = the character data to validate
	 * end = receives the part of str that starts at the end of the valid
	 *       data (a slice of str, empty when everything was valid), or
	 *       null if GLib reported no position
	 * Returns: TRUE if the text was valid UTF-8
	 */
	public static int utf8_Validate(string str, out string end)
	{
		// gboolean g_utf8_validate (const gchar *str, gssize max_len, const gchar **end);
		char* validEnd = null;

		auto valid = g_utf8_validate(cast(char*)str.ptr, cast(gssize) str.length, &validEnd);

		if (validEnd is null)
		{
			end = null;
		}
		else
		{
			// validEnd points into str's own storage, which is not guaranteed
			// to be NUL-terminated. Running a strlen-based conversion on it
			// could read past the end of str, so slice str by offset instead.
			size_t offset = cast(size_t)(validEnd - cast(char*)str.ptr);
			end = str[offset .. str.length];
		}

		return valid;
	}

	/**
	 * Converts all Unicode characters in the string that have a case
	 * to uppercase. The exact manner that this is done depends
	 * on the current locale, and may result in the number of
	 * characters in the string increasing. (For instance, the
	 * German ess-zet will be changed to SS.)
* The buffer allocated by GLib is released inside this wrapper once it
	 * has been converted, so the caller has nothing to free.
	 * Params:
	 * str = a UTF-8 encoded string
	 * Returns: str with all characters converted to uppercase.
	 */
	public static string utf8_Strup(string str)
	{
		// gchar * g_utf8_strup (const gchar *str, gssize len);
		auto upper = g_utf8_strup(cast(char*)str.ptr, cast(gssize) str.length);

		// NOTE(review): relies on Str.toString returning its own copy of the
		// bytes - confirm against glib.Str before relying on the g_free below.
		string result = Str.toString(upper);

		// The C result is newly allocated; release it so it is not leaked.
		g_free(upper);
		return result;
	}

	/**
	 * Converts all Unicode characters in the string that have a case
	 * to lowercase. The exact manner that this is done depends
	 * on the current locale, and may result in the number of
	 * characters in the string changing.
	 * The buffer allocated by GLib is released inside this wrapper once it
	 * has been converted, so the caller has nothing to free.
	 * Params:
	 * str = a UTF-8 encoded string
	 * Returns: str with all characters converted to lowercase.
	 */
	public static string utf8_Strdown(string str)
	{
		// gchar * g_utf8_strdown (const gchar *str, gssize len);
		auto lower = g_utf8_strdown(cast(char*)str.ptr, cast(gssize) str.length);

		// NOTE(review): relies on Str.toString returning its own copy of the
		// bytes - confirm against glib.Str before relying on the g_free below.
		string result = Str.toString(lower);

		// The C result is newly allocated; release it so it is not leaked.
		g_free(lower);
		return result;
	}

	/**
	 * Converts a string into a form that is independent of case. The
	 * result will not correspond to any particular case, but can be
	 * compared for equality or ordered with the results of calling
	 * g_utf8_casefold() on other strings.
	 * Note that calling g_utf8_casefold() followed by g_utf8_collate() is
	 * only an approximation to the correct linguistic case insensitive
	 * ordering, though it is a fairly good one. Getting this exactly
	 * right would require a more sophisticated collation function that
	 * takes case sensitivity into account. GLib does not currently
	 * provide such a function.
	 * Params:
	 * str = a UTF-8 encoded string
	 * Returns: a case independent form of str, converted with Str.toString.
*/
	public static string utf8_Casefold(string str)
	{
		// gchar * g_utf8_casefold (const gchar *str, gssize len);
		auto folded = g_utf8_casefold(cast(char*)str.ptr, cast(gssize) str.length);

		// NOTE(review): relies on Str.toString returning its own copy of the
		// bytes - confirm against glib.Str before relying on the g_free below.
		string result = Str.toString(folded);

		// The C result is newly allocated; release it so it is not leaked.
		g_free(folded);
		return result;
	}

	/**
	 * Converts a string into canonical form, standardizing
	 * such issues as whether a character with an accent
	 * is represented as a base character and combining
	 * accent or as a single precomposed character. The
	 * string has to be valid UTF-8, otherwise NULL is
	 * returned. You should generally call g_utf8_normalize()
	 * before comparing two Unicode strings.
	 * The normalization mode G_NORMALIZE_DEFAULT only
	 * standardizes differences that do not affect the
	 * text content, such as the above-mentioned accent
	 * representation. G_NORMALIZE_ALL also standardizes
	 * the "compatibility" characters in Unicode, such
	 * as SUPERSCRIPT THREE to the standard forms
	 * (in this case DIGIT THREE). Formatting information
	 * may be lost but for most text operations such
	 * characters should be considered the same.
	 * G_NORMALIZE_DEFAULT_COMPOSE and G_NORMALIZE_ALL_COMPOSE
	 * are like G_NORMALIZE_DEFAULT and G_NORMALIZE_ALL,
	 * but return a result with composed forms rather
	 * than a maximally decomposed form. This is often
	 * useful if you intend to convert the string to
	 * a legacy encoding or pass it to a system with
	 * less capable Unicode handling.
	 * Params:
	 * str = a UTF-8 encoded string.
	 * mode = the type of normalization to perform.
	 * Returns: the normalized form of str, converted with Str.toString; the C function returns NULL if str is not valid UTF-8.
*/
	public static string utf8_Normalize(string str, GNormalizeMode mode)
	{
		// gchar * g_utf8_normalize (const gchar *str, gssize len, GNormalizeMode mode);
		auto normalized = g_utf8_normalize(cast(char*)str.ptr, cast(gssize) str.length, mode);

		// NOTE(review): relies on Str.toString returning its own copy of the
		// bytes - confirm against glib.Str before relying on the g_free below.
		string result = Str.toString(normalized);

		// The C result is newly allocated; release it so it is not leaked.
		// g_free accepts NULL, which is what GLib returns for invalid input.
		g_free(normalized);
		return result;
	}

	/**
	 * Compares two strings for ordering using the linguistically
	 * correct rules for the current locale.
	 * When sorting a large number of strings, it will be significantly
	 * faster to obtain collation keys with g_utf8_collate_key() and
	 * compare the keys with strcmp() when sorting instead of sorting
	 * the original strings.
	 * Params:
	 * str1 = a UTF-8 encoded string
	 * str2 = a UTF-8 encoded string
	 * Returns: < 0 if str1 compares before str2, 0 if they compare equal, > 0 if str1 compares after str2.
	 */
	public static int utf8_Collate(string str1, string str2)
	{
		// gint g_utf8_collate (const gchar *str1, const gchar *str2);
		auto first = Str.toStringz(str1);
		auto second = Str.toStringz(str2);
		return g_utf8_collate(first, second);
	}

	/**
	 * Converts a string into a collation key that can be compared
	 * with other collation keys produced by the same function using
	 * strcmp().
	 * The results of comparing the collation keys of two strings
	 * with strcmp() will always be the same as comparing the two
	 * original strings with g_utf8_collate().
	 * Note that this function depends on the
	 * current locale.
	 * Params:
	 * str = a UTF-8 encoded string.
	 * Returns: the collation key, converted with Str.toString.
*/
	public static string utf8_CollateKey(string str)
	{
		// gchar * g_utf8_collate_key (const gchar *str, gssize len);
		auto key = g_utf8_collate_key(cast(char*)str.ptr, cast(gssize) str.length);

		// NOTE(review): relies on Str.toString returning its own copy of the
		// bytes - confirm against glib.Str before relying on the g_free below.
		string result = Str.toString(key);

		// The C result is newly allocated; release it so it is not leaked.
		g_free(key);
		return result;
	}

	/**
	 * Converts a string into a collation key that can be compared
	 * with other collation keys produced by the same function using strcmp().
	 * In order to sort filenames correctly, this function treats the dot '.'
	 * as a special case. Most dictionary orderings seem to consider it
	 * insignificant, thus producing the ordering "event.c" "eventgenerator.c"
	 * "event.h" instead of "event.c" "event.h" "eventgenerator.c". Also, we
	 * would like to treat numbers intelligently so that "file1" "file10" "file5"
	 * is sorted as "file1" "file5" "file10".
	 * Note that this function depends on the
	 * current locale.
	 * The buffer allocated by GLib is released inside this wrapper once it
	 * has been converted, so the caller has nothing to free.
	 * Since 2.8
	 * Params:
	 * str = a UTF-8 encoded string; its byte length is passed to GLib as len.
	 * Returns: the collation key, converted with Str.toString.
	 */
	public static string utf8_CollateKeyForFilename(string str)
	{
		// gchar * g_utf8_collate_key_for_filename (const gchar *str, gssize len);
		auto key = g_utf8_collate_key_for_filename(cast(char*)str.ptr, cast(gssize) str.length);

		// NOTE(review): relies on Str.toString returning its own copy of the
		// bytes - confirm against glib.Str before relying on the g_free below.
		string result = Str.toString(key);

		// The C result is newly allocated; release it so it is not leaked.
		g_free(key);
		return result;
	}

	/**
	 * Convert a string from UTF-8 to UTF-16. A 0 character will be
	 * added to the result after the converted text.
	 * Params:
	 * str = a UTF-8 encoded string
	 * itemsRead = receives the number of bytes read.
	 * If an error occurs then the index of the
	 * invalid input is stored here.
	 * Returns: a pointer to a newly allocated UTF-16 string. This value must be freed with g_free().
* If GLib reports an error, a GException is thrown.
	 * The returned array is a slice over the buffer GLib allocated (no copy
	 * is made), covering the items written.
	 * Throws: GException on failure.
	 */
	public static gunichar2[] utf8_ToUtf16(string str, out glong itemsRead)
	{
		// gunichar2 * g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error);
		glong itemsWritten;
		GError* err = null;

		// len is a glong in C; cast to glong rather than int so the length is
		// not narrowed where glong is wider than int.
		auto utf16 = g_utf8_to_utf16(cast(char*)str.ptr, cast(glong) str.length, &itemsRead, &itemsWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		if (utf16 is null)
		{
			return null;
		}

		return utf16[0 .. itemsWritten];
	}

	/**
	 * Convert a string from UTF-8 to a 32-bit fixed width
	 * representation as UCS-4. A trailing 0 character will be added to the
	 * string after the converted text.
	 * Params:
	 * str = a UTF-8 encoded string
	 * itemsRead = receives the number of bytes read.
	 * If an error occurs then the index of the
	 * invalid input is stored here.
	 * Returns: a slice over the newly allocated UCS-4 string (no copy is made), covering the items written. The underlying buffer must be freed with g_free(). If GLib reports an error, a GException is thrown.
	 * Throws: GException on failure.
	 */
	public static gunichar[] utf8_ToUcs4(string str, out glong itemsRead)
	{
		// gunichar * g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error);
		glong itemsWritten;
		GError* err = null;

		// len is a glong in C; cast to glong rather than int so the length is
		// not narrowed where glong is wider than int.
		auto ucs4 = g_utf8_to_ucs4(cast(char*)str.ptr, cast(glong) str.length, &itemsRead, &itemsWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		if (ucs4 is null)
		{
			return null;
		}

		return ucs4[0 .. itemsWritten];
	}

	/**
	 * Convert a string from UTF-8 to a 32-bit fixed width
	 * representation as UCS-4, assuming valid UTF-8 input.
	 * This function is roughly twice as fast as g_utf8_to_ucs4()
	 * but does no error checking on the input. A trailing 0 character
	 * will be added to the string after the converted text.
	 * Params:
	 * str = a UTF-8 encoded string
	 * Returns: a slice over the newly allocated UCS-4 string (no copy is made), covering the items written. The underlying buffer must be freed with g_free().
	 */
	public static gunichar[] utf8_ToUcs4_Fast(string str)
	{
		// gunichar * g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written);
		glong itemsWritten;

		// len is a glong in C; cast to glong rather than int so the length is
		// not narrowed where glong is wider than int.
		auto ucs4 = g_utf8_to_ucs4_fast(cast(char*)str.ptr, cast(glong) str.length, &itemsWritten);

		if (ucs4 is null)
		{
			return null;
		}

		return ucs4[0 .. itemsWritten];
	}

	/**
	 * Convert a string from UTF-16 to UCS-4. The result will be
	 * nul-terminated.
	 * Params:
	 * str = a UTF-16 encoded string
	 * itemsRead = receives the number of words read.
	 * If an error occurs then the index of the
	 * invalid input is stored here.
	 * Returns: a slice over the newly allocated UCS-4 string (no copy is made), covering the items written. The underlying buffer must be freed with g_free(). If GLib reports an error, a GException is thrown.
	 * Throws: GException on failure.
*/
	public static gunichar[] utf16_ToUcs4(gunichar2[] str, out glong itemsRead)
	{
		// gunichar * g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error);
		glong itemsWritten;
		GError* err = null;

		// len is a glong in C; cast to glong rather than int so the length is
		// not narrowed where glong is wider than int.
		auto ucs4 = g_utf16_to_ucs4(str.ptr, cast(glong) str.length, &itemsRead, &itemsWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		if (ucs4 is null)
		{
			return null;
		}

		// Slice over the GLib-allocated buffer; no copy is made here.
		return ucs4[0 .. itemsWritten];
	}

	/**
	 * Convert a string from UTF-16 to UTF-8. The result will be
	 * terminated with a 0 byte.
	 * Note that the input is expected to be already in native endianness,
	 * an initial byte-order-mark character is not handled specially.
	 * g_convert() can be used to convert a byte buffer of UTF-16 data of
	 * ambiguous endianness.
	 * Further note that this function does not validate the result
	 * string; it may e.g. include embedded NUL characters. The only
	 * validation done by this function is to ensure that the input can
	 * be correctly interpreted as UTF-16, i.e. it doesn't contain
	 * things like unpaired surrogates.
	 * Params:
	 * str = a UTF-16 encoded string
	 * itemsRead = receives the number of words read.
	 * If an error occurs then the index of the
	 * invalid input is stored here.
	 * Returns: the converted UTF-8 string. If GLib reports an error, a GException is thrown.
	 * Throws: GException on failure.
*/
	public static string utf16_ToUtf8(gunichar2[] str, out glong itemsRead)
	{
		// gchar * g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error);
		glong itemsWritten;
		GError* err = null;

		// len is a glong in C; cast to glong rather than int so the length is
		// not narrowed where glong is wider than int.
		auto utf8 = g_utf16_to_utf8(str.ptr, cast(glong) str.length, &itemsRead, &itemsWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		// NOTE(review): relies on Str.toString returning its own copy of the
		// bytes - confirm against glib.Str before relying on the g_free below.
		string result = Str.toString(utf8, itemsWritten);

		// The C result is newly allocated; release it so it is not leaked.
		g_free(utf8);
		return result;
	}

	/**
	 * Convert a string from UCS-4 to UTF-16. A 0 character will be
	 * added to the result after the converted text.
	 * Params:
	 * str = a UCS-4 encoded string
	 * itemsRead = receives the number of items read.
	 * If an error occurs then the index of the invalid input
	 * is stored here.
	 * Returns: a slice over the newly allocated UTF-16 string (no copy is made), covering the items written. The underlying buffer must be freed with g_free(). If GLib reports an error, a GException is thrown.
	 * Throws: GException on failure.
	 */
	public static gunichar2[] ucs4_ToUtf16(gunichar[] str, out glong itemsRead)
	{
		// gunichar2 * g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error);
		glong itemsWritten;
		GError* err = null;

		// len is a glong in C; cast to glong rather than int so the length is
		// not narrowed where glong is wider than int.
		auto utf16 = g_ucs4_to_utf16(str.ptr, cast(glong) str.length, &itemsRead, &itemsWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		if (utf16 is null)
		{
			return null;
		}

		return utf16[0 .. itemsWritten];
	}

	/**
	 * Convert a string from a 32-bit fixed width representation as UCS-4
	 * to UTF-8. The result will be terminated with a 0 byte.
	 * Params:
	 * str = a UCS-4 encoded string
	 * itemsRead = receives the number of characters read.
	 * Returns: the converted UTF-8 string.
* If GLib reports an error, a GException is thrown. In that case, items_read will be set to the position of the first invalid input character.
	 * Throws: GException on failure.
	 */
	public static string ucs4_ToUtf8(gunichar[] str, out glong itemsRead)
	{
		// gchar * g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error);
		glong itemsWritten;
		GError* err = null;

		// len is a glong in C; cast to glong rather than int so the length is
		// not narrowed where glong is wider than int.
		auto utf8 = g_ucs4_to_utf8(str.ptr, cast(glong) str.length, &itemsRead, &itemsWritten, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		// NOTE(review): relies on Str.toString returning its own copy of the
		// bytes - confirm against glib.Str before relying on the g_free below.
		string result = Str.toString(utf8, itemsWritten);

		// The C result is newly allocated; release it so it is not leaked.
		g_free(utf8);
		return result;
	}

	/**
	 * Computes the number of bytes needed to encode a single character
	 * as UTF-8.
	 *
	 * The C function can also write the encoded bytes into outbuf, but a D
	 * string passed by value cannot carry them back to the caller. This
	 * wrapper therefore asks GLib only for the length and writes nothing.
	 * Params:
	 * c = a Unicode character code
	 * outbuf = not written to; kept so existing callers keep compiling
	 * Returns: number of bytes the UTF-8 encoding of c occupies
	 */
	public static int unicharToUtf8(gunichar c, string outbuf)
	{
		// gint g_unichar_to_utf8 (gunichar c, gchar *outbuf);
		// With a NULL buffer GLib computes and returns the length without
		// writing. The previous code passed Str.toStringz(outbuf), which could
		// be shorter than the 6 bytes the C function requires and so overflow.
		// NOTE(review): assumes Str.toStringz returned a private copy, so the
		// bytes written before were never visible to callers - confirm.
		return g_unichar_to_utf8(c, null);
	}
}