1 /* 2 * This file is part of gtkD. 3 * 4 * gtkD is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU Lesser General Public License 6 * as published by the Free Software Foundation; either version 3 7 * of the License, or (at your option) any later version, with 8 * some exceptions, please read the COPYING file. 9 * 10 * gtkD is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU Lesser General Public License for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public License 16 * along with gtkD; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA 18 */ 19 20 // generated automatically - do not change 21 // find conversion definition on APILookup.txt 22 // implement new conversion functionalities on the wrap.utils pakage 23 24 /* 25 * Conversion parameters: 26 * inFile = glib-Unicode-Manipulation.html 27 * outPack = glib 28 * outFile = Unicode 29 * strct = 30 * realStrct= 31 * ctorStrct= 32 * clss = Unicode 33 * interf = 34 * class Code: No 35 * interface Code: No 36 * template for: 37 * extend = 38 * implements: 39 * prefixes: 40 * - g_ 41 * omit structs: 42 * omit prefixes: 43 * omit code: 44 * omit signals: 45 * imports: 46 * - glib.Str 47 * - glib.ErrorG 48 * - glib.GException 49 * structWrap: 50 * module aliases: 51 * local aliases: 52 * overrides: 53 */ 54 55 module glib.Unicode; 56 57 public import gtkc.glibtypes; 58 59 private import gtkc.glib; 60 private import glib.ConstructionException; 61 62 private import glib.Str; 63 private import glib.ErrorG; 64 private import glib.GException; 65 66 67 68 /** 69 * This section describes a number of functions for dealing with 70 * Unicode characters and strings. There are analogues of the 71 * traditional ctype.h character classification 72 * and case conversion functions, UTF-8 analogues of some string utility 73 * functions, functions to perform normalization, case conversion and 74 * collation on UTF-8 strings and finally functions to convert between 75 * the UTF-8, UTF-16 and UCS-4 encodings of Unicode. 76 * 77 * The implementations of the Unicode functions in GLib are based 78 * on the Unicode Character Data tables, which are available from 79 * www.unicode.org. 80 * GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1, 81 * GLib 2.12 supports Unicode 5.0, GLib 2.16.3 supports Unicode 5.1, 82 * GLib 2.30 supports Unicode 6.0. 83 */ 84 public class Unicode 85 { 86 87 /** 88 */ 89 90 /** 91 * Checks whether ch is a valid Unicode character. Some possible 92 * integer values of ch will not be valid. 0 is considered a valid 93 * character, though it's normally a string terminator. 94 * Params: 95 * ch = a Unicode character 96 * Returns: TRUE if ch is a valid Unicode character 97 */ 98 public static int unicharValidate(gunichar ch) 99 { 100 // gboolean g_unichar_validate (gunichar ch); 101 return g_unichar_validate(ch); 102 } 103 104 /** 105 * Determines whether a character is alphanumeric. 106 * Given some UTF-8 text, obtain a character value 107 * with g_utf8_get_char(). 108 * Params: 109 * c = a Unicode character 110 * Returns: TRUE if c is an alphanumeric character 111 */ 112 public static int unicharIsalnum(gunichar c) 113 { 114 // gboolean g_unichar_isalnum (gunichar c); 115 return g_unichar_isalnum(c); 116 } 117 118 /** 119 * Determines whether a character is alphabetic (i.e. a letter). 120 * Given some UTF-8 text, obtain a character value with 121 * g_utf8_get_char(). 122 * Params: 123 * c = a Unicode character 124 * Returns: TRUE if c is an alphabetic character 125 */ 126 public static int unicharIsalpha(gunichar c) 127 { 128 // gboolean g_unichar_isalpha (gunichar c); 129 return g_unichar_isalpha(c); 130 } 131 132 /** 133 * Determines whether a character is a control character. 134 * Given some UTF-8 text, obtain a character value with 135 * g_utf8_get_char(). 136 * Params: 137 * c = a Unicode character 138 * Returns: TRUE if c is a control character 139 */ 140 public static int unicharIscntrl(gunichar c) 141 { 142 // gboolean g_unichar_iscntrl (gunichar c); 143 return g_unichar_iscntrl(c); 144 } 145 146 /** 147 * Determines if a given character is assigned in the Unicode 148 * standard. 149 * Params: 150 * c = a Unicode character 151 * Returns: TRUE if the character has an assigned value 152 */ 153 public static int unicharIsdefined(gunichar c) 154 { 155 // gboolean g_unichar_isdefined (gunichar c); 156 return g_unichar_isdefined(c); 157 } 158 159 /** 160 * Determines whether a character is numeric (i.e. a digit). This 161 * covers ASCII 0-9 and also digits in other languages/scripts. Given 162 * some UTF-8 text, obtain a character value with g_utf8_get_char(). 163 * Params: 164 * c = a Unicode character 165 * Returns: TRUE if c is a digit 166 */ 167 public static int unicharIsdigit(gunichar c) 168 { 169 // gboolean g_unichar_isdigit (gunichar c); 170 return g_unichar_isdigit(c); 171 } 172 173 /** 174 * Determines whether a character is printable and not a space 175 * (returns FALSE for control characters, format characters, and 176 * spaces). g_unichar_isprint() is similar, but returns TRUE for 177 * spaces. Given some UTF-8 text, obtain a character value with 178 * g_utf8_get_char(). 179 * Params: 180 * c = a Unicode character 181 * Returns: TRUE if c is printable unless it's a space 182 */ 183 public static int unicharIsgraph(gunichar c) 184 { 185 // gboolean g_unichar_isgraph (gunichar c); 186 return g_unichar_isgraph(c); 187 } 188 189 /** 190 * Determines whether a character is a lowercase letter. 191 * Given some UTF-8 text, obtain a character value with 192 * g_utf8_get_char(). 193 * Params: 194 * c = a Unicode character 195 * Returns: TRUE if c is a lowercase letter 196 */ 197 public static int unicharIslower(gunichar c) 198 { 199 // gboolean g_unichar_islower (gunichar c); 200 return g_unichar_islower(c); 201 } 202 203 /** 204 * Determines whether a character is a mark (non-spacing mark, 205 * combining mark, or enclosing mark in Unicode speak). 206 * Given some UTF-8 text, obtain a character value 207 * with g_utf8_get_char(). 208 * Note: in most cases where isalpha characters are allowed, 209 * ismark characters should be allowed to as they are essential 210 * for writing most European languages as well as many non-Latin 211 * scripts. 212 * Since 2.14 213 * Params: 214 * c = a Unicode character 215 * Returns: TRUE if c is a mark character 216 */ 217 public static int unicharIsmark(gunichar c) 218 { 219 // gboolean g_unichar_ismark (gunichar c); 220 return g_unichar_ismark(c); 221 } 222 223 /** 224 * Determines whether a character is printable. 225 * Unlike g_unichar_isgraph(), returns TRUE for spaces. 226 * Given some UTF-8 text, obtain a character value with 227 * g_utf8_get_char(). 228 * Params: 229 * c = a Unicode character 230 * Returns: TRUE if c is printable 231 */ 232 public static int unicharIsprint(gunichar c) 233 { 234 // gboolean g_unichar_isprint (gunichar c); 235 return g_unichar_isprint(c); 236 } 237 238 /** 239 * Determines whether a character is punctuation or a symbol. 240 * Given some UTF-8 text, obtain a character value with 241 * g_utf8_get_char(). 242 * Params: 243 * c = a Unicode character 244 * Returns: TRUE if c is a punctuation or symbol character 245 */ 246 public static int unicharIspunct(gunichar c) 247 { 248 // gboolean g_unichar_ispunct (gunichar c); 249 return g_unichar_ispunct(c); 250 } 251 252 /** 253 * Determines whether a character is a space, tab, or line separator 254 * (newline, carriage return, etc.). Given some UTF-8 text, obtain a 255 * character value with g_utf8_get_char(). 256 * (Note: don't use this to do word breaking; you have to use 257 * Pango or equivalent to get word breaking right, the algorithm 258 * is fairly complex.) 259 * Params: 260 * c = a Unicode character 261 * Returns: TRUE if c is a space character 262 */ 263 public static int unicharIsspace(gunichar c) 264 { 265 // gboolean g_unichar_isspace (gunichar c); 266 return g_unichar_isspace(c); 267 } 268 269 /** 270 * Determines if a character is titlecase. Some characters in 271 * Unicode which are composites, such as the DZ digraph 272 * have three case variants instead of just two. The titlecase 273 * form is used at the beginning of a word where only the 274 * first letter is capitalized. The titlecase form of the DZ 275 * digraph is U+01F2 LATIN CAPITAL LETTTER D WITH SMALL LETTER Z. 276 * Params: 277 * c = a Unicode character 278 * Returns: TRUE if the character is titlecase 279 */ 280 public static int unicharIstitle(gunichar c) 281 { 282 // gboolean g_unichar_istitle (gunichar c); 283 return g_unichar_istitle(c); 284 } 285 286 /** 287 * Determines if a character is uppercase. 288 * Params: 289 * c = a Unicode character 290 * Returns: TRUE if c is an uppercase character 291 */ 292 public static int unicharIsupper(gunichar c) 293 { 294 // gboolean g_unichar_isupper (gunichar c); 295 return g_unichar_isupper(c); 296 } 297 298 /** 299 * Determines if a character is a hexidecimal digit. 300 * Params: 301 * c = a Unicode character. 302 * Returns: TRUE if the character is a hexadecimal digit 303 */ 304 public static int unicharIsxdigit(gunichar c) 305 { 306 // gboolean g_unichar_isxdigit (gunichar c); 307 return g_unichar_isxdigit(c); 308 } 309 310 /** 311 * Determines if a character is typically rendered in a double-width 312 * cell. 313 * Params: 314 * c = a Unicode character 315 * Returns: TRUE if the character is wide 316 */ 317 public static int unicharIswide(gunichar c) 318 { 319 // gboolean g_unichar_iswide (gunichar c); 320 return g_unichar_iswide(c); 321 } 322 323 /** 324 * Determines if a character is typically rendered in a double-width 325 * cell under legacy East Asian locales. If a character is wide according to 326 * g_unichar_iswide(), then it is also reported wide with this function, but 327 * the converse is not necessarily true. See the 328 * Unicode Standard 329 * Annex #11 for details. 330 * If a character passes the g_unichar_iswide() test then it will also pass 331 * this test, but not the other way around. Note that some characters may 332 * pas both this test and g_unichar_iszerowidth(). 333 * Since 2.12 334 * Params: 335 * c = a Unicode character 336 * Returns: TRUE if the character is wide in legacy East Asian locales 337 */ 338 public static int unicharIswideCjk(gunichar c) 339 { 340 // gboolean g_unichar_iswide_cjk (gunichar c); 341 return g_unichar_iswide_cjk(c); 342 } 343 344 /** 345 * Determines if a given character typically takes zero width when rendered. 346 * The return value is TRUE for all non-spacing and enclosing marks 347 * (e.g., combining accents), format characters, zero-width 348 * space, but not U+00AD SOFT HYPHEN. 349 * A typical use of this function is with one of g_unichar_iswide() or 350 * g_unichar_iswide_cjk() to determine the number of cells a string occupies 351 * when displayed on a grid display (terminals). However, note that not all 352 * terminals support zero-width rendering of zero-width marks. 353 * Since 2.14 354 * Params: 355 * c = a Unicode character 356 * Returns: TRUE if the character has zero width 357 */ 358 public static int unicharIszerowidth(gunichar c) 359 { 360 // gboolean g_unichar_iszerowidth (gunichar c); 361 return g_unichar_iszerowidth(c); 362 } 363 364 /** 365 * Converts a character to uppercase. 366 * Params: 367 * c = a Unicode character 368 * Returns: the result of converting c to uppercase. If c is not an lowercase or titlecase character, or has no upper case equivalent c is returned unchanged. 369 */ 370 public static gunichar unicharToupper(gunichar c) 371 { 372 // gunichar g_unichar_toupper (gunichar c); 373 return g_unichar_toupper(c); 374 } 375 376 /** 377 * Converts a character to lower case. 378 * Params: 379 * c = a Unicode character. 380 * Returns: the result of converting c to lower case. If c is not an upperlower or titlecase character, or has no lowercase equivalent c is returned unchanged. 381 */ 382 public static gunichar unicharTolower(gunichar c) 383 { 384 // gunichar g_unichar_tolower (gunichar c); 385 return g_unichar_tolower(c); 386 } 387 388 /** 389 * Converts a character to the titlecase. 390 * Params: 391 * c = a Unicode character 392 * Returns: the result of converting c to titlecase. If c is not an uppercase or lowercase character, c is returned unchanged. 393 */ 394 public static gunichar unicharTotitle(gunichar c) 395 { 396 // gunichar g_unichar_totitle (gunichar c); 397 return g_unichar_totitle(c); 398 } 399 400 /** 401 * Determines the numeric value of a character as a decimal 402 * digit. 403 * Params: 404 * c = a Unicode character 405 * Returns: If c is a decimal digit (according to g_unichar_isdigit()), its numeric value. Otherwise, -1. 406 */ 407 public static int unicharDigitValue(gunichar c) 408 { 409 // gint g_unichar_digit_value (gunichar c); 410 return g_unichar_digit_value(c); 411 } 412 413 /** 414 * Determines the numeric value of a character as a hexidecimal 415 * digit. 416 * Params: 417 * c = a Unicode character 418 * Returns: If c is a hex digit (according to g_unichar_isxdigit()), its numeric value. Otherwise, -1. 419 */ 420 public static int unicharXdigitValue(gunichar c) 421 { 422 // gint g_unichar_xdigit_value (gunichar c); 423 return g_unichar_xdigit_value(c); 424 } 425 426 /** 427 * Performs a single composition step of the 428 * Unicode canonical composition algorithm. 429 * This function includes algorithmic Hangul Jamo composition, 430 * but it is not exactly the inverse of g_unichar_decompose(). 431 * No composition can have either of a or b equal to zero. 432 * To be precise, this function composes if and only if 433 * there exists a Primary Composite P which is canonically 434 * equivalent to the sequence <a,b>. See the Unicode 435 * Standard for the definition of Primary Composite. 436 * If a and b do not compose a new character, ch is set to zero. 437 * See UAX#15 438 * for details. 439 * Since 2.30 440 * Params: 441 * a = a Unicode character 442 * b = a Unicode character 443 * ch = return location for the composed character 444 * Returns: TRUE if the characters could be composed 445 */ 446 public static int unicharCompose(gunichar a, gunichar b, out gunichar ch) 447 { 448 // gboolean g_unichar_compose (gunichar a, gunichar b, gunichar *ch); 449 return g_unichar_compose(a, b, &ch); 450 } 451 452 /** 453 * Performs a single decomposition step of the 454 * Unicode canonical decomposition algorithm. 455 * This function does not include compatibility 456 * decompositions. It does, however, include algorithmic 457 * Hangul Jamo decomposition, as well as 'singleton' 458 * decompositions which replace a character by a single 459 * other character. In the case of singletons *b will 460 * be set to zero. 461 * If ch is not decomposable, *a is set to ch and *b 462 * is set to zero. 463 * Note that the way Unicode decomposition pairs are 464 * defined, it is guaranteed that b would not decompose 465 * further, but a may itself decompose. To get the full 466 * canonical decomposition for ch, one would need to 467 * recursively call this function on a. Or use 468 * g_unichar_fully_decompose(). 469 * See UAX#15 470 * for details. 471 * Since 2.30 472 * Params: 473 * ch = a Unicode character 474 * a = return location for the first component of ch 475 * b = return location for the second component of ch 476 * Returns: TRUE if the character could be decomposed 477 */ 478 public static int unicharDecompose(gunichar ch, out gunichar a, out gunichar b) 479 { 480 // gboolean g_unichar_decompose (gunichar ch, gunichar *a, gunichar *b); 481 return g_unichar_decompose(ch, &a, &b); 482 } 483 484 /** 485 * Computes the canonical or compatibility decomposition of a 486 * Unicode character. For compatibility decomposition, 487 * pass TRUE for compat; for canonical decomposition 488 * pass FALSE for compat. 489 * The decomposed sequence is placed in result. Only up to 490 * result_len characters are written into result. The length 491 * of the full decomposition (irrespective of result_len) is 492 * returned by the function. For canonical decomposition, 493 * currently all decompositions are of length at most 4, but 494 * this may change in the future (very unlikely though). 495 * At any rate, Unicode does guarantee that a buffer of length 496 * 18 is always enough for both compatibility and canonical 497 * decompositions, so that is the size recommended. This is provided 498 * as G_UNICHAR_MAX_DECOMPOSITION_LENGTH. 499 * See UAX#15 500 * for details. 501 * Since 2.30 502 * Params: 503 * ch = a Unicode character. 504 * compat = whether perform canonical or compatibility decomposition 505 * result = location to store decomposed result, or NULL. [allow-none] 506 * Returns: the length of the full decomposition. 507 */ 508 public static gsize unicharFullyDecompose(gunichar ch, int compat, gunichar[] result) 509 { 510 // gsize g_unichar_fully_decompose (gunichar ch, gboolean compat, gunichar *result, gsize result_len); 511 return g_unichar_fully_decompose(ch, compat, result.ptr, cast(int) result.length); 512 } 513 514 /** 515 * Classifies a Unicode character by type. 516 * Params: 517 * c = a Unicode character 518 * Returns: the type of the character. 519 */ 520 public static GUnicodeType unicharType(gunichar c) 521 { 522 // GUnicodeType g_unichar_type (gunichar c); 523 return g_unichar_type(c); 524 } 525 526 /** 527 * Determines the break type of c. c should be a Unicode character 528 * (to derive a character from UTF-8 encoded text, use 529 * g_utf8_get_char()). The break type is used to find word and line 530 * breaks ("text boundaries"), Pango implements the Unicode boundary 531 * resolution algorithms and normally you would use a function such 532 * as pango_break() instead of caring about break types yourself. 533 * Params: 534 * c = a Unicode character 535 * Returns: the break type of c 536 */ 537 public static GUnicodeBreakType unicharBreakType(gunichar c) 538 { 539 // GUnicodeBreakType g_unichar_break_type (gunichar c); 540 return g_unichar_break_type(c); 541 } 542 543 /** 544 * Determines the canonical combining class of a Unicode character. 545 * Since 2.14 546 * Params: 547 * uc = a Unicode character 548 * Returns: the combining class of the character 549 */ 550 public static int unicharCombiningClass(gunichar uc) 551 { 552 // gint g_unichar_combining_class (gunichar uc); 553 return g_unichar_combining_class(uc); 554 } 555 556 /** 557 * Computes the canonical ordering of a string in-place. 558 * This rearranges decomposed characters in the string 559 * according to their combining classes. See the Unicode 560 * manual for more information. 561 * Params: 562 * string = a UCS-4 encoded string. 563 */ 564 public static void unicodeCanonicalOrdering(gunichar[] string) 565 { 566 // void g_unicode_canonical_ordering (gunichar *string, gsize len); 567 g_unicode_canonical_ordering(string.ptr, cast(int) string.length); 568 } 569 570 /** 571 * Warning 572 * g_unicode_canonical_decomposition has been deprecated since version 2.30 and should not be used in newly-written code. Use the more flexible g_unichar_fully_decompose() 573 * instead. 574 * Computes the canonical decomposition of a Unicode character. 575 * Params: 576 * ch = a Unicode character. 577 * Returns: a newly allocated string of Unicode characters. result_len is set to the resulting length of the string. 578 */ 579 public static gunichar[] unicodeCanonicalDecomposition(gunichar ch) 580 { 581 // gunichar * g_unicode_canonical_decomposition (gunichar ch, gsize *result_len); 582 gsize resultLen; 583 auto p = g_unicode_canonical_decomposition(ch, &resultLen); 584 585 if(p is null) 586 { 587 return null; 588 } 589 590 return p[0 .. resultLen]; 591 } 592 593 /** 594 * In Unicode, some characters are mirrored. This 595 * means that their images are mirrored horizontally in text that is laid 596 * out from right to left. For instance, "(" would become its mirror image, 597 * ")", in right-to-left text. 598 * If ch has the Unicode mirrored property and there is another unicode 599 * character that typically has a glyph that is the mirror image of ch's 600 * glyph and mirrored_ch is set, it puts that character in the address 601 * pointed to by mirrored_ch. Otherwise the original character is put. 602 * Since 2.4 603 * Params: 604 * ch = a Unicode character 605 * mirroredCh = location to store the mirrored character 606 * Returns: TRUE if ch has a mirrored character, FALSE otherwise 607 */ 608 public static int unicharGetMirrorChar(gunichar ch, out gunichar mirroredCh) 609 { 610 // gboolean g_unichar_get_mirror_char (gunichar ch, gunichar *mirrored_ch); 611 return g_unichar_get_mirror_char(ch, &mirroredCh); 612 } 613 614 /** 615 * Looks up the GUnicodeScript for a particular character (as defined 616 * by Unicode Standard Annex #24). No check is made for ch being a 617 * valid Unicode character; if you pass in invalid character, the 618 * result is undefined. 619 * This function is equivalent to pango_script_for_unichar() and the 620 * two are interchangeable. 621 * Since 2.14 622 * Params: 623 * ch = a Unicode character 624 * Returns: the GUnicodeScript for the character. 625 */ 626 public static GUnicodeScript unicharGetScript(gunichar ch) 627 { 628 // GUnicodeScript g_unichar_get_script (gunichar ch); 629 return g_unichar_get_script(ch); 630 } 631 632 /** 633 * Looks up the Unicode script for iso15924. ISO 15924 assigns four-letter 634 * codes to scripts. For example, the code for Arabic is 'Arab'. 635 * This function accepts four letter codes encoded as a guint32 in a 636 * big-endian fashion. That is, the code expected for Arabic is 637 * 0x41726162 (0x41 is ASCII code for 'A', 0x72 is ASCII code for 'r', etc). 638 * See Codes for the 639 * representation of names of scripts for details. 640 * Since 2.30 641 * Params: 642 * iso15924 = a Unicode script 643 * Returns: the Unicode script for iso15924, or of G_UNICODE_SCRIPT_INVALID_CODE if iso15924 is zero and G_UNICODE_SCRIPT_UNKNOWN if iso15924 is unknown. 644 */ 645 public static GUnicodeScript unicodeScriptFromIso15924(uint iso15924) 646 { 647 // GUnicodeScript g_unicode_script_from_iso15924 (guint32 iso15924); 648 return g_unicode_script_from_iso15924(iso15924); 649 } 650 651 /** 652 * Looks up the ISO 15924 code for script. ISO 15924 assigns four-letter 653 * codes to scripts. For example, the code for Arabic is 'Arab'. The 654 * four letter codes are encoded as a guint32 by this function in a 655 * big-endian fashion. That is, the code returned for Arabic is 656 * 0x41726162 (0x41 is ASCII code for 'A', 0x72 is ASCII code for 'r', etc). 657 * See Codes for the 658 * representation of names of scripts for details. 659 * Since 2.30 660 * Params: 661 * script = a Unicode script 662 * Returns: the ISO 15924 code for script, encoded as an integer, of zero if script is G_UNICODE_SCRIPT_INVALID_CODE or ISO 15924 code 'Zzzz' (script code for UNKNOWN) if script is not understood. 663 */ 664 public static uint unicodeScriptToIso15924(GUnicodeScript script) 665 { 666 // guint32 g_unicode_script_to_iso15924 (GUnicodeScript script); 667 return g_unicode_script_to_iso15924(script); 668 } 669 670 /** 671 * Converts a sequence of bytes encoded as UTF-8 to a Unicode character. 672 * If p does not point to a valid UTF-8 encoded character, results are 673 * undefined. If you are not sure that the bytes are complete 674 * valid Unicode characters, you should use g_utf8_get_char_validated() 675 * instead. 676 * Params: 677 * p = a pointer to Unicode character encoded as UTF-8 678 * Returns: the resulting character 679 */ 680 public static gunichar utf8_GetChar(string p) 681 { 682 // gunichar g_utf8_get_char (const gchar *p); 683 return g_utf8_get_char(Str.toStringz(p)); 684 } 685 686 /** 687 * Convert a sequence of bytes encoded as UTF-8 to a Unicode character. 688 * This function checks for incomplete characters, for invalid characters 689 * such as characters that are out of the range of Unicode, and for 690 * overlong encodings of valid characters. 691 * Params: 692 * p = a pointer to Unicode character encoded as UTF-8 693 * Returns: the resulting character. If p points to a partial sequence at the end of a string that could begin a valid character (or if max_len is zero), returns (gunichar)-2; otherwise, if p does not point to a valid UTF-8 encoded Unicode character, returns (gunichar)-1. 694 */ 695 public static gunichar utf8_GetCharValidated(string p) 696 { 697 // gunichar g_utf8_get_char_validated (const gchar *p, gssize max_len); 698 return g_utf8_get_char_validated(cast(char*)p.ptr, cast(int) p.length); 699 } 700 701 /** 702 * Converts from an integer character offset to a pointer to a position 703 * within the string. 704 * Since 2.10, this function allows to pass a negative offset to 705 * step backwards. It is usually worth stepping backwards from the end 706 * instead of forwards if offset is in the last fourth of the string, 707 * since moving forward is about 3 times faster than moving backward. 708 * Note 709 * This function doesn't abort when reaching the end of str. Therefore 710 * you should be sure that offset is within string boundaries before 711 * calling that function. Call g_utf8_strlen() when unsure. 712 * This limitation exists as this function is called frequently during 713 * text rendering and therefore has to be as fast as possible. 714 * Params: 715 * str = a UTF-8 encoded string 716 * offset = a character offset within str 717 * Returns: the resulting pointer 718 */ 719 public static string utf8_OffsetToPointer(string str, glong offset) 720 { 721 // gchar * g_utf8_offset_to_pointer (const gchar *str, glong offset); 722 return Str.toString(g_utf8_offset_to_pointer(Str.toStringz(str), offset)); 723 } 724 725 /** 726 * Converts from a pointer to position within a string to a integer 727 * character offset. 728 * Since 2.10, this function allows pos to be before str, and returns 729 * a negative offset in this case. 730 * Params: 731 * str = a UTF-8 encoded string 732 * pos = a pointer to a position within str 733 * Returns: the resulting character offset 734 */ 735 public static glong utf8_PointerToOffset(string str, string pos) 736 { 737 // glong g_utf8_pointer_to_offset (const gchar *str, const gchar *pos); 738 return g_utf8_pointer_to_offset(Str.toStringz(str), Str.toStringz(pos)); 739 } 740 741 /** 742 * Finds the previous UTF-8 character in the string before p. 743 * p does not have to be at the beginning of a UTF-8 character. No check 744 * is made to see if the character found is actually valid other than 745 * it starts with an appropriate byte. If p might be the first 746 * character of the string, you must use g_utf8_find_prev_char() instead. 747 * Params: 748 * p = a pointer to a position within a UTF-8 encoded string 749 * Returns: a pointer to the found character. 750 */ 751 public static string utf8_PrevChar(string p) 752 { 753 // gchar * g_utf8_prev_char (const gchar *p); 754 return Str.toString(g_utf8_prev_char(Str.toStringz(p))); 755 } 756 757 /** 758 * Finds the start of the next UTF-8 character in the string after p. 759 * p does not have to be at the beginning of a UTF-8 character. No check 760 * is made to see if the character found is actually valid other than 761 * it starts with an appropriate byte. 762 * Params: 763 * p = a pointer to a position within a UTF-8 encoded string 764 * end = a pointer to the byte following the end of the string, 765 * or NULL to indicate that the string is nul-terminated. 766 * Returns: a pointer to the found character or NULL 767 */ 768 public static string utf8_FindNextChar(string p, string end) 769 { 770 // gchar * g_utf8_find_next_char (const gchar *p, const gchar *end); 771 return Str.toString(g_utf8_find_next_char(Str.toStringz(p), Str.toStringz(end))); 772 } 773 774 /** 775 * Given a position p with a UTF-8 encoded string str, find the start 776 * of the previous UTF-8 character starting before p. Returns NULL if no 777 * UTF-8 characters are present in str before p. 778 * p does not have to be at the beginning of a UTF-8 character. No check 779 * is made to see if the character found is actually valid other than 780 * it starts with an appropriate byte. 781 * Params: 782 * str = pointer to the beginning of a UTF-8 encoded string 783 * p = pointer to some position within str 784 * Returns: a pointer to the found character or NULL. 785 */ 786 public static string utf8_FindPrevChar(string str, string p) 787 { 788 // gchar * g_utf8_find_prev_char (const gchar *str, const gchar *p); 789 return Str.toString(g_utf8_find_prev_char(Str.toStringz(str), Str.toStringz(p))); 790 } 791 792 /** 793 * Computes the length of the string in characters, not including 794 * the terminating nul character. If the max'th byte falls in the 795 * middle of a character, the last (partial) character is not counted. 796 * Params: 797 * p = pointer to the start of a UTF-8 encoded string 798 * Returns: the length of the string in characters 799 */ 800 public static glong utf8_Strlen(string p) 801 { 802 // glong g_utf8_strlen (const gchar *p, gssize max); 803 return g_utf8_strlen(cast(char*)p.ptr, cast(int) p.length); 804 } 805 806 /** 807 * Like the standard C strncpy() function, but 808 * copies a given number of characters instead of a given number of 809 * bytes. The src string must be valid UTF-8 encoded text. 810 * (Use g_utf8_validate() on all text before trying to use UTF-8 811 * utility functions with it.) 812 * Params: 813 * dest = buffer to fill with characters from src 814 * src = UTF-8 encoded string 815 * n = character count 816 * Returns: dest 817 */ 818 public static string utf8_Strncpy(string dest, string src, gsize n) 819 { 820 // gchar * g_utf8_strncpy (gchar *dest, const gchar *src, gsize n); 821 return Str.toString(g_utf8_strncpy(Str.toStringz(dest), Str.toStringz(src), n)); 822 } 823 824 /** 825 * Finds the leftmost occurrence of the given Unicode character 826 * in a UTF-8 encoded string, while limiting the search to len bytes. 827 * If len is -1, allow unbounded search. 828 * Params: 829 * p = a nul-terminated UTF-8 encoded string 830 * len = the maximum length of p 831 * c = a Unicode character 832 * Returns: NULL if the string does not contain the character, otherwise, a pointer to the start of the leftmost occurrence of the character in the string. 833 */ 834 public static string utf8_Strchr(string p, gssize len, gunichar c) 835 { 836 // gchar * g_utf8_strchr (const gchar *p, gssize len, gunichar c); 837 return Str.toString(g_utf8_strchr(Str.toStringz(p), len, c)); 838 } 839 840 /** 841 * Find the rightmost occurrence of the given Unicode character 842 * in a UTF-8 encoded string, while limiting the search to len bytes. 843 * If len is -1, allow unbounded search. 844 * Params: 845 * p = a nul-terminated UTF-8 encoded string 846 * len = the maximum length of p 847 * c = a Unicode character 848 * Returns: NULL if the string does not contain the character, otherwise, a pointer to the start of the rightmost occurrence of the character in the string. 849 */ 850 public static string utf8_Strrchr(string p, gssize len, gunichar c) 851 { 852 // gchar * g_utf8_strrchr (const gchar *p, gssize len, gunichar c); 853 return Str.toString(g_utf8_strrchr(Str.toStringz(p), len, c)); 854 } 855 856 /** 857 * Reverses a UTF-8 string. str must be valid UTF-8 encoded text. 858 * (Use g_utf8_validate() on all text before trying to use UTF-8 859 * utility functions with it.) 860 * This function is intended for programmatic uses of reversed strings. 861 * It pays no attention to decomposed characters, combining marks, byte 862 * order marks, directional indicators (LRM, LRO, etc) and similar 863 * characters which might need special handling when reversing a string 864 * for display purposes. 865 * Note that unlike g_strreverse(), this function returns 866 * newly-allocated memory, which should be freed with g_free() when 867 * no longer needed. 868 * Since 2.2 869 * Params: 870 * str = a UTF-8 encoded string 871 * Returns: a newly-allocated string which is the reverse of str. 872 */ 873 public static string utf8_Strreverse(string str) 874 { 875 // gchar * g_utf8_strreverse (const gchar *str, gssize len); 876 return Str.toString(g_utf8_strreverse(cast(char*)str.ptr, cast(int) str.length)); 877 } 878 879 /** 880 * Copies a substring out of a UTF-8 encoded string. 881 * The substring will contain end_pos - start_pos 882 * characters. 883 * Since 2.30 884 * Params: 885 * str = a UTF-8 encoded string 886 * startPos = a character offset within str 887 * endPos = another character offset within str 888 * Returns: a newly allocated copy of the requested substring. Free with g_free() when no longer needed. 889 */ 890 public static string utf8_Substring(string str, glong startPos, glong endPos) 891 { 892 // gchar * g_utf8_substring (const gchar *str, glong start_pos, glong end_pos); 893 return Str.toString(g_utf8_substring(Str.toStringz(str), startPos, endPos)); 894 } 895 896 /** 897 * Validates UTF-8 encoded text. str is the text to validate; 898 * if str is nul-terminated, then max_len can be -1, otherwise 899 * max_len should be the number of bytes to validate. 900 * If end is non-NULL, then the end of the valid range 901 * will be stored there (i.e. the start of the first invalid 902 * character if some bytes were invalid, or the end of the text 903 * being validated otherwise). 904 * Note that g_utf8_validate() returns FALSE if max_len is 905 * positive and any of the max_len bytes are NUL. 906 * Returns TRUE if all of str was valid. Many GLib and GTK+ 907 * routines require valid UTF-8 as input; 908 * so data read from a file or the network should be checked 909 * with g_utf8_validate() before doing anything else with it. 910 * Params: 911 * str = a pointer to character data. [array length=max_len][element-type guint8] 912 * end = return location for end of valid data. [allow-none][out][transfer none] 913 * Returns: TRUE if the text was valid UTF-8 914 */ 915 public static int utf8_Validate(string str, out string end) 916 { 917 // gboolean g_utf8_validate (const gchar *str, gssize max_len, const gchar **end); 918 char* outend = null; 919 920 auto p = g_utf8_validate(cast(char*)str.ptr, cast(int) str.length, &outend); 921 922 end = Str.toString(outend); 923 return p; 924 } 925 926 /** 927 * Converts all Unicode characters in the string that have a case 928 * to uppercase. The exact manner that this is done depends 929 * on the current locale, and may result in the number of 930 * characters in the string increasing. (For instance, the 931 * German ess-zet will be changed to SS.) 932 * Params: 933 * str = a UTF-8 encoded string 934 * Returns: a newly allocated string, with all characters converted to uppercase. 935 */ 936 public static string utf8_Strup(string str) 937 { 938 // gchar * g_utf8_strup (const gchar *str, gssize len); 939 return Str.toString(g_utf8_strup(cast(char*)str.ptr, cast(int) str.length)); 940 } 941 942 /** 943 * Converts all Unicode characters in the string that have a case 944 * to lowercase. The exact manner that this is done depends 945 * on the current locale, and may result in the number of 946 * characters in the string changing. 947 * Params: 948 * str = a UTF-8 encoded string 949 * Returns: a newly allocated string, with all characters converted to lowercase. 950 */ 951 public static string utf8_Strdown(string str) 952 { 953 // gchar * g_utf8_strdown (const gchar *str, gssize len); 954 return Str.toString(g_utf8_strdown(cast(char*)str.ptr, cast(int) str.length)); 955 } 956 957 /** 958 * Converts a string into a form that is independent of case. The 959 * result will not correspond to any particular case, but can be 960 * compared for equality or ordered with the results of calling 961 * g_utf8_casefold() on other strings. 962 * Note that calling g_utf8_casefold() followed by g_utf8_collate() is 963 * only an approximation to the correct linguistic case insensitive 964 * ordering, though it is a fairly good one. Getting this exactly 965 * right would require a more sophisticated collation function that 966 * takes case sensitivity into account. GLib does not currently 967 * provide such a function. 968 * Params: 969 * str = a UTF-8 encoded string 970 * Returns: a newly allocated string, that is a case independent form of str. 971 */ 972 public static string utf8_Casefold(string str) 973 { 974 // gchar * g_utf8_casefold (const gchar *str, gssize len); 975 return Str.toString(g_utf8_casefold(cast(char*)str.ptr, cast(int) str.length)); 976 } 977 978 /** 979 * Converts a string into canonical form, standardizing 980 * such issues as whether a character with an accent 981 * is represented as a base character and combining 982 * accent or as a single precomposed character. The 983 * string has to be valid UTF-8, otherwise NULL is 984 * returned. You should generally call g_utf8_normalize() 985 * before comparing two Unicode strings. 986 * The normalization mode G_NORMALIZE_DEFAULT only 987 * standardizes differences that do not affect the 988 * text content, such as the above-mentioned accent 989 * representation. G_NORMALIZE_ALL also standardizes 990 * the "compatibility" characters in Unicode, such 991 * as SUPERSCRIPT THREE to the standard forms 992 * (in this case DIGIT THREE). Formatting information 993 * may be lost but for most text operations such 994 * characters should be considered the same. 995 * G_NORMALIZE_DEFAULT_COMPOSE and G_NORMALIZE_ALL_COMPOSE 996 * are like G_NORMALIZE_DEFAULT and G_NORMALIZE_ALL, 997 * but returned a result with composed forms rather 998 * than a maximally decomposed form. This is often 999 * useful if you intend to convert the string to 1000 * a legacy encoding or pass it to a system with 1001 * less capable Unicode handling. 1002 * Params: 1003 * str = a UTF-8 encoded string. 1004 * mode = the type of normalization to perform. 1005 * Returns: a newly allocated string, that is the normalized form of str, or NULL if str is not valid UTF-8. 1006 */ 1007 public static string utf8_Normalize(string str, GNormalizeMode mode) 1008 { 1009 // gchar * g_utf8_normalize (const gchar *str, gssize len, GNormalizeMode mode); 1010 return Str.toString(g_utf8_normalize(cast(char*)str.ptr, cast(int) str.length, mode)); 1011 } 1012 1013 /** 1014 * Compares two strings for ordering using the linguistically 1015 * correct rules for the current locale. 1016 * When sorting a large number of strings, it will be significantly 1017 * faster to obtain collation keys with g_utf8_collate_key() and 1018 * compare the keys with strcmp() when sorting instead of sorting 1019 * the original strings. 1020 * Params: 1021 * str1 = a UTF-8 encoded string 1022 * str2 = a UTF-8 encoded string 1023 * Returns: < 0 if str1 compares before str2, 0 if they compare equal, > 0 if str1 compares after str2. 1024 */ 1025 public static int utf8_Collate(string str1, string str2) 1026 { 1027 // gint g_utf8_collate (const gchar *str1, const gchar *str2); 1028 return g_utf8_collate(Str.toStringz(str1), Str.toStringz(str2)); 1029 } 1030 1031 /** 1032 * Converts a string into a collation key that can be compared 1033 * with other collation keys produced by the same function using 1034 * strcmp(). 1035 * The results of comparing the collation keys of two strings 1036 * with strcmp() will always be the same as comparing the two 1037 * original keys with g_utf8_collate(). 1038 * Note that this function depends on the 1039 * current locale. 1040 * Params: 1041 * str = a UTF-8 encoded string. 1042 * Returns: a newly allocated string. This string should be freed with g_free() when you are done with it. 1043 */ 1044 public static string utf8_CollateKey(string str) 1045 { 1046 // gchar * g_utf8_collate_key (const gchar *str, gssize len); 1047 return Str.toString(g_utf8_collate_key(cast(char*)str.ptr, cast(int) str.length)); 1048 } 1049 1050 /** 1051 * Converts a string into a collation key that can be compared 1052 * with other collation keys produced by the same function using strcmp(). 1053 * In order to sort filenames correctly, this function treats the dot '.' 1054 * as a special case. Most dictionary orderings seem to consider it 1055 * insignificant, thus producing the ordering "event.c" "eventgenerator.c" 1056 * "event.h" instead of "event.c" "event.h" "eventgenerator.c". Also, we 1057 * would like to treat numbers intelligently so that "file1" "file10" "file5" 1058 * is sorted as "file1" "file5" "file10". 1059 * Note that this function depends on the 1060 * current locale. 1061 * Since 2.8 1062 * Params: 1063 * str = a UTF-8 encoded string. 1064 * len = length of str, in bytes, or -1 if str is nul-terminated. 1065 * Returns: a newly allocated string. This string should be freed with g_free() when you are done with it. 1066 */ 1067 public static string utf8_CollateKeyForFilename(string str) 1068 { 1069 // gchar * g_utf8_collate_key_for_filename (const gchar *str, gssize len); 1070 return Str.toString(g_utf8_collate_key_for_filename(cast(char*)str.ptr, cast(int) str.length)); 1071 } 1072 1073 /** 1074 * Convert a string from UTF-8 to UTF-16. A 0 character will be 1075 * added to the result after the converted text. 1076 * Params: 1077 * str = a UTF-8 encoded string 1078 * itemsRead = location to store number of bytes read, or NULL. 1079 * If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be 1080 * returned in case str contains a trailing partial 1081 * character. If an error occurs then the index of the 1082 * invalid input is stored here. [allow-none] 1083 * Returns: a pointer to a newly allocated UTF-16 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set. 1084 * Throws: GException on failure. 1085 */ 1086 public static gunichar2[] utf8_ToUtf16(string str, out glong itemsRead) 1087 { 1088 // gunichar2 * g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error); 1089 glong itemsWritten; 1090 GError* err = null; 1091 1092 auto p = g_utf8_to_utf16(cast(char*)str.ptr, cast(int) str.length, &itemsRead, &itemsWritten, &err); 1093 1094 if (err !is null) 1095 { 1096 throw new GException( new ErrorG(err) ); 1097 } 1098 1099 1100 if(p is null) 1101 { 1102 return null; 1103 } 1104 1105 return p[0 .. itemsWritten]; 1106 } 1107 1108 /** 1109 * Convert a string from UTF-8 to a 32-bit fixed width 1110 * representation as UCS-4. A trailing 0 character will be added to the 1111 * string after the converted text. 1112 * Params: 1113 * str = a UTF-8 encoded string 1114 * itemsRead = location to store number of bytes read, or NULL. 1115 * If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be 1116 * returned in case str contains a trailing partial 1117 * character. If an error occurs then the index of the 1118 * invalid input is stored here. [allow-none] 1119 * Returns: a pointer to a newly allocated UCS-4 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set. 1120 * Throws: GException on failure. 1121 */ 1122 public static gunichar[] utf8_ToUcs4(string str, out glong itemsRead) 1123 { 1124 // gunichar * g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error); 1125 glong itemsWritten; 1126 GError* err = null; 1127 1128 auto p = g_utf8_to_ucs4(cast(char*)str.ptr, cast(int) str.length, &itemsRead, &itemsWritten, &err); 1129 1130 if (err !is null) 1131 { 1132 throw new GException( new ErrorG(err) ); 1133 } 1134 1135 1136 if(p is null) 1137 { 1138 return null; 1139 } 1140 1141 return p[0 .. itemsWritten]; 1142 } 1143 1144 /** 1145 * Convert a string from UTF-8 to a 32-bit fixed width 1146 * representation as UCS-4, assuming valid UTF-8 input. 1147 * This function is roughly twice as fast as g_utf8_to_ucs4() 1148 * but does no error checking on the input. A trailing 0 character 1149 * will be added to the string after the converted text. 1150 * Params: 1151 * str = a UTF-8 encoded string 1152 * Returns: a pointer to a newly allocated UCS-4 string. This value must be freed with g_free(). 1153 */ 1154 public static gunichar[] utf8_ToUcs4_Fast(string str) 1155 { 1156 // gunichar * g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written); 1157 glong itemsWritten; 1158 auto p = g_utf8_to_ucs4_fast(cast(char*)str.ptr, cast(int) str.length, &itemsWritten); 1159 1160 if(p is null) 1161 { 1162 return null; 1163 } 1164 1165 return p[0 .. itemsWritten]; 1166 } 1167 1168 /** 1169 * Convert a string from UTF-16 to UCS-4. The result will be 1170 * nul-terminated. 1171 * Params: 1172 * str = a UTF-16 encoded string 1173 * itemsRead = location to store number of words read, or NULL. 1174 * If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be 1175 * returned in case str contains a trailing partial 1176 * character. If an error occurs then the index of the 1177 * invalid input is stored here. [allow-none] 1178 * Returns: a pointer to a newly allocated UCS-4 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set. 1179 * Throws: GException on failure. 1180 */ 1181 public static gunichar[] utf16_ToUcs4(gunichar2[] str, out glong itemsRead) 1182 { 1183 // gunichar * g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error); 1184 glong itemsWritten; 1185 GError* err = null; 1186 1187 auto p = g_utf16_to_ucs4(str.ptr, cast(int) str.length, &itemsRead, &itemsWritten, &err); 1188 1189 if (err !is null) 1190 { 1191 throw new GException( new ErrorG(err) ); 1192 } 1193 1194 1195 if(p is null) 1196 { 1197 return null; 1198 } 1199 1200 return p[0 .. itemsWritten]; 1201 } 1202 1203 /** 1204 * Convert a string from UTF-16 to UTF-8. The result will be 1205 * terminated with a 0 byte. 1206 * Note that the input is expected to be already in native endianness, 1207 * an initial byte-order-mark character is not handled specially. 1208 * g_convert() can be used to convert a byte buffer of UTF-16 data of 1209 * ambiguous endianess. 1210 * Further note that this function does not validate the result 1211 * string; it may e.g. include embedded NUL characters. The only 1212 * validation done by this function is to ensure that the input can 1213 * be correctly interpreted as UTF-16, i.e. it doesn't contain 1214 * things unpaired surrogates. 1215 * Params: 1216 * str = a UTF-16 encoded string 1217 * itemsRead = location to store number of words read, or NULL. 1218 * If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be 1219 * returned in case str contains a trailing partial 1220 * character. If an error occurs then the index of the 1221 * invalid input is stored here. [allow-none] 1222 * Returns: a pointer to a newly allocated UTF-8 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set. 1223 * Throws: GException on failure. 1224 */ 1225 public static string utf16_ToUtf8(gunichar2[] str, out glong itemsRead) 1226 { 1227 // gchar * g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error); 1228 glong itemsWritten; 1229 GError* err = null; 1230 1231 auto p = g_utf16_to_utf8(str.ptr, cast(int) str.length, &itemsRead, &itemsWritten, &err); 1232 1233 if (err !is null) 1234 { 1235 throw new GException( new ErrorG(err) ); 1236 } 1237 1238 return Str.toString(p, itemsWritten); 1239 } 1240 1241 /** 1242 * Convert a string from UCS-4 to UTF-16. A 0 character will be 1243 * added to the result after the converted text. 1244 * Params: 1245 * str = a UCS-4 encoded string 1246 * itemsRead = location to store number of bytes read, or NULL. 1247 * If an error occurs then the index of the invalid input 1248 * is stored here. [allow-none] 1249 * Returns: a pointer to a newly allocated UTF-16 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set. 1250 * Throws: GException on failure. 1251 */ 1252 public static gunichar2[] ucs4_ToUtf16(gunichar[] str, out glong itemsRead) 1253 { 1254 // gunichar2 * g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error); 1255 glong itemsWritten; 1256 GError* err = null; 1257 1258 auto p = g_ucs4_to_utf16(str.ptr, cast(int) str.length, &itemsRead, &itemsWritten, &err); 1259 1260 if (err !is null) 1261 { 1262 throw new GException( new ErrorG(err) ); 1263 } 1264 1265 1266 if(p is null) 1267 { 1268 return null; 1269 } 1270 1271 return p[0 .. itemsWritten]; 1272 } 1273 1274 /** 1275 * Convert a string from a 32-bit fixed width representation as UCS-4. 1276 * to UTF-8. The result will be terminated with a 0 byte. 1277 * Params: 1278 * str = a UCS-4 encoded string 1279 * itemsRead = location to store number of characters read, or NULL. [allow-none] 1280 * Returns: a pointer to a newly allocated UTF-8 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set. In that case, items_read will be set to the position of the first invalid input character. 1281 * Throws: GException on failure. 1282 */ 1283 public static string ucs4_ToUtf8(gunichar[] str, out glong itemsRead) 1284 { 1285 // gchar * g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error); 1286 glong itemsWritten; 1287 GError* err = null; 1288 1289 auto p = g_ucs4_to_utf8(str.ptr, cast(int) str.length, &itemsRead, &itemsWritten, &err); 1290 1291 if (err !is null) 1292 { 1293 throw new GException( new ErrorG(err) ); 1294 } 1295 1296 return Str.toString(p, itemsWritten); 1297 } 1298 1299 /** 1300 * Converts a single character to UTF-8. 1301 * Params: 1302 * c = a Unicode character code 1303 * outbuf = output buffer, must have at least 6 bytes of space. 1304 * If NULL, the length will be computed and returned 1305 * and nothing will be written to outbuf. 1306 * Returns: number of bytes written 1307 */ 1308 public static int unicharToUtf8(gunichar c, string outbuf) 1309 { 1310 // gint g_unichar_to_utf8 (gunichar c, gchar *outbuf); 1311 return g_unichar_to_utf8(c, Str.toStringz(outbuf)); 1312 } 1313 }