/*
 * This file is part of gtkD.
 *
 * gtkD is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version, with
 * some exceptions, please read the COPYING file.
 *
 * gtkD is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with gtkD; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 */

// generated automatically - do not change
// find conversion definition on APILookup.txt
// implement new conversion functionalities on the wrap.utils pakage


module glib.Regex;

private import glib.ConstructionException;
private import glib.ErrorG;
private import glib.GException;
private import glib.MatchInfo;
private import glib.Str;
private import gtkc.glib;
public  import gtkc.glibtypes;
private import gtkd.Loader;


/**
 * The g_regex_*() functions implement regular
 * expression pattern matching using syntax and semantics similar to
 * Perl regular expression.
 *
 * Some functions accept a @start_position argument, setting it differs
 * from just passing over a shortened string and setting #G_REGEX_MATCH_NOTBOL
 * in the case of a pattern that begins with any kind of lookbehind assertion.
 * For example, consider the pattern "\Biss\B" which finds occurrences of "iss"
 * in the middle of words. ("\B" matches only if the current position in the
 * subject is not a word boundary.) When applied to the string "Mississipi"
 * from the fourth byte, namely "issipi", it does not match, because "\B" is
 * always false at the start of the subject, which is deemed to be a word
 * boundary. However, if the entire string is passed, but with
 * @start_position set to 4, it finds the second occurrence of "iss" because
 * it is able to look behind the starting point to discover that it is
 * preceded by a letter.
 *
 * Note that, unless you set the #G_REGEX_RAW flag, all the strings passed
 * to these functions must be encoded in UTF-8. The lengths and the positions
 * inside the strings are in bytes and not in characters, so, for instance,
 * "\xc3\xa0" (i.e. "à") is two bytes long but it is treated as a
 * single character. If you set #G_REGEX_RAW the strings can be non-valid
 * UTF-8 strings and a byte is treated as a character, so "\xc3\xa0" is two
 * bytes and two characters long.
 *
 * When matching a pattern, "\n" matches only against a "\n" character in
 * the string, and "\r" matches only a "\r" character. To match any newline
 * sequence use "\R". This particular group matches either the two-character
 * sequence CR + LF ("\r\n"), or one of the single characters LF (linefeed,
 * U+000A, "\n"), VT (vertical tab, U+000B, "\v"), FF (formfeed, U+000C, "\f"),
 * CR (carriage return, U+000D, "\r"), NEL (next line, U+0085), LS (line
 * separator, U+2028), or PS (paragraph separator, U+2029).
 *
 * The behaviour of the dot, circumflex, and dollar metacharacters is
 * affected by newline characters, the default is to recognize any newline
 * character (the same characters recognized by "\R"). This can be changed
 * with #G_REGEX_NEWLINE_CR, #G_REGEX_NEWLINE_LF and #G_REGEX_NEWLINE_CRLF
 * compile options, and with #G_REGEX_MATCH_NEWLINE_ANY,
 * #G_REGEX_MATCH_NEWLINE_CR, #G_REGEX_MATCH_NEWLINE_LF and
 * #G_REGEX_MATCH_NEWLINE_CRLF match options. These settings are also
 * relevant when compiling a pattern if #G_REGEX_EXTENDED is set, and an
 * unescaped "#" outside a character class is encountered. This indicates
 * a comment that lasts until after the next newline.
 *
 * When setting the %G_REGEX_JAVASCRIPT_COMPAT flag, pattern syntax and pattern
 * matching is changed to be compatible with the way that regular expressions
 * work in JavaScript. More precisely, a lonely ']' character in the pattern
 * is a syntax error; the '\x' escape only allows 0 to 2 hexadecimal digits, and
 * you must use the '\u' escape sequence with 4 hex digits to specify a unicode
 * codepoint instead of '\x' or '\x{....}'. If '\x' or '\u' are not followed by
 * the specified number of hex digits, they match 'x' and 'u' literally; also
 * '\U' always matches 'U' instead of being an error in the pattern. Finally,
 * pattern matching is modified so that back references to an unset subpattern
 * group produces a match with the empty string instead of an error. See
 * pcreapi(3) for more information.
 *
 * Creating and manipulating the same #GRegex structure from different
 * threads is not a problem as #GRegex does not modify its internal
 * state between creation and destruction, on the other hand #GMatchInfo
 * is not threadsafe.
 *
 * The regular expressions low-level functionalities are obtained through
 * the excellent
 * [PCRE](http://www.pcre.org/)
 * library written by Philip Hazel.
 *
 * Since: 2.14
 */
public class Regex
{
	/** the main Gtk struct */
	protected GRegex* gRegex;
	/** true when the destructor is expected to drop a reference on gRegex */
	protected bool ownedRef;

	/**
	 * Get the main Gtk struct.
	 *
	 * Params:
	 *     transferOwnership = when true, ownedRef is cleared so that the
	 *         destructor of this wrapper no longer calls g_regex_unref()
	 *
	 * Returns: the wrapped GRegex pointer
	 */
	public GRegex* getRegexStruct(bool transferOwnership = false)
	{
		if (transferOwnership)
			ownedRef = false;
		return gRegex;
	}

	/** the main Gtk struct as a void* */
	protected void* getStruct()
	{
		return cast(void*)gRegex;
	}

	/**
	 * Wraps an existing GRegex structure.
	 *
	 * Params:
	 *     gRegex   = the structure to wrap
	 *     ownedRef = whether the destructor should call g_regex_unref() on it
	 */
	public this (GRegex* gRegex, bool ownedRef = false)
	{
		this.gRegex = gRegex;
		this.ownedRef = ownedRef;
	}

	/**
	 * Drops the reference held by this wrapper, if it owns one and the
	 * GLib library is loaded.
	 */
	~this ()
	{
		// g_regex_unref() must not be handed a null pointer, hence the extra guard.
		if ( Linker.isLoaded(LIBRARY_GLIB) && ownedRef && gRegex !is null )
			g_regex_unref(gRegex);
	}


	/**
	 * Compiles the regular expression to an internal form, and does
	 * the initial setup of the #GRegex structure.
	 *
	 * NOTE(review): the wrapper is created with ownedRef == false, so the
	 * destructor does not release the structure returned by g_regex_new();
	 * presumably callers are expected to call unref() themselves - confirm.
	 *
	 * Params:
	 *     pattern = the regular expression
	 *     compileOptions = compile options for the regular expression, or 0
	 *     matchOptions = match options for the regular expression, or 0
	 *
	 * Since: 2.14
	 *
	 * Throws: GException on failure.
	 * Throws: ConstructionException GTK+ fails to create the object.
	 */
	public this(string pattern, GRegexCompileFlags compileOptions, GRegexMatchFlags matchOptions)
	{
		GError* err = null;

		auto p = g_regex_new(Str.toStringz(pattern), compileOptions, matchOptions, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		if(p is null)
		{
			throw new ConstructionException("null returned by new");
		}

		this(cast(GRegex*) p);
	}

	/**
	 * Returns the number of capturing subpatterns in the pattern.
	 *
	 * Returns: the number of capturing subpatterns
	 *
	 * Since: 2.14
	 */
	public int getCaptureCount()
	{
		return g_regex_get_capture_count(gRegex);
	}

	/**
	 * Returns the compile options that @regex was created with.
	 *
	 * Depending on the version of PCRE that is used, this may or may not
	 * include flags set by option expressions such as `(?i)` found at the
	 * top-level within the compiled pattern.
	 *
	 * Returns: flags from #GRegexCompileFlags
	 *
	 * Since: 2.26
	 */
	public GRegexCompileFlags getCompileFlags()
	{
		return g_regex_get_compile_flags(gRegex);
	}

	/**
	 * Checks whether the pattern contains explicit CR or LF references.
	 *
	 * Returns: %TRUE if the pattern contains explicit CR or LF references
	 *
	 * Since: 2.34
	 */
	public bool getHasCrOrLf()
	{
		return g_regex_get_has_cr_or_lf(gRegex) != 0;
	}

	/**
	 * Returns the match options that @regex was created with.
	 *
	 * Returns: flags from #GRegexMatchFlags
	 *
	 * Since: 2.26
	 */
	public GRegexMatchFlags getMatchFlags()
	{
		return g_regex_get_match_flags(gRegex);
	}

	/**
	 * Returns the number of the highest back reference
	 * in the pattern, or 0 if the pattern does not contain
	 * back references.
	 *
	 * Returns: the number of the highest back reference
	 *
	 * Since: 2.14
	 */
	public int getMaxBackref()
	{
		return g_regex_get_max_backref(gRegex);
	}

	/**
	 * Gets the number of characters in the longest lookbehind assertion in the
	 * pattern. This information is useful when doing multi-segment matching using
	 * the partial matching facilities.
	 *
	 * Returns: the number of characters in the longest lookbehind assertion.
	 *
	 * Since: 2.38
	 */
	public int getMaxLookbehind()
	{
		return g_regex_get_max_lookbehind(gRegex);
	}

	/**
	 * Gets the pattern string associated with @regex, i.e. a copy of
	 * the string passed to g_regex_new().
	 *
	 * Returns: the pattern of @regex
	 *
	 * Since: 2.14
	 */
	public string getPattern()
	{
		return Str.toString(g_regex_get_pattern(gRegex));
	}

	/**
	 * Retrieves the number of the subexpression named @name.
	 *
	 * Params:
	 *     name = name of the subexpression
	 *
	 * Returns: The number of the subexpression or -1 if @name
	 *     does not exist
	 *
	 * Since: 2.14
	 */
	public int getStringNumber(string name)
	{
		return g_regex_get_string_number(gRegex, Str.toStringz(name));
	}

	/**
	 * Scans for a match in string for the pattern in @regex.
	 * The @match_options are combined with the match options specified
	 * when the @regex structure was created, letting you have more
	 * flexibility in reusing #GRegex structures.
	 *
	 * A #GMatchInfo structure, used to get information on the match,
	 * is stored in @match_info if not %NULL. Note that if @match_info
	 * is not %NULL then it is created even if the function returns %FALSE,
	 * i.e. you must free it regardless if regular expression actually matched.
	 *
	 * To retrieve all the non-overlapping matches of the pattern in
	 * string you can use g_match_info_next().
	 *
	 * |[<!-- language="C" -->
	 * static void
	 * print_uppercase_words (const gchar *string)
	 * {
	 * // Print all uppercase-only words.
	 * GRegex *regex;
	 * GMatchInfo *match_info;
	 *
	 * regex = g_regex_new ("[A-Z]+", 0, 0, NULL);
	 * g_regex_match (regex, string, 0, &match_info);
	 * while (g_match_info_matches (match_info))
	 * {
	 * gchar *word = g_match_info_fetch (match_info, 0);
	 * g_print ("Found: %s\n", word);
	 * g_free (word);
	 * g_match_info_next (match_info, NULL);
	 * }
	 * g_match_info_free (match_info);
	 * g_regex_unref (regex);
	 * }
	 * ]|
	 *
	 * @string is not copied and is used in #GMatchInfo internally. If
	 * you use any #GMatchInfo method (except g_match_info_free()) after
	 * freeing or modifying @string then the behaviour is undefined.
	 *
	 * Params:
	 *     str = the string to scan for matches
	 *     matchOptions = match options
	 *     matchInfo = receives a MatchInfo wrapping the #GMatchInfo
	 *         produced by GLib
	 *
	 * Returns: %TRUE if the string matched, %FALSE otherwise
	 *
	 * Since: 2.14
	 */
	public bool match(string str, GRegexMatchFlags matchOptions, out MatchInfo matchInfo)
	{
		GMatchInfo* outmatchInfo = null;

		auto p = g_regex_match(gRegex, Str.toStringz(str), matchOptions, &outmatchInfo) != 0;

		matchInfo = new MatchInfo(outmatchInfo);

		return p;
	}

	/**
	 * Using the standard algorithm for regular expression matching only
	 * the longest match in the string is retrieved. This function uses
	 * a different algorithm so it can retrieve all the possible matches.
	 * For more documentation see g_regex_match_all_full().
	 *
	 * A #GMatchInfo structure, used to get information on the match, is
	 * stored in @match_info if not %NULL. Note that if @match_info is
	 * not %NULL then it is created even if the function returns %FALSE,
	 * i.e. you must free it regardless if regular expression actually
	 * matched.
	 *
	 * @string is not copied and is used in #GMatchInfo internally. If
	 * you use any #GMatchInfo method (except g_match_info_free()) after
	 * freeing or modifying @string then the behaviour is undefined.
	 *
	 * Params:
	 *     str = the string to scan for matches
	 *     matchOptions = match options
	 *     matchInfo = receives a MatchInfo wrapping the #GMatchInfo
	 *         produced by GLib
	 *
	 * Returns: %TRUE if the string matched, %FALSE otherwise
	 *
	 * Since: 2.14
	 */
	public bool matchAll(string str, GRegexMatchFlags matchOptions, out MatchInfo matchInfo)
	{
		GMatchInfo* outmatchInfo = null;

		auto p = g_regex_match_all(gRegex, Str.toStringz(str), matchOptions, &outmatchInfo) != 0;

		matchInfo = new MatchInfo(outmatchInfo);

		return p;
	}

	/**
	 * Using the standard algorithm for regular expression matching only
	 * the longest match in the string is retrieved, it is not possible
	 * to obtain all the available matches. For instance matching
	 * "<a> <b> <c>" against the pattern "<.*>"
	 * you get "<a> <b> <c>".
	 *
	 * This function uses a different algorithm (called DFA, i.e. deterministic
	 * finite automaton), so it can retrieve all the possible matches, all
	 * starting at the same point in the string. For instance matching
	 * "<a> <b> <c>" against the pattern "<.*>"
	 * you would obtain three matches: "<a> <b> <c>",
	 * "<a> <b>" and "<a>".
	 *
	 * The number of matched strings is retrieved using
	 * g_match_info_get_match_count(). To obtain the matched strings and
	 * their position you can use, respectively, g_match_info_fetch() and
	 * g_match_info_fetch_pos(). Note that the strings are returned in
	 * reverse order of length; that is, the longest matching string is
	 * given first.
	 *
	 * Note that the DFA algorithm is slower than the standard one and it
	 * is not able to capture substrings, so backreferences do not work.
	 *
	 * Setting @start_position differs from just passing over a shortened
	 * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern
	 * that begins with any kind of lookbehind assertion, such as "\b".
	 *
	 * A #GMatchInfo structure, used to get information on the match, is
	 * stored in @match_info if not %NULL. Note that if @match_info is
	 * not %NULL then it is created even if the function returns %FALSE,
	 * i.e. you must free it regardless if regular expression actually
	 * matched.
	 *
	 * @string is not copied and is used in #GMatchInfo internally. If
	 * you use any #GMatchInfo method (except g_match_info_free()) after
	 * freeing or modifying @string then the behaviour is undefined.
	 *
	 * Params:
	 *     str = the string to scan for matches; its length in bytes
	 *         (str.length) is passed to GLib as the string length
	 *     startPosition = starting index of the string to match, in bytes
	 *     matchOptions = match options
	 *     matchInfo = receives a MatchInfo wrapping the #GMatchInfo
	 *         produced by GLib; left at its initial value when a
	 *         GException is thrown
	 *
	 * Returns: %TRUE if the string matched, %FALSE otherwise
	 *
	 * Since: 2.14
	 *
	 * Throws: GException on failure.
	 */
	public bool matchAllFull(string str, int startPosition, GRegexMatchFlags matchOptions, out MatchInfo matchInfo)
	{
		GMatchInfo* outmatchInfo = null;
		GError* err = null;

		auto p = g_regex_match_all_full(gRegex, Str.toStringz(str), cast(ptrdiff_t)str.length, startPosition, matchOptions, &outmatchInfo, &err) != 0;

		if (err !is null)
		{
			// The match info is created even when matching fails; the caller
			// never receives a wrapper on this path, so release it here.
			if (outmatchInfo !is null)
				g_match_info_free(outmatchInfo);

			throw new GException( new ErrorG(err) );
		}

		matchInfo = new MatchInfo(outmatchInfo);

		return p;
	}

	/**
	 * Scans for a match in string for the pattern in @regex.
	 * The @match_options are combined with the match options specified
	 * when the @regex structure was created, letting you have more
	 * flexibility in reusing #GRegex structures.
	 *
	 * Setting @start_position differs from just passing over a shortened
	 * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern
	 * that begins with any kind of lookbehind assertion, such as "\b".
	 *
	 * A #GMatchInfo structure, used to get information on the match, is
	 * stored in @match_info if not %NULL. Note that if @match_info is
	 * not %NULL then it is created even if the function returns %FALSE,
	 * i.e. you must free it regardless if regular expression actually
	 * matched.
	 *
	 * @string is not copied and is used in #GMatchInfo internally. If
	 * you use any #GMatchInfo method (except g_match_info_free()) after
	 * freeing or modifying @string then the behaviour is undefined.
	 *
	 * To retrieve all the non-overlapping matches of the pattern in
	 * string you can use g_match_info_next().
	 *
	 * |[<!-- language="C" -->
	 * static void
	 * print_uppercase_words (const gchar *string)
	 * {
	 * // Print all uppercase-only words.
	 * GRegex *regex;
	 * GMatchInfo *match_info;
	 * GError *error = NULL;
	 *
	 * regex = g_regex_new ("[A-Z]+", 0, 0, NULL);
	 * g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error);
	 * while (g_match_info_matches (match_info))
	 * {
	 * gchar *word = g_match_info_fetch (match_info, 0);
	 * g_print ("Found: %s\n", word);
	 * g_free (word);
	 * g_match_info_next (match_info, &error);
	 * }
	 * g_match_info_free (match_info);
	 * g_regex_unref (regex);
	 * if (error != NULL)
	 * {
	 * g_printerr ("Error while matching: %s\n", error->message);
	 * g_error_free (error);
	 * }
	 * }
	 * ]|
	 *
	 * Params:
	 *     str = the string to scan for matches; its length in bytes
	 *         (str.length) is passed to GLib as the string length
	 *     startPosition = starting index of the string to match, in bytes
	 *     matchOptions = match options
	 *     matchInfo = receives a MatchInfo wrapping the #GMatchInfo
	 *         produced by GLib; left at its initial value when a
	 *         GException is thrown
	 *
	 * Returns: %TRUE if the string matched, %FALSE otherwise
	 *
	 * Since: 2.14
	 *
	 * Throws: GException on failure.
	 */
	public bool matchFull(string str, int startPosition, GRegexMatchFlags matchOptions, out MatchInfo matchInfo)
	{
		GMatchInfo* outmatchInfo = null;
		GError* err = null;

		auto p = g_regex_match_full(gRegex, Str.toStringz(str), cast(ptrdiff_t)str.length, startPosition, matchOptions, &outmatchInfo, &err) != 0;

		if (err !is null)
		{
			// The match info is created even when matching fails; the caller
			// never receives a wrapper on this path, so release it here.
			if (outmatchInfo !is null)
				g_match_info_free(outmatchInfo);

			throw new GException( new ErrorG(err) );
		}

		matchInfo = new MatchInfo(outmatchInfo);

		return p;
	}

	/**
	 * Increases reference count of @regex by 1.
	 *
	 * Returns: a new Regex wrapper (created with ownedRef == true) around
	 *     the pointer returned by g_regex_ref(), or null if that pointer
	 *     is null
	 *
	 * Since: 2.14
	 */
	public Regex doref()
	{
		auto p = g_regex_ref(gRegex);

		if(p is null)
		{
			return null;
		}

		return new Regex(cast(GRegex*) p, true);
	}

	/**
	 * Replaces all occurrences of the pattern in @regex with the
	 * replacement text. Backreferences of the form '\number' or
	 * '\g<number>' in the replacement text are interpolated by the
	 * number-th captured subexpression of the match, '\g<name>' refers
	 * to the captured subexpression with the given name. '\0' refers
	 * to the complete match, but '\0' followed by a number is the octal
	 * representation of a character. To include a literal '\' in the
	 * replacement, write '\\'.
	 *
	 * There are also escapes that change the case of the following text:
	 *
	 * - \l: Convert to lower case the next character
	 * - \u: Convert to upper case the next character
	 * - \L: Convert to lower case till \E
	 * - \U: Convert to upper case till \E
	 * - \E: End case modification
	 *
	 * If you do not need to use backreferences use g_regex_replace_literal().
	 *
	 * The @replacement string must be UTF-8 encoded even if #G_REGEX_RAW was
	 * passed to g_regex_new(). If you want to use not UTF-8 encoded strings
	 * you can use g_regex_replace_literal().
	 *
	 * Setting @start_position differs from just passing over a shortened
	 * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern that
	 * begins with any kind of lookbehind assertion, such as "\b".
	 *
	 * Params:
	 *     str = the string to perform matches against; its length in
	 *         bytes (str.length) is passed to GLib as the string length
	 *     startPosition = starting index of the string to match, in bytes
	 *     replacement = text to replace each match with
	 *     matchOptions = options for the match
	 *
	 * Returns: a string containing the replacements; the string allocated
	 *     by GLib is converted with Str.toString and then handed to
	 *     Str.freeString
	 *
	 * Since: 2.14
	 *
	 * Throws: GException on failure.
	 */
	public string replace(string str, int startPosition, string replacement, GRegexMatchFlags matchOptions)
	{
		GError* err = null;

		auto retStr = g_regex_replace(gRegex, Str.toStringz(str), cast(ptrdiff_t)str.length, startPosition, Str.toStringz(replacement), matchOptions, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Replaces occurrences of the pattern in regex with the output of
	 * @eval for that occurrence.
	 *
	 * Setting @start_position differs from just passing over a shortened
	 * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern
	 * that begins with any kind of lookbehind assertion, such as "\b".
	 *
	 * The following example uses g_regex_replace_eval() to replace multiple
	 * strings at once:
	 * |[<!-- language="C" -->
	 * static gboolean
	 * eval_cb (const GMatchInfo *info,
	 * GString *res,
	 * gpointer data)
	 * {
	 * gchar *match;
	 * gchar *r;
	 *
	 * match = g_match_info_fetch (info, 0);
	 * r = g_hash_table_lookup ((GHashTable *)data, match);
	 * g_string_append (res, r);
	 * g_free (match);
	 *
	 * return FALSE;
	 * }
	 *
	 * ...
	 *
	 * GRegex *reg;
	 * GHashTable *h;
	 * gchar *res;
	 *
	 * h = g_hash_table_new (g_str_hash, g_str_equal);
	 *
	 * g_hash_table_insert (h, "1", "ONE");
	 * g_hash_table_insert (h, "2", "TWO");
	 * g_hash_table_insert (h, "3", "THREE");
	 * g_hash_table_insert (h, "4", "FOUR");
	 *
	 * reg = g_regex_new ("1|2|3|4", 0, 0, NULL);
	 * res = g_regex_replace_eval (reg, text, -1, 0, 0, eval_cb, h, NULL);
	 * g_hash_table_destroy (h);
	 *
	 * ...
	 * ]|
	 *
	 * Params:
	 *     str = string to perform matches against; its length in bytes
	 *         (str.length) is passed to GLib as the string length
	 *     startPosition = starting index of the string to match, in bytes
	 *     matchOptions = options for the match
	 *     eval = a function to call for each match
	 *     userData = user data to pass to the function
	 *
	 * Returns: a string containing the replacements; the string allocated
	 *     by GLib is converted with Str.toString and then handed to
	 *     Str.freeString
	 *
	 * Since: 2.14
	 *
	 * Throws: GException on failure.
	 */
	public string replaceEval(string str, int startPosition, GRegexMatchFlags matchOptions, GRegexEvalCallback eval, void* userData)
	{
		GError* err = null;

		auto retStr = g_regex_replace_eval(gRegex, Str.toStringz(str), cast(ptrdiff_t)str.length, startPosition, matchOptions, eval, userData, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Replaces all occurrences of the pattern in @regex with the
	 * replacement text. @replacement is replaced literally, to
	 * include backreferences use g_regex_replace().
	 *
	 * Setting @start_position differs from just passing over a
	 * shortened string and setting #G_REGEX_MATCH_NOTBOL in the
	 * case of a pattern that begins with any kind of lookbehind
	 * assertion, such as "\b".
	 *
	 * Params:
	 *     str = the string to perform matches against; its length in
	 *         bytes (str.length) is passed to GLib as the string length
	 *     startPosition = starting index of the string to match, in bytes
	 *     replacement = text to replace each match with
	 *     matchOptions = options for the match
	 *
	 * Returns: a string containing the replacements; the string allocated
	 *     by GLib is converted with Str.toString and then handed to
	 *     Str.freeString
	 *
	 * Since: 2.14
	 *
	 * Throws: GException on failure.
	 */
	public string replaceLiteral(string str, int startPosition, string replacement, GRegexMatchFlags matchOptions)
	{
		GError* err = null;

		auto retStr = g_regex_replace_literal(gRegex, Str.toStringz(str), cast(ptrdiff_t)str.length, startPosition, Str.toStringz(replacement), matchOptions, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Breaks the string on the pattern, and returns an array of the tokens.
	 * If the pattern contains capturing parentheses, then the text for each
	 * of the substrings will also be returned. If the pattern does not match
	 * anywhere in the string, then the whole string is returned as the first
	 * token.
	 *
	 * As a special case, the result of splitting the empty string "" is an
	 * empty vector, not a vector containing a single string. The reason for
	 * this special case is that being able to represent an empty vector is
	 * typically more useful than consistent handling of empty elements. If
	 * you do need to represent empty elements, you'll need to check for the
	 * empty string before calling this function.
	 *
	 * A pattern that can match empty strings splits @string into separate
	 * characters wherever it matches the empty string between characters.
	 * For example splitting "ab c" using as a separator "\s*", you will get
	 * "a", "b" and "c".
	 *
	 * Params:
	 *     str = the string to split with the pattern
	 *     matchOptions = match time option flags
	 *
	 * Returns: the tokens; the array returned by GLib is converted with
	 *     Str.toStringArray and then handed to Str.freeStringArray
	 *
	 * Since: 2.14
	 */
	public string[] split(string str, GRegexMatchFlags matchOptions)
	{
		auto retStr = g_regex_split(gRegex, Str.toStringz(str), matchOptions);

		scope(exit) Str.freeStringArray(retStr);
		return Str.toStringArray(retStr);
	}

	/**
	 * Breaks the string on the pattern, and returns an array of the tokens.
	 * If the pattern contains capturing parentheses, then the text for each
	 * of the substrings will also be returned. If the pattern does not match
	 * anywhere in the string, then the whole string is returned as the first
	 * token.
	 *
	 * As a special case, the result of splitting the empty string "" is an
	 * empty vector, not a vector containing a single string. The reason for
	 * this special case is that being able to represent an empty vector is
	 * typically more useful than consistent handling of empty elements. If
	 * you do need to represent empty elements, you'll need to check for the
	 * empty string before calling this function.
	 *
	 * A pattern that can match empty strings splits @string into separate
	 * characters wherever it matches the empty string between characters.
	 * For example splitting "ab c" using as a separator "\s*", you will get
	 * "a", "b" and "c".
	 *
	 * Setting @start_position differs from just passing over a shortened
	 * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern
	 * that begins with any kind of lookbehind assertion, such as "\b".
	 *
	 * Params:
	 *     str = the string to split with the pattern; its length in
	 *         bytes (str.length) is passed to GLib as the string length
	 *     startPosition = starting index of the string to match, in bytes
	 *     matchOptions = match time option flags
	 *     maxTokens = the maximum number of tokens to split @string into.
	 *         If this is less than 1, the string is split completely
	 *
	 * Returns: the tokens; the array returned by GLib is converted with
	 *     Str.toStringArray and then handed to Str.freeStringArray
	 *
	 * Since: 2.14
	 *
	 * Throws: GException on failure.
	 */
	public string[] splitFull(string str, int startPosition, GRegexMatchFlags matchOptions, int maxTokens)
	{
		GError* err = null;

		auto retStr = g_regex_split_full(gRegex, Str.toStringz(str), cast(ptrdiff_t)str.length, startPosition, matchOptions, maxTokens, &err);

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		scope(exit) Str.freeStringArray(retStr);
		return Str.toStringArray(retStr);
	}

	/**
	 * Decreases reference count of @regex by 1. When reference count drops
	 * to zero, it frees all the memory associated with the regex structure.
	 *
	 * This does not change ownedRef, so a wrapper created with
	 * ownedRef == true still calls g_regex_unref() from its destructor.
	 *
	 * Since: 2.14
	 */
	public void unref()
	{
		g_regex_unref(gRegex);
	}

	/**
	 * Checks whether @replacement is a valid replacement string
	 * (see g_regex_replace()), i.e. that all escape sequences in
	 * it are valid.
	 *
	 * @replacement is also checked for pattern references. For instance,
	 * replacement text 'foo\n' does not contain references and may be
	 * evaluated without information about actual match, but '\0\1'
	 * (whole match followed by first subpattern) requires valid
	 * #GMatchInfo object.
	 *
	 * Params:
	 *     replacement = the replacement string
	 *     hasReferences = receives whether @replacement contains
	 *         references
	 *
	 * Returns: whether @replacement is a valid replacement string
	 *
	 * Since: 2.14
	 *
	 * Throws: GException on failure.
	 */
	public static bool checkReplacement(string replacement, out bool hasReferences)
	{
		int outhasReferences;
		GError* err = null;

		auto p = g_regex_check_replacement(Str.toStringz(replacement), &outhasReferences, &err) != 0;

		if (err !is null)
		{
			throw new GException( new ErrorG(err) );
		}

		// Any non-zero gboolean is true; same conversion as the other wrappers in this class.
		hasReferences = (outhasReferences != 0);

		return p;
	}

	/** Returns the value of g_regex_error_quark(). */
	public static GQuark errorQuark()
	{
		return g_regex_error_quark();
	}

	/**
	 * Escapes the nul characters in @string to "\x00". It can be used
	 * to compile a regex with embedded nul characters.
	 *
	 * For completeness, @length can be -1 for a nul-terminated string.
	 * In this case the output string will be of course equal to @string.
	 *
	 * Params:
	 *     str = the string to escape
	 *     length = the length of @string
	 *
	 * Returns: the escaped string; the string allocated by GLib is
	 *     converted with Str.toString and then handed to Str.freeString
	 *
	 * Since: 2.30
	 */
	public static string escapeNul(string str, int length)
	{
		auto retStr = g_regex_escape_nul(Str.toStringz(str), length);

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Escapes the special characters used for regular expressions
	 * in @string, for instance "a.b*c" becomes "a\.b\*c". This
	 * function is useful to dynamically generate regular expressions.
	 *
	 * @string can contain nul characters that are replaced with "\0";
	 * the length handed to GLib is always str.length.
	 *
	 * Params:
	 *     str = the string to escape
	 *
	 * Returns: the escaped string; the string allocated by GLib is
	 *     converted with Str.toString and then handed to Str.freeString
	 *
	 * Since: 2.14
	 */
	public static string escapeString(string str)
	{
		auto retStr = g_regex_escape_string(Str.toStringz(str), cast(int)str.length);

		scope(exit) Str.freeString(retStr);
		return Str.toString(retStr);
	}

	/**
	 * Scans for a match in @string for @pattern.
	 *
	 * This function is equivalent to g_regex_match() but it does not
	 * require to compile the pattern with g_regex_new(), avoiding some
	 * lines of code when you need just to do a match without extracting
	 * substrings, capture counts, and so on.
	 *
	 * If this function is to be called on the same @pattern more than
	 * once, it's more efficient to compile the pattern once with
	 * g_regex_new() and then use g_regex_match().
	 *
	 * Params:
	 *     pattern = the regular expression
	 *     str = the string to scan for matches
	 *     compileOptions = compile options for the regular expression, or 0
	 *     matchOptions = match options, or 0
	 *
	 * Returns: %TRUE if the string matched, %FALSE otherwise
	 *
	 * Since: 2.14
	 */
	public static bool matchSimple(string pattern, string str, GRegexCompileFlags compileOptions, GRegexMatchFlags matchOptions)
	{
		return g_regex_match_simple(Str.toStringz(pattern), Str.toStringz(str), compileOptions, matchOptions) != 0;
	}

	/**
	 * Breaks the string on the pattern, and returns an array of
	 * the tokens. If the pattern contains capturing parentheses,
	 * then the text for each of the substrings will also be returned.
	 * If the pattern does not match anywhere in the string, then the
	 * whole string is returned as the first token.
	 *
	 * This function is equivalent to g_regex_split() but it does
	 * not require to compile the pattern with g_regex_new(), avoiding
	 * some lines of code when you need just to do a split without
	 * extracting substrings, capture counts, and so on.
	 *
	 * If this function is to be called on the same @pattern more than
	 * once, it's more efficient to compile the pattern once with
	 * g_regex_new() and then use g_regex_split().
	 *
	 * As a special case, the result of splitting the empty string ""
	 * is an empty vector, not a vector containing a single string.
	 * The reason for this special case is that being able to represent
	 * an empty vector is typically more useful than consistent handling
	 * of empty elements. If you do need to represent empty elements,
	 * you'll need to check for the empty string before calling this
	 * function.
	 *
	 * A pattern that can match empty strings splits @string into
	 * separate characters wherever it matches the empty string between
	 * characters. For example splitting "ab c" using as a separator
	 * "\s*", you will get "a", "b" and "c".
	 *
	 * Params:
	 *     pattern = the regular expression
	 *     str = the string to scan for matches
	 *     compileOptions = compile options for the regular expression, or 0
	 *     matchOptions = match options, or 0
	 *
	 * Returns: the tokens; the array returned by GLib is converted with
	 *     Str.toStringArray and then handed to Str.freeStringArray
	 *
	 * Since: 2.14
	 */
	public static string[] splitSimple(string pattern, string str, GRegexCompileFlags compileOptions, GRegexMatchFlags matchOptions)
	{
		auto retStr = g_regex_split_simple(Str.toStringz(pattern), Str.toStringz(str), compileOptions, matchOptions);

		scope(exit) Str.freeStringArray(retStr);
		return Str.toStringArray(retStr);
	}
}