/*
 * This file is part of gtkD.
 *
 * gtkD is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version, with
 * some exceptions, please read the COPYING file.
 *
 * gtkD is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with gtkD; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 */

module utils.XML;

import std.algorithm;
import std.array;
import std.conv : to;
import std.exception;
import std.range;
import std.string;
import std.traits: isSomeChar;
import std.uni;

/**
 * A single node produced by XMLReader.
 *
 * value holds the tag name for start, empty and end tags, and the
 * textual content for every other node type.
 * attributes is only filled in for start and empty tags.
 */
struct XMLNode
{
	XMLNodeType type;

	string value;
	string[string] attributes;
}

/// The kinds of nodes XMLReader can produce.
enum XMLNodeType
{
	None,
	PI,
	StartTag,
	Text,
	CData,
	DocType,
	Comment,
	EmptyTag,
	EndTag,
	DocumentEnd
}

/**
 * A small pull parser for XML.
 *
 * The reader exposes the current node as front, advances with popFront()
 * and reports the end of the document through empty. Once the input is
 * exhausted, front is a node of type XMLNodeType.DocumentEnd.
 */
class XMLReader(T)
	if (isInputRange!T && isSomeChar!(ElementType!T) )
{
	/// The node the reader is currently positioned on.
	XMLNode front;
	/// File name used in diagnostic messages.
	string fileName;

	// Strings are read code unit by code unit through ByChar.
	static if ( is( T == string ) )
		private CountLines!ByChar document;
	else
		private CountLines!T document;

	/**
	 * Params:
	 *     document = The XML document to parse.
	 *     fileName = File name to print in diagnostic messages.
	 */
	this(T document, string fileName = null)
	{
		static if ( is( T == string ) )
			this.document = CountLines!ByChar(ByChar(document));
		else
			this.document = CountLines!T(document);

		this.fileName = fileName;

		// Position the reader on the first node.
		popFront();
	}

	/// The line number the underlying document is currently at.
	@property size_t line()
	{
		return document.line;
	}

	/**
	 * Advance to the next node.
	 *
	 * When the document is exhausted front becomes a DocumentEnd node.
	 *
	 * Throws: XMLException when malformed markup is encountered.
	 */
	void popFront()
	{
		front = XMLNode();

		if ( document.empty )
		{
			front.type = XMLNodeType.DocumentEnd;
			return;
		}

		if ( document.front == '<' )
			parseTag();
		else
			parseText();
	}

	/// True once the DocumentEnd node has been reached.
	@property bool empty()
	{
		return document.empty && front.type == XMLNodeType.DocumentEnd;
	}

	/// Throws an XMLException when there is no more input to read.
	private void enforceMore()
	{
		if ( document.empty )
			throw new XMLException(this, "Unexpected end of document");
	}

	/// Consumes literal from the document, throws an XMLException if it is not there.
	private void expect(string literal)
	{
		if ( !document.skipOver(literal) )
			throw new XMLException(this, "Invalid XML tag");
	}

	/**
	 * Parse any construct starting with '<' and dispatch on its kind.
	 * Whitespace directly following the construct is skipped.
	 */
	private void parseTag()
	{
		// Skip the '<'.
		document.popFront();
		enforceMore();

		switch ( document.front )
		{
			case '!':
				document.popFront();
				enforceMore();

				// The '!' is already consumed, so the literals below start after it.
				switch ( document.front )
				{
					case '[':
						expect("[CDATA[");
						parseCDATA();
						break;
					case 'D':
						expect("DOCTYPE");
						parseDocType();
						break;
					case '-':
						expect("--");
						parseComment();
						break;
					default:
						throw new XMLException(this, "Invalid XML tag");
				}
				break;
			case '?':
				document.popFront();
				parsePI();
				break;
			case '/':
				document.popFront();
				// An end tag is parsed like a start tag, only the type differs.
				parseStartTag();
				front.type = XMLNodeType.EndTag;
				break;
			default:
				parseStartTag();
				break;
		}

		skipWhitespace();
	}

	/**
	 * Parse the content of a CDATA section up to and including "]]>".
	 * The content is stored verbatim in front.value.
	 */
	private void parseCDATA()
	{
		front.type = XMLNodeType.CData;
		auto buff = appender!string();

		while ( !document.empty )
		{
			auto c = document.front;
			document.popFront();

			// The section ends at the first '>' directly preceded by "]]".
			if ( c == '>' && buff.data.endsWith("]]") )
			{
				front.value = buff.data[0 .. $ - 2];
				return;
			}

			buff.put(c);
		}

		// Unterminated section: keep whatever was read.
		front.value = buff.data;
	}

	/**
	 * Parse a document type declaration, the "<!DOCTYPE" is already consumed.
	 * A '>' inside an internal subset ( [ ... ] ) does not end the declaration.
	 */
	private void parseDocType()
	{
		front.type = XMLNodeType.DocType;
		auto buff = appender!string();
		int bracketCount;

		skipWhitespace();

		Scan: while ( !document.empty )
		{
			switch ( document.front )
			{
				case '[':
					bracketCount++;
					break;
				case ']':
					bracketCount--;
					break;
				case '>':
					if ( bracketCount == 0 )
					{
						document.popFront();
						break Scan;
					}
					break;
				default: break;
			}

			buff.put(document.front);
			document.popFront();
		}

		front.value = buff.data.stripRight();
	}

	/**
	 * Parse a comment, the "<!--" is already consumed.
	 * The comment text, stripped of surrounding whitespace, is stored in front.value.
	 *
	 * Throws: XMLException when "--" occurs inside the comment.
	 */
	private void parseComment()
	{
		front.type = XMLNodeType.Comment;
		auto buff = appender!string();

		while ( !document.empty )
		{
			auto c = document.front;
			document.popFront();

			// Anything but a double hyphen is part of the comment text.
			if ( c != '-' || document.empty || document.front != '-' )
			{
				buff.put(c);
				continue;
			}

			// Skip the second '-', only a closing '>' may follow.
			document.popFront();

			if ( document.empty || document.front != '>' )
				throw new XMLException(this, "-- not allowed in comments.");

			document.popFront();
			break;
		}

		front.value = buff.data.strip();
	}

	/**
	 * Parse a processing instruction, the "<?" is already consumed.
	 * Everything up to the closing "?>" is stored in front.value.
	 */
	private void parsePI()
	{
		front.type = XMLNodeType.PI;
		auto buff = appender!string();

		while ( !document.empty )
		{
			auto c = document.front;
			document.popFront();

			if ( c == '?' && !document.empty && document.front == '>' )
			{
				document.popFront();
				break;
			}

			buff.put(c);
		}

		front.value = buff.data.stripRight();
	}

	/**
	 * Parse the tag name and the attributes of a start, empty or end tag.
	 *
	 * A document that ends inside the tag or inside an attribute value is
	 * accepted, the node then contains what was read so far.
	 *
	 * Throws: XMLException when an attribute has no '=' or no quoted value.
	 */
	private void parseStartTag()
	{
		front.type = XMLNodeType.StartTag;
		auto buff = appender!string();

		while ( !document.empty && !(document.front.isWhite() || document.front == '/' || document.front == '>') )
		{
			buff.put(document.front);
			document.popFront();
		}

		front.value = buff.data;

		while ( !document.empty )
		{
			skipWhitespace();

			if ( document.empty )
				return;

			if ( document.front == '/' )
			{
				front.type = XMLNodeType.EmptyTag;
				document.popFront();

				if ( document.empty )
					return;
			}

			if ( document.front == '>' )
			{
				document.popFront();
				return;
			}

			// Attribute name, also stop on '/' and '>' so a missing value is detected.
			buff = appender!string();

			while ( !document.empty
				&& !(document.front.isWhite() || document.front == '='
					|| document.front == '/' || document.front == '>') )
			{
				buff.put(document.front);
				document.popFront();
			}

			string attName = buff.data;

			// Any amount of whitespace is allowed around the '='.
			skipWhitespace();

			if ( document.empty || document.front != '=' )
				throw new XMLException(this, "Expected '=' after attribute name");

			document.popFront();
			skipWhitespace();

			if ( document.empty || (document.front != '"' && document.front != '\'') )
				throw new XMLException(this, "Attribute value must be quoted");

			auto quote = document.front;
			document.popFront();

			buff = appender!string();

			AttValue: while ( !document.empty )
			{
				switch ( document.front )
				{
					case '\'':
					case '"':
						// The other kind of quote is ordinary content.
						if ( document.front != quote )
							goto default;

						document.popFront();
						break AttValue;
					case '&':
						// Leaves the document on the ';', which is popped below.
						parseAmpersand(buff);
						break;
					default:
						buff.put(document.front);
						break;
				}

				document.popFront();
			}

			front.attributes[attName] = buff.data;
		}
	}

	/**
	 * Parse character data up to the next '<' or the end of the document.
	 * Trailing whitespace is removed from the resulting value.
	 */
	private void parseText()
	{
		front.type = XMLNodeType.Text;
		auto buff = appender!string();

		Text: while ( !document.empty )
		{
			switch ( document.front )
			{
				case '<':
					break Text;
				case '&':
					// Leaves the document on the ';', which is popped below.
					parseAmpersand(buff);
					break;
				default:
					buff.put(document.front);
					break;
			}

			document.popFront();
		}

		front.value = buff.data.stripRight();
	}

	/// Skip over any whitespace at the current position.
	private void skipWhitespace()
	{
		while ( !document.empty && isWhite(document.front) )
			document.popFront();
	}

	/**
	 * Decode an entity or character reference and append the result to buff.
	 *
	 * On entry the document is positioned on the '&', on return it is
	 * positioned on the terminating ';' which the caller has to skip.
	 *
	 * Supported are the five predefined entities (quot, amp, apos, lt, gt)
	 * and decimal (&#NN;) or hexadecimal (&#xHH;) character references.
	 *
	 * Throws: XMLException for an unknown, malformed or unterminated reference.
	 */
	private void parseAmpersand(ref Appender!(string) buff)
	{
		import std.utf : isValidDchar;

		// The longest valid references are "#x10FFFF" and "#1114111".
		enum maxEntityLength = 10;
		auto name = appender!string();

		// Skip the '&'.
		document.popFront();

		while ( true )
		{
			if ( document.empty )
				throw new XMLException(this, "Unterminated escape sequence");

			if ( document.front == ';' )
				break;

			if ( name.data.length >= maxEntityLength )
				throw new XMLException(this, "Unregonized escape secuence");

			name.put(document.front);
			document.popFront();
		}

		string entity = name.data;

		switch ( entity )
		{
			case "quot":
				buff.put('"');
				return;
			case "amp":
				buff.put('&');
				return;
			case "apos":
				buff.put('\'');
				return;
			case "lt":
				buff.put('<');
				return;
			case "gt":
				buff.put('>');
				return;
			default:
				break;
		}

		// Everything else has to be a numeric character reference.
		if ( entity.length < 2 || entity[0] != '#' )
			throw new XMLException(this, "Unregonized escape secuence");

		bool hex = entity[1] == 'x';
		string digits = entity[(hex ? 2 : 1) .. $];

		if ( digits.length == 0 )
			throw new XMLException(this, "Unregonized escape secuence");

		// maxEntityLength keeps the value within the range of an uint.
		uint code;

		foreach ( char c; digits )
		{
			uint digit;

			if ( c >= '0' && c <= '9' )
				digit = c - '0';
			else if ( hex && c >= 'a' && c <= 'f' )
				digit = c - 'a' + 10;
			else if ( hex && c >= 'A' && c <= 'F' )
				digit = c - 'A' + 10;
			else
				throw new XMLException(this, "Unregonized escape secuence");

			code = code * (hex ? 16 : 10) + digit;
		}

		if ( code == 0 || !isValidDchar(cast(dchar) code) )
			throw new XMLException(this, "Unregonized escape secuence");

		buff.put(cast(dchar) code);
	}
}

unittest
{
	auto reader = new XMLReader!string("<test a=\"1 &amp; 2\">text</test>");

	assert(reader.front.type == XMLNodeType.StartTag);
	assert(reader.front.value == "test");
	assert(reader.front.attributes["a"] == "1 & 2");

	reader.popFront();
	assert(reader.front.type == XMLNodeType.Text);
	assert(reader.front.value == "text");

	reader.popFront();
	assert(reader.endTag("test"));

	reader.popFront();
	assert(reader.empty);
}

/**
 * Skip the current tag and its content.
 * Leaves the reader pointing to the end tag with the same depth.
 *
 * An empty tag is left untouched, any node that is not a start tag
 * is skipped by advancing the reader once.
 */
void skipTag(T)(XMLReader!T reader)
{
	if ( reader.front.type == XMLNodeType.EmptyTag )
		return;
	if ( reader.front.type != XMLNodeType.StartTag )
	{
		reader.popFront();
		return;
	}

	string tagName = reader.front.value;
	size_t depth;

	while ( !reader.empty )
	{
		if ( reader.front.type == XMLNodeType.StartTag )
			depth++;

		if ( reader.front.type == XMLNodeType.EndTag )
			depth--;

		// Back at the starting depth on a node with the name we started with.
		if ( depth == 0 && reader.front.value == tagName )
			return;

		reader.popFront();
	}
}

/**
 * Is this an end tag with name tagName.
 */
bool endTag(T)(XMLReader!T reader, string tagName)
{
	return reader.front.type == XMLNodeType.EndTag && reader.front.value == tagName;
}

/// ditto.
bool endTag(T)(XMLReader!T reader, string[] tagNames ...)
{
	return reader.front.type == XMLNodeType.EndTag && tagNames.canFind(reader.front.value);
}

/**
 * Exception thrown by XMLReader for malformed input.
 *
 * The file and line of the exception are the file name and the current
 * line of the reader, not the location in the D source.
 */
class XMLException : Exception
{
	this (T)(XMLReader!T reader, string msg)
	{
		super(msg, reader.fileName, reader.line, null);
	}

	/// Formats the exception as "file(line): message".
	override string toString()
	{
		string s;
		toString((buf) { s ~= buf; });
		return s;
	}

	/// ditto.
	override void toString(scope void delegate(in char[]) sink) const
	{
		sink(file);
		sink("("); sink(to!string(line)); sink(")");

		if (msg.length)
		{
			sink(": "); sink(msg);
		}
	}

}

/**
 * A forward range over the individual chars (code units) of a string.
 */
struct ByChar
{
	string data;

	@property char front()
	{
		return data[0];
	}

	@property bool empty()
	{
		return !data.length;
	}

	void popFront()
	{
		assert(data.length, "Attempting to popFront() past the end of an array");
		data = data[1 .. $];
	}

	@property ByChar save()
	{
		return this;
	}

	alias data this;
}

/**
 * Wraps a range of characters and keeps track of the current line number.
 *
 * line starts at 1 and is incremented every time popFront() makes
 * a newline the current character.
 */
struct CountLines(Source) if (isSomeChar!(ElementType!Source))
{
	import std.range.primitives : ElementType;

	Source src;
	size_t line = 1;

	this(Source src)
	{
		this.src = src;
	}

	@property ElementType!Source front()
	{
		return src.front;
	}

	@property bool empty()
	{
		return src.empty;
	}

	void popFront()
	{
		src.popFront();

		// Popping the last character leaves nothing to inspect.
		if ( !src.empty && src.front == '\n' )
			line++;
	}

	@property typeof(this) save()
	{
		auto copy = typeof(this)(src.save);
		// Algorithms like skipOver assign the saved copy back, so keep the count.
		copy.line = line;
		return copy;
	}
}