/*
 * This file is part of gtkD.
 *
 * gtkD is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version, with
 * some exceptions, please read the COPYING file.
 *
 * gtkD is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with gtkD; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 */

module utils.XML;

import std.algorithm;
import std.array;
import std.exception;
import std.range;
import std.string;
import std.traits: isSomeChar;
import std.uni;

/**
 * A single node produced by XMLReader.
 */
struct XMLNode
{
	/// The kind of node.
	XMLNodeType type;

	/// Tag name for tags, textual content for the other node types.
	string value;
	/// Attributes of a start or empty tag, keyed by attribute name.
	string[string] attributes;
}

/**
 * The kinds of node XMLReader can produce.
 */
enum XMLNodeType
{
	None,
	PI,
	StartTag,
	Text,
	CData,
	DocType,
	Comment,
	EmptyTag,
	EndTag,
	DocumentEnd
}

/**
 * A small pull parser: every call to popFront parses the next node
 * of the document and stores it in front.
 *
 * A string document is wrapped in ByChar so it is consumed one
 * code unit at a time.
 */
class XMLReader(T)
	if (isInputRange!T && isSomeChar!(ElementType!T) )
{
	/// The node the reader currently points to.
	XMLNode front;

	static if ( is( T == string ) )
		private ByChar document;
	else
		private T document;

	/**
	 * Creates a reader for document and parses the first node.
	 */
	this(T document)
	{
		static if ( is( T == string ) )
			this.document = ByChar(document);
		else
			this.document = document;

		popFront();
	}

	/**
	 * Parses the next node into front.
	 *
	 * Once the document is exhausted front becomes a node of type
	 * XMLNodeType.DocumentEnd.
	 *
	 * Throws: XMLException when the document is malformed.
	 */
	void popFront()
	{
		front = XMLNode();

		if ( document.empty )
		{
			front.type = XMLNodeType.DocumentEnd;
			return;
		}

		if ( document.front == '<' )
			parseTag();
		else
			parseText();
	}

	/**
	 * True once the document is consumed and the DocumentEnd node
	 * has been reached.
	 */
	@property bool empty()
	{
		return document.empty && front.type == XMLNodeType.DocumentEnd;
	}

	/**
	 * Dispatches on the character(s) following '<' and skips the
	 * whitespace after the parsed tag.
	 */
	private void parseTag()
	{
		document.popFront();
		requireMore();

		switch ( document.front )
		{
			case '!':
				document.popFront();
				requireMore();

				switch ( document.front )
				{
					case '[':
						expect("[CDATA[");
						parseCDATA();
						break;
					case 'D':
						// The '!' is already consumed, only the keyword is left.
						expect("DOCTYPE");
						parseDocType();
						break;
					case '-':
						expect("--");
						parseComment();
						break;
					default:
						throw new XMLException("Invalid XML tag");
				}
				break;
			case '?':
				document.popFront();
				parsePI();
				break;
			case '/':
				document.popFront();
				parseStartTag();
				front.type = XMLNodeType.EndTag;
				break;
			default:
				parseStartTag();
				break;
		}

		skipWhitespace();
	}

	/**
	 * Reads the content of a CDATA section up to the closing "]]>".
	 * An unterminated section yields everything up to the end of
	 * the document.
	 */
	private void parseCDATA()
	{
		front.type = XMLNodeType.CData;
		auto buff = appender!string();

		// Consecutive ']' seen that are not yet written to buff,
		// they may turn out to be part of the terminator.
		size_t brackets;

		while ( !document.empty )
		{
			auto c = document.front;
			document.popFront();

			if ( c == ']' )
			{
				brackets++;
				continue;
			}

			if ( c == '>' && brackets >= 2 )
			{
				// Only the last two ']' belong to the terminator.
				foreach ( i; 0 .. brackets - 2 )
					buff.put(']');

				front.value = buff.data;
				return;
			}

			foreach ( i; 0 .. brackets )
				buff.put(']');
			brackets = 0;

			buff.put(c);
		}

		foreach ( i; 0 .. brackets )
			buff.put(']');

		front.value = buff.data;
	}

	/**
	 * Reads a document type declaration up to the '>' that is not
	 * enclosed in an internal subset ("[ ... ]").
	 */
	private void parseDocType()
	{
		front.type = XMLNodeType.DocType;
		auto buff = appender!string();
		int bracketCount;

		skipWhitespace();

		DocType: while ( !document.empty )
		{
			switch ( document.front )
			{
				case '[':
					bracketCount++;
					break;
				case ']':
					bracketCount--;
					break;
				case '>':
					if ( bracketCount <= 0 )
					{
						document.popFront();
						break DocType;
					}
					break;
				default: break;
			}

			buff.put(document.front);
			document.popFront();
		}

		front.value = buff.data.stripRight();
	}

	/**
	 * Reads a comment up to the closing "-->".
	 *
	 * Throws: XMLException when "--" appears inside the comment.
	 */
	private void parseComment()
	{
		front.type = XMLNodeType.Comment;
		auto buff = appender!string();

		Comment: while ( !document.empty )
		{
			auto c = document.front;
			document.popFront();

			if ( c != '-' )
			{
				buff.put(c);
				continue;
			}

			// A single '-' is ordinary content.
			if ( document.empty || document.front != '-' )
			{
				buff.put('-');
				continue;
			}

			document.popFront();

			if ( !document.empty && document.front == '>' )
			{
				document.popFront();
				break Comment;
			}

			throw new XMLException("-- not allowed in comments.");
		}

		front.value = buff.data.strip();
	}

	/**
	 * Reads a processing instruction up to the closing "?>".
	 */
	private void parsePI()
	{
		front.type = XMLNodeType.PI;
		auto buff = appender!string();

		Instruction: while ( !document.empty )
		{
			auto c = document.front;
			document.popFront();

			if ( c == '?' && !document.empty && document.front == '>' )
			{
				document.popFront();
				break Instruction;
			}

			buff.put(c);
		}

		front.value = buff.data.stripRight();
	}

	/**
	 * Reads the tag name and the attributes of a start, empty or
	 * end tag. The caller has already consumed '<' (and '/').
	 *
	 * A tag that is cut off by the end of the document right after
	 * its name or after an attribute is accepted as is.
	 *
	 * Throws: XMLException when an attribute value is not quoted.
	 */
	private void parseStartTag()
	{
		front.type = XMLNodeType.StartTag;
		auto buff = appender!string();

		while ( !document.empty && !(isXMLSpace(document.front) || document.front == '/' || document.front == '>') )
		{
			buff.put(document.front);
			document.popFront();
		}

		front.value = buff.data;

		while ( true )
		{
			skipWhitespace();

			if ( document.empty )
				return;

			if ( document.front == '/' )
			{
				front.type = XMLNodeType.EmptyTag;
				document.popFront();
				requireMore();
			}

			if ( document.front == '>' )
			{
				document.popFront();
				return;
			}

			buff = appender!string();

			while ( !document.empty && !(isXMLSpace(document.front) || document.front == '=' || document.front == '/' || document.front == '>') )
			{
				buff.put(document.front);
				document.popFront();
			}

			string attName = buff.data;

			// Any amount of whitespace is allowed around the '='.
			skipWhitespace();
			if ( !document.empty && document.front == '=' )
				document.popFront();
			skipWhitespace();

			if ( document.empty || (document.front != '"' && document.front != '\'') )
				throw new XMLException("Expected a quoted value for attribute '"~ attName ~"'");

			auto quote = document.front;
			document.popFront();

			buff = appender!string();

			while ( !document.empty )
			{
				auto c = document.front;

				if ( c == quote )
				{
					document.popFront();
					break;
				}

				// parseAmpersand stops on the ';', which is popped below.
				if ( c == '&' )
					parseAmpersand(buff);
				else
					buff.put(c);

				document.popFront();
			}

			front.attributes[attName] = buff.data;
		}
	}

	/**
	 * Reads character data up to the next '<', resolving entity
	 * references. Trailing whitespace is stripped.
	 */
	private void parseText()
	{
		front.type = XMLNodeType.Text;
		auto buff = appender!string();

		Text: while ( !document.empty )
		{
			switch ( document.front )
			{
				case '<':
					break Text;
				case '&':
					parseAmpersand(buff);
					break;
				default:
					buff.put(document.front);
					break;
			}

			document.popFront();
		}

		front.value = buff.data.stripRight();
	}

	/**
	 * Advances the document past any XML whitespace.
	 */
	private void skipWhitespace()
	{
		while ( !document.empty && isXMLSpace(document.front) )
			document.popFront();
	}

	/**
	 * Whether c is one of the four characters XML treats as whitespace.
	 *
	 * std.uni.isWhite is not used here: applied to the individual code
	 * units of a UTF-8 document it also matches the bytes 0x85 and 0xA0,
	 * which occur inside multi-byte characters.
	 */
	private static bool isXMLSpace(dchar c)
	{
		return c == ' ' || c == '\t' || c == '\n' || c == '\r';
	}

	/**
	 * Throws: XMLException when the document has no more characters.
	 */
	private void requireMore()
	{
		if ( document.empty )
			throw new XMLException("Unexpected end of document");
	}

	/**
	 * Consumes token from the document.
	 *
	 * Throws: XMLException when the document does not continue with token.
	 */
	private void expect(string token)
	{
		foreach ( char c; token )
		{
			if ( document.empty || document.front != c )
				throw new XMLException("Invalid XML tag");

			document.popFront();
		}
	}

	/**
	 * Resolves the entity reference the document points to and
	 * appends the character it stands for to buff.
	 *
	 * On entry the document points to the '&', on return it points
	 * to the terminating ';'.
	 *
	 * Throws: XMLException for an unknown, overlong or unterminated
	 *     reference.
	 */
	private void parseAmpersand(ref Appender!(string) buff)
	{
		ElementType!(typeof(document))[5] sequence;
		size_t index;

		document.popFront();

		while ( !document.empty && document.front != ';' )
		{
			// None of the supported references is longer than the buffer.
			if ( index == sequence.length )
				throw new XMLException("Unregonized escape secuence");

			sequence[index++] = document.front;
			document.popFront();
		}

		if ( document.empty )
			throw new XMLException("Unregonized escape secuence");

		switch ( sequence[0 .. index] )
		{
			case "#34":
			case "quot":
				buff.put('"');
				break;
			case "#38":
			case "amp":
				buff.put('&');
				break;
			case "#39":
			case "apos":
				buff.put('\'');
				break;
			case "#60":
			case "lt":
				buff.put('<');
				break;
			case "#62":
			case "gt":
				buff.put('>');
				break;
			default:
				throw new XMLException("Unregonized escape secuence");
		}
	}
}

unittest
{
	auto reader = new XMLReader!string("<test>");
	assert(reader.front.type == XMLNodeType.StartTag);
	assert(reader.front.value == "test");
}

unittest
{
	auto reader = new XMLReader!string(
		`<?xml version="1.0"?><!DOCTYPE a><!-- hi --><a b = 'x&amp;y' c="1"><![CDATA[1]]2]]></a>`);

	assert(reader.front.type == XMLNodeType.PI);
	assert(reader.front.value == `xml version="1.0"`);

	reader.popFront();
	assert(reader.front.type == XMLNodeType.DocType);
	assert(reader.front.value == "a");

	reader.popFront();
	assert(reader.front.type == XMLNodeType.Comment);
	assert(reader.front.value == "hi");

	reader.popFront();
	assert(reader.front.type == XMLNodeType.StartTag);
	assert(reader.front.value == "a");
	assert(reader.front.attributes["b"] == "x&y");
	assert(reader.front.attributes["c"] == "1");

	reader.popFront();
	assert(reader.front.type == XMLNodeType.CData);
	assert(reader.front.value == "1]]2");

	reader.popFront();
	assert(reader.endTag("a"));
	assert(!reader.empty);

	reader.popFront();
	assert(reader.empty);
}

/**
 * Skip the current tag and its content.
 * Leaves the reader pointing to the end tag with the same depth.
 *
 * An empty tag is left untouched; for any other node that is not
 * a start tag the reader simply advances to the next node.
 */
void skipTag(T)(XMLReader!T reader)
{
	if ( reader.front.type == XMLNodeType.EmptyTag )
		return;
	if ( reader.front.type != XMLNodeType.StartTag )
	{
		reader.popFront();
		return;
	}

	string tagName = reader.front.value;
	size_t depth;

	while ( !reader.empty )
	{
		if ( reader.front.type == XMLNodeType.StartTag )
			depth++;

		if ( reader.front.type == XMLNodeType.EndTag )
			depth--;

		// Back at the depth we started from: this is the matching end tag.
		if ( depth == 0 && reader.front.value == tagName )
			return;

		reader.popFront();
	}
}

unittest
{
	auto reader = new XMLReader!string("<a><b/><a>t</a></a><c/>");

	reader.skipTag();
	assert(reader.endTag("a"));

	reader.popFront();
	assert(reader.front.type == XMLNodeType.EmptyTag);
	assert(reader.front.value == "c");
}

/**
 * Is this an end tag with name tagName.
 */
bool endTag(T)(XMLReader!T reader, string tagName)
{
	return reader.front.type == XMLNodeType.EndTag && reader.front.value == tagName;
}

/// ditto.
bool endTag(T)(XMLReader!T reader, string[] tagNames ...)
{
	return reader.front.type == XMLNodeType.EndTag && tagNames.canFind(reader.front.value);
}

/**
 * Thrown by XMLReader when the document is malformed.
 */
class XMLException : Exception
{
	this (string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
	{
		super(msg, file, line, next);
	}
}

/**
 * A forward range over the code units of a string; unlike the
 * range primitives for string it does not decode the characters.
 */
struct ByChar
{
	string data;

	/// The first code unit, the range must not be empty.
	@property char front()
	{
		return data[0];
	}

	@property bool empty()
	{
		return !data.length;
	}

	void popFront()
	{
		assert(data.length, "Attempting to popFront() past the end of an array");
		data = data[1 .. $];
	}

	@property ByChar save()
	{
		return this;
	}

	alias data this;
}