1 /*
2  * This file is part of gtkD.
3  *
4  * gtkD is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU Lesser General Public License
6  * as published by the Free Software Foundation; either version 3
7  * of the License, or (at your option) any later version, with
8  * some exceptions, please read the COPYING file.
9  *
10  * gtkD is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with gtkD; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
18  */
19 
20 module utils.XML;
21 
22 import std.algorithm;
23 import std.array;
24 import std.exception;
25 import std.range;
26 import std.string;
27 import std.traits: isSomeChar;
28 import std.uni;
29 
/**
 * One node of an XML document as reported by XMLReader.
 */
struct XMLNode
{
	/// Kind of node; a default constructed node has type None.
	XMLNodeType type = XMLNodeType.None;

	/// Text carried by the node, e.g. the tag name for tags or the
	/// character data for text nodes.
	string value;

	/// Attribute values of a tag, indexed by attribute name.
	string[string] attributes;
}
37 
/**
 * The kinds of node an XMLNode can represent.
 */
enum XMLNodeType : int
{
	None = 0,    /// No node; the default value.
	PI,          /// Processing instruction: <? ... ?>
	StartTag,    /// Opening tag: <name ...>
	Text,        /// Character data between tags.
	CData,       /// CDATA section: <![CDATA[ ... ]]>
	DocType,     /// Document type declaration: <!DOCTYPE ... >
	Comment,     /// Comment: <!-- ... -->
	EmptyTag,    /// Self closing tag: <name ... />
	EndTag,      /// Closing tag: </name>
	DocumentEnd  /// The whole document has been consumed.
}
51 
/**
 * A pull parser that walks an XML document one node at a time.
 *
 * The node the reader is positioned on is available as `front`, `popFront`
 * moves to the next node and `empty` tells when the document has been
 * consumed. After the last node `front.type` is XMLNodeType.DocumentEnd.
 *
 * The parser is deliberately small: it does not check that tags are
 * balanced and only knows the five predefined entities plus numeric
 * character references.
 *
 * Throws: XMLException when malformed markup is encountered.
 */
class XMLReader(T)
	if (isInputRange!T &&  isSomeChar!(ElementType!T) )
{
	/// The node the reader is currently positioned on.
	XMLNode front;

	// A string is wrapped in ByChar so it is consumed one char at a time.
	static if ( is( T == string ) )
		private ByChar document;
	else
		private T document;

	/**
	 * Creates a reader for document and positions it on the first node.
	 */
	this(T document)
	{
		static if ( is( T == string ) )
			this.document = ByChar(document);
		else
			this.document = document;

		popFront();
	}

	/**
	 * Advances to the next node, or to a DocumentEnd node when nothing
	 * is left to read.
	 */
	void popFront()
	{
		front = XMLNode();

		if ( document.empty )
		{
			front.type = XMLNodeType.DocumentEnd;
			return;
		}

		if ( document.front == '<' )
			parseTag();
		else
			parseText();
	}

	/**
	 * True once the input is exhausted and the DocumentEnd node was reached.
	 */
	@property bool empty()
	{
		return document.empty && front.type == XMLNodeType.DocumentEnd;
	}

	/// Throws when the document ends where more markup is required.
	private void requireInput()
	{
		if ( document.empty )
			throw new XMLException("Unexpected end of document");
	}

	/// Consumes literal from the document, throws if the input differs.
	private void expect(string literal)
	{
		foreach ( char c; literal )
		{
			if ( document.empty || document.front != c )
				throw new XMLException("Invalid XML tag");

			document.popFront();
		}
	}

	/// Dispatches on the character(s) after '<' to the matching parser.
	private void parseTag()
	{
		document.popFront();
		requireInput();

		switch ( document.front )
		{
			case '!':
				document.popFront();
				requireInput();

				switch ( document.front )
				{
					case '[':
						expect("[CDATA[");
						parseCDATA();
						break;
					case 'D':
						// The '!' is already consumed, only the keyword is left.
						expect("DOCTYPE");
						parseDocType();
						break;
					case '-':
						expect("--");
						parseComment();
						break;
					default:
						throw new XMLException("Invalid XML tag");
				}
				break;
			case '?':
				document.popFront();
				parsePI();
				break;
			case '/':
				document.popFront();
				// An end tag has the same shape as a start tag.
				parseStartTag();
				front.type = XMLNodeType.EndTag;
				break;
			default:
				parseStartTag();
				break;
		}

		// Whitespace directly after a tag is never reported as text.
		skipWhitespace();
	}

	/// Reads a CDATA section up to and including the closing "]]>".
	private void parseCDATA()
	{
		front.type = XMLNodeType.CData;
		auto buff = appender!string();
		size_t brackets; // number of ']' directly preceding the current char.

		while ( !document.empty )
		{
			auto c = document.front;
			document.popFront();

			if ( c == '>' && brackets >= 2 )
			{
				// The last two ']' belong to the terminator, not the data.
				front.value = buff.data[0 .. $ - 2];
				return;
			}

			brackets = ( c == ']' ) ? brackets + 1 : 0;
			buff.put(c);
		}

		// Unterminated section: report what was read.
		front.value = buff.data;
	}

	/// Reads a DOCTYPE declaration, including an internal subset in [...].
	private void parseDocType()
	{
		front.type = XMLNodeType.DocType;
		auto buff = appender!string();
		int bracketCount;

		skipWhitespace();

		while ( !document.empty )
		{
			auto c = document.front;
			document.popFront();

			if ( c == '[' )
				bracketCount++;
			else if ( c == ']' )
				bracketCount--;
			else if ( c == '>' && bracketCount == 0 )
			{
				// A '>' outside of the brackets closes the declaration.
				front.value = buff.data.stripRight();
				return;
			}

			buff.put(c);
		}

		// Unterminated declaration: report what was read.
		front.value = buff.data.stripRight();
	}

	/// Reads a comment up to and including the closing "-->".
	private void parseComment()
	{
		front.type = XMLNodeType.Comment;
		auto buff = appender!string();
		size_t dashes; // number of '-' directly preceding the current char.

		while ( !document.empty )
		{
			auto c = document.front;
			document.popFront();

			if ( dashes == 2 )
			{
				if ( c != '>' )
					throw new XMLException("-- not allowed in comments.");

				// Drop the "--" of the terminator from the comment text.
				front.value = buff.data[0 .. $ - 2].strip();
				return;
			}

			dashes = ( c == '-' ) ? dashes + 1 : 0;
			buff.put(c);
		}

		// Unterminated comment: report what was read.
		front.value = buff.data.strip();
	}

	/// Reads a processing instruction up to and including the closing "?>".
	private void parsePI()
	{
		front.type = XMLNodeType.PI;
		auto buff = appender!string();
		bool afterQuestionMark;

		while ( !document.empty )
		{
			auto c = document.front;
			document.popFront();

			if ( c == '>' && afterQuestionMark )
			{
				// Drop the '?' of the terminator from the value.
				front.value = buff.data[0 .. $ - 1].stripRight();
				return;
			}

			afterQuestionMark = ( c == '?' );
			buff.put(c);
		}

		// Unterminated instruction: report what was read.
		front.value = buff.data.stripRight();
	}

	/**
	 * Reads the tag name and the attributes of a start, empty or end tag.
	 * The type is set to StartTag, or EmptyTag when a '/' precedes the '>'.
	 */
	private void parseStartTag()
	{
		front.type = XMLNodeType.StartTag;
		auto buff = appender!string();

		while ( !document.empty && !(document.front.isWhite() || document.front == '/' || document.front == '>') )
		{
			buff.put(document.front);
			document.popFront();
		}

		front.value = buff.data;

		while ( !document.empty )
		{
			skipWhitespace();
			requireInput();

			if ( document.front == '/' )
			{
				front.type = XMLNodeType.EmptyTag;
				document.popFront();
				requireInput();
			}

			if ( document.front == '>' )
			{
				document.popFront();
				return;
			}

			// Attribute name.
			buff = appender!string();

			while ( !document.empty && !(document.front.isWhite() || document.front == '=') )
			{
				buff.put(document.front);
				document.popFront();
			}

			string attName = buff.data;

			// Any amount of whitespace is allowed on both sides of the '='.
			skipWhitespace();
			requireInput();

			if ( document.front == '=' )
				document.popFront();

			skipWhitespace();
			requireInput();

			// Attribute value, delimited by a pair of matching quotes.
			ElementType!(typeof(document)) quote = document.front;

			if ( quote != '"' && quote != '\'' )
				throw new XMLException("Attribute value must be quoted");

			document.popFront();
			buff = appender!string();

			AttValue: while ( !document.empty )
			{
				switch ( document.front )
				{
					case '\'':
					case '"':
						// The other kind of quote is ordinary data.
						if ( document.front != quote )
							goto default;

						document.popFront();
						break AttValue;
					case '&':
						parseAmpersand(buff);
						break;
					default:
						buff.put(document.front);
						break;
				}

				// Pops the character just handled, or the ';' of an entity.
				document.popFront();
			}

			front.attributes[attName] = buff.data;
		}
	}

	/// Reads character data up to the next '<', resolving entities.
	private void parseText()
	{
		front.type = XMLNodeType.Text;
		auto buff = appender!string();

		Text: while ( !document.empty )
		{
			switch ( document.front )
			{
				case '<':
					break Text;
				case '&':
					parseAmpersand(buff);
					break;
				default:
					buff.put(document.front);
					break;
			}

			// Pops the character just handled, or the ';' of an entity.
			document.popFront();
		}

		front.value = buff.data.stripRight();
	}

	/// Advances the document past any whitespace.
	private void skipWhitespace()
	{
		while ( !document.empty && isWhite(document.front) )
			document.popFront();
	}

	/**
	 * Resolves the entity or character reference starting at the current
	 * '&' and appends the resulting character to buff.
	 *
	 * Supported are quot, amp, apos, lt, gt and numeric references in
	 * decimal (&#60;) or hexadecimal (&#x3C;) notation.
	 * The document is left on the terminating ';', the caller pops it.
	 */
	private void parseAmpersand(Appender!(string) buff)
	{
		// Large enough for "#x" followed by eight hexadecimal digits.
		ElementType!(typeof(document))[10] sequence;
		size_t index;

		document.popFront();

		while ( !document.empty && document.front != ';' )
		{
			if ( index == sequence.length )
				throw new XMLException("Unregonized escape secuence");

			sequence[index++] = document.front;
			document.popFront();
		}

		if ( document.empty )
			throw new XMLException("Unterminated escape sequence");

		switch ( sequence[0 .. index] )
		{
			case "quot":
				buff.put('"');
				break;
			case "amp":
				buff.put('&');
				break;
			case "apos":
				buff.put('\'');
				break;
			case "lt":
				buff.put('<');
				break;
			case "gt":
				buff.put('>');
				break;
			default:
				auto entity = sequence[0 .. index];

				if ( entity.length < 2 || entity[0] != '#' )
					throw new XMLException("Unregonized escape secuence");

				bool hex = ( entity[1] == 'x' );
				size_t start = hex ? 2 : 1;
				auto digits = entity[start .. $];

				if ( digits.length == 0 )
					throw new XMLException("Unregonized escape secuence");

				// At most nine decimal or eight hexadecimal digits fit in
				// the buffer, so the value can not overflow an uint.
				uint code;

				foreach ( c; digits )
				{
					uint digit;

					if ( c >= '0' && c <= '9' )
						digit = cast(uint)(c - '0');
					else if ( hex && c >= 'a' && c <= 'f' )
						digit = cast(uint)(c - 'a') + 10;
					else if ( hex && c >= 'A' && c <= 'F' )
						digit = cast(uint)(c - 'A') + 10;
					else
						throw new XMLException("Unregonized escape secuence");

					code = code * (hex ? 16 : 10) + digit;
				}

				// Reject NUL, surrogates and values outside of Unicode.
				if ( code == 0 || code > 0x10FFFF || (code >= 0xD800 && code <= 0xDFFF) )
					throw new XMLException("Unregonized escape secuence");

				buff.put(cast(dchar) code);
				break;
		}
	}

	// The test feeds string literals, so it only fits the string instance.
	static if ( is( T == string ) )
	{
		unittest
		{
			auto reader = new XMLReader!string("&lt;test&gt;");
			assert(reader.front.value == "<test>");

			reader = new XMLReader!string(
				"<?xml version=\"1.0\"?><!DOCTYPE html><!-- note -->"
				~ "<a b = 'c&#65;&#x42;'><![CDATA[x]]y]]></a>");

			assert(reader.front.type == XMLNodeType.PI);
			assert(reader.front.value == "xml version=\"1.0\"");

			reader.popFront();
			assert(reader.front.type == XMLNodeType.DocType);
			assert(reader.front.value == "html");

			reader.popFront();
			assert(reader.front.type == XMLNodeType.Comment);
			assert(reader.front.value == "note");

			reader.popFront();
			assert(reader.front.type == XMLNodeType.StartTag);
			assert(reader.front.value == "a");
			assert(reader.front.attributes["b"] == "cAB");

			reader.popFront();
			assert(reader.front.type == XMLNodeType.CData);
			assert(reader.front.value == "x]]y");

			reader.popFront();
			assert(reader.front.type == XMLNodeType.EndTag);
			assert(reader.front.value == "a");

			reader.popFront();
			assert(reader.empty);

			assertThrown!XMLException(new XMLReader!string("<a b"));
			assertThrown!XMLException(new XMLReader!string("<"));
		}
	}
}
424 
/**
 * Skip the current tag and its content.
 *
 * When the reader is on a start tag it is advanced until the end tag that
 * closes it, and is left pointing at that end tag. An empty tag is left
 * untouched, for any other node the reader is advanced by one node.
 */
void skipTag(T)(XMLReader!T reader)
{
	auto startType = reader.front.type;

	// An empty tag has no content to skip.
	if ( startType == XMLNodeType.EmptyTag )
		return;

	if ( startType != XMLNodeType.StartTag )
	{
		reader.popFront();
		return;
	}

	string name = reader.front.value;
	size_t nesting = 0;

	for ( ; !reader.empty; reader.popFront() )
	{
		switch ( reader.front.type )
		{
			case XMLNodeType.StartTag:
				++nesting;
				break;
			case XMLNodeType.EndTag:
				--nesting;
				break;
			default:
				break;
		}

		// Back at the depth we started on: this closes the skipped tag.
		if ( nesting == 0 && reader.front.value == name )
			return;
	}
}
456 
/**
 * Tells whether the reader is positioned on an end tag named tagName.
 */
bool endTag(T)(XMLReader!T reader, string tagName)
{
	if ( reader.front.type != XMLNodeType.EndTag )
		return false;

	return reader.front.value == tagName;
}
464 
/**
 * Tells whether the reader is positioned on an end tag whose name is
 * one of tagNames.
 */
bool endTag(T)(XMLReader!T reader, string[] tagNames ...)
{
	if ( reader.front.type != XMLNodeType.EndTag )
		return false;

	foreach ( candidate; tagNames )
	{
		if ( candidate == reader.front.value )
			return true;
	}

	return false;
}
470 
/**
 * The exception thrown by the XML reader for markup it can not parse.
 */
class XMLException : Exception
{
	/**
	 * Params:
	 *     msg  = description of the problem.
	 *     file = source file reported as the origin, defaults to the caller.
	 *     line = source line reported as the origin, defaults to the caller.
	 *     next = an exception to chain to this one, if any.
	 */
	this (
		string msg,
		string file = __FILE__,
		size_t line = __LINE__,
		Throwable next = null)
	{
		super(msg, file, line, next);
	}
}
478 
/**
 * A forward range over a string that yields one char at a time.
 */
struct ByChar
{
	/// The part of the string that has not been consumed yet.
	string data;

	/// True when there are no chars left.
	@property bool empty()
	{
		return data.length == 0;
	}

	/// The first of the remaining chars.
	@property char front()
	{
		return data[0];
	}

	/// Drops the first of the remaining chars.
	void popFront()
	{
		assert(data.length, "Attempting to popFront() past the end of an array");
		data = data[1 .. data.length];
	}

	/// A copy of the range at its current position.
	@property ByChar save()
	{
		return this;
	}

	// Lets a ByChar be used wherever the underlying string is expected.
	alias data this;
}