pugixml.cpp
Go to the documentation of this file.
1
14#ifndef SOURCE_PUGIXML_CPP
15#define SOURCE_PUGIXML_CPP
16
17#include "pugixml.hpp"
18
19#include <stdlib.h>
20#include <stdio.h>
21#include <string.h>
22#include <assert.h>
23
24#ifdef PUGIXML_WCHAR_MODE
25# include <wchar.h>
26#endif
27
28#ifndef PUGIXML_NO_XPATH
29# include <math.h>
30# include <float.h>
31# ifdef PUGIXML_NO_EXCEPTIONS
32# include <setjmp.h>
33# endif
34#endif
35
36#ifndef PUGIXML_NO_STL
37# include <istream>
38# include <ostream>
39# include <string>
40#endif
41
42// For placement new
43#include <new>
44
45#ifdef _MSC_VER
46# pragma warning(push)
47# pragma warning(disable: 4127) // conditional expression is constant
48# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
49# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
50# pragma warning(disable: 4702) // unreachable code
51# pragma warning(disable: 4996) // this function or variable may be unsafe
52# pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
53#endif
54
55#ifdef __INTEL_COMPILER
56# pragma warning(disable: 177) // function was declared but never referenced
57# pragma warning(disable: 279) // controlling expression is constant
58# pragma warning(disable: 1478 1786) // function was declared "deprecated"
59# pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
60#endif
61
62#if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
63# pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
64#endif
65
66#ifdef __BORLANDC__
67# pragma option push
68# pragma warn -8008 // condition is always false
69# pragma warn -8066 // unreachable code
70#endif
71
72#ifdef __SNC__
73// Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
74# pragma diag_suppress=178 // function was declared but never referenced
75# pragma diag_suppress=237 // controlling expression is constant
76#endif
77
78// Inlining controls
79#if defined(_MSC_VER) && _MSC_VER >= 1300
80# define PUGI__NO_INLINE __declspec(noinline)
81#elif defined(__GNUC__)
82# define PUGI__NO_INLINE __attribute__((noinline))
83#else
84# define PUGI__NO_INLINE
85#endif
86
87// Branch weight controls
88#if defined(__GNUC__)
89# define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
90#else
91# define PUGI__UNLIKELY(cond) (cond)
92#endif
93
94// Simple static assertion
95#define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
96
97// Digital Mars C++ bug workaround for passing char loaded from memory via stack
98#ifdef __DMC__
99# define PUGI__DMC_VOLATILE volatile
100#else
101# define PUGI__DMC_VOLATILE
102#endif
103
104// Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
105#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
106using std::memcpy;
107using std::memmove;
108#endif
109
110// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
111#if defined(_MSC_VER) && !defined(__S3E__)
112# define PUGI__MSVC_CRT_VERSION _MSC_VER
113#endif
114
115#ifdef PUGIXML_HEADER_ONLY
116# define PUGI__NS_BEGIN namespace pugi { namespace impl {
117# define PUGI__NS_END } }
118# define PUGI__FN inline
119# define PUGI__FN_NO_INLINE inline
120#else
121# if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
122# define PUGI__NS_BEGIN namespace pugi { namespace impl {
123# define PUGI__NS_END } }
124# else
125# define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
126# define PUGI__NS_END } } }
127# endif
128# define PUGI__FN
129# define PUGI__FN_NO_INLINE PUGI__NO_INLINE
130#endif
131
132// uintptr_t
133#if !defined(_MSC_VER) || _MSC_VER >= 1600
134# include <stdint.h>
135#else
136# ifndef _UINTPTR_T_DEFINED
137// No native uintptr_t in MSVC6 and in some WinCE versions
138typedef size_t uintptr_t;
139#define _UINTPTR_T_DEFINED
140# endif
142 typedef unsigned __int8 uint8_t;
143 typedef unsigned __int16 uint16_t;
144 typedef unsigned __int32 uint32_t;
146#endif
147
148// Memory allocation
150 PUGI__FN void* default_allocate(size_t size)
151 {
152 return malloc(size);
153 }
154
156 {
157 free(ptr);
158 }
159
160 template <typename T>
162 {
163 static allocation_function allocate;
164 static deallocation_function deallocate;
165 };
166
167 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
168 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
169
172
173// String utilities
175 // Get string length
176 PUGI__FN size_t strlength(const char_t* s)
177 {
178 assert(s);
179
180 #ifdef PUGIXML_WCHAR_MODE
181 return wcslen(s);
182 #else
183 return strlen(s);
184 #endif
185 }
186
187 // Compare two strings
188 PUGI__FN bool strequal(const char_t* src, const char_t* dst)
189 {
190 assert(src && dst);
191
192 #ifdef PUGIXML_WCHAR_MODE
193 return wcscmp(src, dst) == 0;
194 #else
195 return strcmp(src, dst) == 0;
196 #endif
197 }
198
199 // Compare lhs with [rhs_begin, rhs_end)
200 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
201 {
202 for (size_t i = 0; i < count; ++i)
203 if (lhs[i] != rhs[i])
204 return false;
205
206 return lhs[count] == 0;
207 }
208
209 // Get length of wide string, even if CRT lacks wide character support
210 PUGI__FN size_t strlength_wide(const wchar_t* s)
211 {
212 assert(s);
213
214 #ifdef PUGIXML_WCHAR_MODE
215 return wcslen(s);
216 #else
217 const wchar_t* end = s;
218 while (*end) end++;
219 return static_cast<size_t>(end - s);
220 #endif
221 }
222
#ifdef PUGIXML_WCHAR_MODE
// Copy zero-terminated source into dest character by character, assuming all symbols are ASCII;
// dest receives every source character followed by a terminating zero
PUGI__FN void widen_ascii(wchar_t* dest, const char* source)
{
	while (*source)
	{
		*dest = *source;

		++dest;
		++source;
	}

	*dest = 0;
}
#endif
232
233#if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
234// auto_ptr-like buffer holder for exception recovery
237 {
238 void* data;
239 void (*deleter)(void*);
240
241 buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_)
242 {
243 }
244
246 {
247 if (data) deleter(data);
248 }
249
250 void* release()
251 {
252 void* result = data;
253 data = 0;
254 return result;
255 }
256 };
258#endif
259
261 static const size_t xml_memory_page_size =
262 #ifdef PUGIXML_MEMORY_PAGE_SIZE
263 PUGIXML_MEMORY_PAGE_SIZE
264 #else
265 32768
266 #endif
267 ;
268
274
275 struct xml_allocator;
276
278 {
280 {
281 if (!memory) return 0; //$ redundant, left for performance
282
283 xml_memory_page* result = static_cast<xml_memory_page*>(memory);
284
285 result->allocator = 0;
286 result->memory = 0;
287 result->prev = 0;
288 result->next = 0;
289 result->busy_size = 0;
290 result->freed_size = 0;
291
292 return result;
293 }
294
296
297 void* memory;
298
301
302 size_t busy_size;
304
305 char data[1];
306 };
307
309 {
310 uint16_t page_offset; // offset from page->data
311 uint16_t full_size; // 0 if string occupies whole page
312 };
313
315 {
316 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
317 {
318 }
319
321 {
322 size_t size = offsetof(xml_memory_page, data) + data_size;
323
324 // allocate block with some alignment, leaving memory for worst-case padding
325 void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
326 if (!memory) return 0;
327
328 // align upwards to page boundary
329 void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
330
331 // prepare page structure
332 xml_memory_page* page = xml_memory_page::construct(page_memory);
333 assert(page);
334
335 page->memory = memory;
336 page->allocator = _root->allocator;
337
338 return page;
339 }
340
342 {
344 }
345
346 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
347
348 void* allocate_memory(size_t size, xml_memory_page*& out_page)
349 {
350 if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
351
352 void* buf = _root->data + _busy_size;
353
354 _busy_size += size;
355
356 out_page = _root;
357
358 return buf;
359 }
360
361 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
362 {
363 if (page == _root) page->busy_size = _busy_size;
364
365 assert(ptr >= page->data && ptr < page->data + page->busy_size);
366 (void)!ptr;
367
368 page->freed_size += size;
369 assert(page->freed_size <= page->busy_size);
370
371 if (page->freed_size == page->busy_size)
372 {
373 if (page->next == 0)
374 {
375 assert(_root == page);
376
377 // top page freed, just reset sizes
378 page->busy_size = page->freed_size = 0;
379 _busy_size = 0;
380 }
381 else
382 {
383 assert(_root != page);
384 assert(page->prev);
385
386 // remove from the list
387 page->prev->next = page->next;
388 page->next->prev = page->prev;
389
390 // deallocate
391 deallocate_page(page);
392 }
393 }
394 }
395
396 char_t* allocate_string(size_t length)
397 {
398 // allocate memory for string and header block
399 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
400
401 // round size up to pointer alignment boundary
402 size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
403
404 xml_memory_page* page;
405 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
406
407 if (!header) return 0;
408
409 // setup header
410 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;
411
412 assert(page_offset >= 0 && page_offset < (1 << 16));
413 header->page_offset = static_cast<uint16_t>(page_offset);
414
415 // full_size == 0 for large strings that occupy the whole page
416 assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
417 header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
418
419 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
420 // header is guaranteed a pointer-sized alignment, which should be enough for char_t
421 return static_cast<char_t*>(static_cast<void*>(header + 1));
422 }
423
424 void deallocate_string(char_t* string)
425 {
426 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
427 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
428
429 // get header
430 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
431
432 // deallocate
433 size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;
434 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
435
436 // if full_size == 0 then this string occupies the whole page
437 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
438
439 deallocate_memory(header, full_size, page);
440 }
441
444 };
445
447 {
448 const size_t large_allocation_threshold = xml_memory_page_size / 4;
449
450 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
451 out_page = page;
452
453 if (!page) return 0;
454
455 if (size <= large_allocation_threshold)
456 {
458
459 // insert page at the end of linked list
460 page->prev = _root;
461 _root->next = page;
462 _root = page;
463
464 _busy_size = size;
465 }
466 else
467 {
468 // insert page before the end of linked list, so that it is deleted as soon as possible
469 // the last page is not deleted even if it's empty (see deallocate_memory)
470 assert(_root->prev);
471
472 page->prev = _root->prev;
473 page->next = _root;
474
475 _root->prev->next = page;
476 _root->prev = page;
477 }
478
479 // allocate inside page
480 page->busy_size = size;
481
482 return page->data;
483 }
485
486namespace pugi
487{
490 {
492 xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
493 {
494 }
495
497
498 char_t* name;
499 char_t* value;
500
503 };
504
507 {
510 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
511 {
512 }
513
515
517
518 char_t* name;
519 char_t* value;
520
522
525
527 };
528}
529
532 {
533 char_t* buffer;
535 };
536
537 struct xml_document_struct: public xml_node_struct, public xml_allocator
538 {
539 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
540 {
541 }
542
543 const char_t* buffer;
544
546 };
547
548 inline xml_allocator& get_allocator(const xml_node_struct* node)
549 {
550 assert(node);
551
552 return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator;
553 }
555
556// Low-level DOM operations
558 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
559 {
560 xml_memory_page* page;
561 void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
562
563 return new (memory) xml_attribute_struct(page);
564 }
565
566 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
567 {
568 xml_memory_page* page;
569 void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
570
571 return new (memory) xml_node_struct(page, type);
572 }
573
574 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
575 {
576 uintptr_t header = a->header;
577
578 if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);
579 if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);
580
581 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
582 }
583
584 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
585 {
586 uintptr_t header = n->header;
587
588 if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);
589 if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);
590
591 for (xml_attribute_struct* attr = n->first_attribute; attr; )
592 {
593 xml_attribute_struct* next = attr->next_attribute;
594
595 destroy_attribute(attr, alloc);
596
597 attr = next;
598 }
599
600 for (xml_node_struct* child = n->first_child; child; )
601 {
602 xml_node_struct* next = child->next_sibling;
603
604 destroy_node(child, alloc);
605
606 child = next;
607 }
608
609 alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
610 }
611
612 inline void append_node(xml_node_struct* child, xml_node_struct* node)
613 {
614 child->parent = node;
615
616 xml_node_struct* head = node->first_child;
617
618 if (head)
619 {
620 xml_node_struct* tail = head->prev_sibling_c;
621
622 tail->next_sibling = child;
623 child->prev_sibling_c = tail;
624 head->prev_sibling_c = child;
625 }
626 else
627 {
628 node->first_child = child;
629 child->prev_sibling_c = child;
630 }
631
632 child->next_sibling = 0;
633 }
634
635 inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
636 {
637 child->parent = node;
638
639 xml_node_struct* head = node->first_child;
640
641 if (head)
642 {
643 child->prev_sibling_c = head->prev_sibling_c;
644 head->prev_sibling_c = child;
645 }
646 else
647 child->prev_sibling_c = child;
648
649 child->next_sibling = head;
650 node->first_child = child;
651 }
652
653 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
654 {
655 xml_node_struct* parent = node->parent;
656
657 child->parent = parent;
658
659 if (node->next_sibling)
660 node->next_sibling->prev_sibling_c = child;
661 else
662 parent->first_child->prev_sibling_c = child;
663
664 child->next_sibling = node->next_sibling;
665 child->prev_sibling_c = node;
666
667 node->next_sibling = child;
668 }
669
670 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
671 {
672 xml_node_struct* parent = node->parent;
673
674 child->parent = parent;
675
676 if (node->prev_sibling_c->next_sibling)
677 node->prev_sibling_c->next_sibling = child;
678 else
679 parent->first_child = child;
680
681 child->prev_sibling_c = node->prev_sibling_c;
682 child->next_sibling = node;
683
684 node->prev_sibling_c = child;
685 }
686
687 inline void remove_node(xml_node_struct* node)
688 {
689 xml_node_struct* parent = node->parent;
690
691 if (node->next_sibling)
692 node->next_sibling->prev_sibling_c = node->prev_sibling_c;
693 else if (parent->first_child)
694 parent->first_child->prev_sibling_c = node->prev_sibling_c;
695
696 if (node->prev_sibling_c->next_sibling)
697 node->prev_sibling_c->next_sibling = node->next_sibling;
698 else
699 parent->first_child = node->next_sibling;
700 }
701
702 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
703 {
704 xml_node_struct* child = allocate_node(alloc, type);
705 if (!child) return 0;
706
707 append_node(child, node);
708
709 return child;
710 }
711
712 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
713 {
714 xml_attribute_struct* a = allocate_attribute(alloc);
715 if (!a) return 0;
716
717 xml_attribute_struct* first_attribute = node->first_attribute;
718
719 if (first_attribute)
720 {
721 xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c;
722
723 last_attribute->next_attribute = a;
724 a->prev_attribute_c = last_attribute;
725 first_attribute->prev_attribute_c = a;
726 }
727 else
728 {
729 node->first_attribute = a;
730 a->prev_attribute_c = a;
731 }
732
733 return a;
734 }
736
737// Helper classes for code generation
740 {
741 enum { value = 0 };
742 };
743
744 struct opt_true
745 {
746 enum { value = 1 };
747 };
749
750// Unicode utilities
753 {
754 return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
755 }
756
758 {
759 return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
760 }
761
763 {
764 typedef size_t value_type;
765
767 {
768 // U+0000..U+007F
769 if (ch < 0x80) return result + 1;
770 // U+0080..U+07FF
771 else if (ch < 0x800) return result + 2;
772 // U+0800..U+FFFF
773 else return result + 3;
774 }
775
777 {
778 // U+10000..U+10FFFF
779 return result + 4;
780 }
781 };
782
784 {
786
788 {
789 // U+0000..U+007F
790 if (ch < 0x80)
791 {
792 *result = static_cast<uint8_t>(ch);
793 return result + 1;
794 }
795 // U+0080..U+07FF
796 else if (ch < 0x800)
797 {
798 result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
799 result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
800 return result + 2;
801 }
802 // U+0800..U+FFFF
803 else
804 {
805 result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
806 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
807 result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
808 return result + 3;
809 }
810 }
811
813 {
814 // U+10000..U+10FFFF
815 result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
816 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
817 result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
818 result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
819 return result + 4;
820 }
821
823 {
824 return (ch < 0x10000) ? low(result, ch) : high(result, ch);
825 }
826 };
827
829 {
830 typedef size_t value_type;
831
833 {
834 return result + 1;
835 }
836
838 {
839 return result + 2;
840 }
841 };
842
844 {
846
848 {
849 *result = static_cast<uint16_t>(ch);
850
851 return result + 1;
852 }
853
855 {
856 uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
857 uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
858
859 result[0] = static_cast<uint16_t>(0xD800 + msh);
860 result[1] = static_cast<uint16_t>(0xDC00 + lsh);
861
862 return result + 2;
863 }
864
866 {
867 return (ch < 0x10000) ? low(result, ch) : high(result, ch);
868 }
869 };
870
872 {
873 typedef size_t value_type;
874
876 {
877 return result + 1;
878 }
879
881 {
882 return result + 1;
883 }
884 };
885
887 {
889
891 {
892 *result = ch;
893
894 return result + 1;
895 }
896
898 {
899 *result = ch;
900
901 return result + 1;
902 }
903
905 {
906 *result = ch;
907
908 return result + 1;
909 }
910 };
911
913 {
915
917 {
918 *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
919
920 return result + 1;
921 }
922
924 {
925 (void)ch;
926
927 *result = '?';
928
929 return result + 1;
930 }
931 };
932
933 template <size_t size> struct wchar_selector;
934
935 template <> struct wchar_selector<2>
936 {
937 typedef uint16_t type;
940 };
941
942 template <> struct wchar_selector<4>
943 {
944 typedef uint32_t type;
947 };
948
949 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
950 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
951
952 template <typename Traits, typename opt_swap = opt_false> struct utf_decoder
953 {
954 static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)
955 {
956 const uint8_t utf8_byte_mask = 0x3f;
957
958 while (size)
959 {
960 uint8_t lead = *data;
961
962 // 0xxxxxxx -> U+0000..U+007F
963 if (lead < 0x80)
964 {
965 result = Traits::low(result, lead);
966 data += 1;
967 size -= 1;
968
969 // process aligned single-byte (ascii) blocks
970 if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
971 {
972 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
973 while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
974 {
975 result = Traits::low(result, data[0]);
976 result = Traits::low(result, data[1]);
977 result = Traits::low(result, data[2]);
978 result = Traits::low(result, data[3]);
979 data += 4;
980 size -= 4;
981 }
982 }
983 }
984 // 110xxxxx -> U+0080..U+07FF
985 else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
986 {
987 result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
988 data += 2;
989 size -= 2;
990 }
991 // 1110xxxx -> U+0800-U+FFFF
992 else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
993 {
994 result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
995 data += 3;
996 size -= 3;
997 }
998 // 11110xxx -> U+10000..U+10FFFF
999 else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
1000 {
1001 result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
1002 data += 4;
1003 size -= 4;
1004 }
1005 // 10xxxxxx or 11111xxx -> invalid
1006 else
1007 {
1008 data += 1;
1009 size -= 1;
1010 }
1011 }
1012
1013 return result;
1014 }
1015
1016 static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)
1017 {
1018 const uint16_t* end = data + size;
1019
1020 while (data < end)
1021 {
1022 unsigned int lead = opt_swap::value ? endian_swap(*data) : *data;
1023
1024 // U+0000..U+D7FF
1025 if (lead < 0xD800)
1026 {
1027 result = Traits::low(result, lead);
1028 data += 1;
1029 }
1030 // U+E000..U+FFFF
1031 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1032 {
1033 result = Traits::low(result, lead);
1034 data += 1;
1035 }
1036 // surrogate pair lead
1037 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)
1038 {
1039 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1040
1041 if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1042 {
1043 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1044 data += 2;
1045 }
1046 else
1047 {
1048 data += 1;
1049 }
1050 }
1051 else
1052 {
1053 data += 1;
1054 }
1055 }
1056
1057 return result;
1058 }
1059
1060 static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)
1061 {
1062 const uint32_t* end = data + size;
1063
1064 while (data < end)
1065 {
1066 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1067
1068 // U+0000..U+FFFF
1069 if (lead < 0x10000)
1070 {
1071 result = Traits::low(result, lead);
1072 data += 1;
1073 }
1074 // U+10000..U+10FFFF
1075 else
1076 {
1077 result = Traits::high(result, lead);
1078 data += 1;
1079 }
1080 }
1081
1082 return result;
1083 }
1084
1085 static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result)
1086 {
1087 for (size_t i = 0; i < size; ++i)
1088 {
1089 result = Traits::low(result, data[i]);
1090 }
1091
1092 return result;
1093 }
1094
1095 static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result)
1096 {
1097 return decode_utf16_block(data, size, result);
1098 }
1099
1100 static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result)
1101 {
1102 return decode_utf32_block(data, size, result);
1103 }
1104
1105 static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result)
1106 {
1107 return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);
1108 }
1109 };
1110
1111 template <typename T> PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length)
1112 {
1113 for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]);
1114 }
1115
#ifdef PUGIXML_WCHAR_MODE
// Endian-swap length wide characters from data into result, treating each wchar_t as the
// unsigned integer type selected by wchar_selector for sizeof(wchar_t)
PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
{
	typedef wchar_selector<sizeof(wchar_t)>::type unit_type;

	for (size_t i = 0; i < length; ++i)
	{
		unit_type swapped = endian_swap(static_cast<unit_type>(data[i]));

		result[i] = static_cast<wchar_t>(swapped);
	}
}
#endif
1123
1126 {
1127 ct_parse_pcdata = 1, // \0, &, \r, <
1128 ct_parse_attr = 2, // \0, &, \r, ', "
1129 ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
1130 ct_space = 8, // \r, \n, space, tab
1131 ct_parse_cdata = 16, // \0, ], >, \r
1132 ct_parse_comment = 32, // \0, -, >, \r
1133 ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
1134 ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
1136
1137 static const unsigned char chartype_table[256] =
1138 {
1139 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
1140 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
1141 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
1142 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
1143 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
1144 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
1145 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
1146 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
1147
1148 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
1149 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1150 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1151 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1152 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1153 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1154 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1155 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
1156 };
1157
1159 {
1160 ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
1161 ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
1162 ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
1163 ctx_digit = 8, // 0-9
1164 ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
1166
1167 static const unsigned char chartypex_table[256] =
1168 {
1169 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
1170 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
1171 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
1172 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
1173
1174 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
1175 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
1176 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
1177 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
1178
1179 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
1180 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1181 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1182 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1183 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1184 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1185 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1186 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
1187 };
1188
1189#ifdef PUGIXML_WCHAR_MODE
1190 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
1191#else
1192 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
1193#endif
1194
1195 #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
1196 #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1197
1199 {
1200 unsigned int ui = 1;
1201
1202 return *reinterpret_cast<unsigned char*>(&ui) == 1;
1203 }
1204
1206 {
1207 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1208
1209 if (sizeof(wchar_t) == 2)
1210 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1211 else
1212 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1213 }
1214
1216 {
1217 // look for BOM in first few bytes
1218 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1219 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1220 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1221 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1222 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1223
1224 // look for <, <? or <?xm in various encodings
1225 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1226 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1227 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1228 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1229 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
1230
1231 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1232 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1233 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1234
1235 // no known BOM detected, assume utf8
1236 return encoding_utf8;
1237 }
1238
1239 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
1240 {
1241 // replace wchar encoding with utf implementation
1242 if (encoding == encoding_wchar) return get_wchar_encoding();
1243
1244 // replace utf16 encoding with utf16 with specific endianness
1245 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1246
1247 // replace utf32 encoding with utf32 with specific endianness
1248 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1249
1250 // only do autodetection if no explicit encoding is requested
1251 if (encoding != encoding_auto) return encoding;
1252
1253 // skip encoding autodetection if input buffer is too small
1254 if (size < 4) return encoding_utf8;
1255
1256 // try to guess encoding (based on XML specification, Appendix F.1)
1257 const uint8_t* data = static_cast<const uint8_t*>(contents);
1258
1259 PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1260
1261 return guess_buffer_encoding(d0, d1, d2, d3);
1262 }
1263
1264 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1265 {
1266 size_t length = size / sizeof(char_t);
1267
1268 if (is_mutable)
1269 {
1270 out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
1271 out_length = length;
1272 }
1273 else
1274 {
1275 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
1276 if (!buffer) return false;
1277
1278 memcpy(buffer, contents, length * sizeof(char_t));
1279 buffer[length] = 0;
1280
1281 out_buffer = buffer;
1282 out_length = length + 1;
1283 }
1284
1285 return true;
1286 }
1287
1288#ifdef PUGIXML_WCHAR_MODE
1289 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
1290 {
1291 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
1292 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
1293 }
1294
1295 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1296 {
1297 const char_t* data = static_cast<const char_t*>(contents);
1298 size_t length = size / sizeof(char_t);
1299
1300 if (is_mutable)
1301 {
1302 char_t* buffer = const_cast<char_t*>(data);
1303
1304 convert_wchar_endian_swap(buffer, data, length);
1305
1306 out_buffer = buffer;
1307 out_length = length;
1308 }
1309 else
1310 {
1311 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
1312 if (!buffer) return false;
1313
1314 convert_wchar_endian_swap(buffer, data, length);
1315 buffer[length] = 0;
1316
1317 out_buffer = buffer;
1318 out_length = length + 1;
1319 }
1320
1321 return true;
1322 }
1323
1324 PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
1325 {
1326 const uint8_t* data = static_cast<const uint8_t*>(contents);
1327 size_t data_length = size;
1328
1329 // first pass: get length in wchar_t units
1330 size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, data_length, 0);
1331
1332 // allocate buffer of suitable length
1333 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
1334 if (!buffer) return false;
1335
1336 // second pass: convert utf8 input to wchar_t
1337 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
1338 wchar_writer::value_type oend = utf_decoder<wchar_writer>::decode_utf8_block(data, data_length, obegin);
1339
1340 assert(oend == obegin + length);
1341 *oend = 0;
1342
1343 out_buffer = buffer;
1344 out_length = length + 1;
1345
1346 return true;
1347 }
1348
1349 template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1350 {
1351 const uint16_t* data = static_cast<const uint16_t*>(contents);
1352 size_t data_length = size / sizeof(uint16_t);
1353
1354 // first pass: get length in wchar_t units
1355 size_t length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, data_length, 0);
1356
1357 // allocate buffer of suitable length
1358 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
1359 if (!buffer) return false;
1360
1361 // second pass: convert utf16 input to wchar_t
1362 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
1363 wchar_writer::value_type oend = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, data_length, obegin);
1364
1365 assert(oend == obegin + length);
1366 *oend = 0;
1367
1368 out_buffer = buffer;
1369 out_length = length + 1;
1370
1371 return true;
1372 }
1373
1374 template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1375 {
1376 const uint32_t* data = static_cast<const uint32_t*>(contents);
1377 size_t data_length = size / sizeof(uint32_t);
1378
1379 // first pass: get length in wchar_t units
1380 size_t length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, data_length, 0);
1381
1382 // allocate buffer of suitable length
1383 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
1384 if (!buffer) return false;
1385
1386 // second pass: convert utf32 input to wchar_t
1387 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
1388 wchar_writer::value_type oend = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, data_length, obegin);
1389
1390 assert(oend == obegin + length);
1391 *oend = 0;
1392
1393 out_buffer = buffer;
1394 out_length = length + 1;
1395
1396 return true;
1397 }
1398
1399 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
1400 {
1401 const uint8_t* data = static_cast<const uint8_t*>(contents);
1402 size_t data_length = size;
1403
1404 // get length in wchar_t units
1405 size_t length = data_length;
1406
1407 // allocate buffer of suitable length
1408 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
1409 if (!buffer) return false;
1410
1411 // convert latin1 input to wchar_t
1412 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
1413 wchar_writer::value_type oend = utf_decoder<wchar_writer>::decode_latin1_block(data, data_length, obegin);
1414
1415 assert(oend == obegin + length);
1416 *oend = 0;
1417
1418 out_buffer = buffer;
1419 out_length = length + 1;
1420
1421 return true;
1422 }
1423
1424 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
1425 {
1426 // get native encoding
1427 xml_encoding wchar_encoding = get_wchar_encoding();
1428
1429 // fast path: no conversion required
1430 if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1431
1432 // only endian-swapping is required
1433 if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
1434
1435 // source encoding is utf8
1436 if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);
1437
1438 // source encoding is utf16
1439 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
1440 {
1441 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1442
1443 return (native_encoding == encoding) ?
1444 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
1445 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
1446 }
1447
1448 // source encoding is utf32
1449 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
1450 {
1451 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1452
1453 return (native_encoding == encoding) ?
1454 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
1455 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
1456 }
1457
1458 // source encoding is latin1
1459 if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size);
1460
1461 assert(!"Invalid encoding");
1462 return false;
1463 }
1464#else
1465 template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1466 {
1467 const uint16_t* data = static_cast<const uint16_t*>(contents);
1468 size_t data_length = size / sizeof(uint16_t);
1469
1470 // first pass: get length in utf8 units
1471 size_t length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, data_length, 0);
1472
1473 // allocate buffer of suitable length
1474 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
1475 if (!buffer) return false;
1476
1477 // second pass: convert utf16 input to utf8
1478 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
1479 uint8_t* oend = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, data_length, obegin);
1480
1481 assert(oend == obegin + length);
1482 *oend = 0;
1483
1484 out_buffer = buffer;
1485 out_length = length + 1;
1486
1487 return true;
1488 }
1489
1490 template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1491 {
1492 const uint32_t* data = static_cast<const uint32_t*>(contents);
1493 size_t data_length = size / sizeof(uint32_t);
1494
1495 // first pass: get length in utf8 units
1496 size_t length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, data_length, 0);
1497
1498 // allocate buffer of suitable length
1499 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
1500 if (!buffer) return false;
1501
1502 // second pass: convert utf32 input to utf8
1503 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
1504 uint8_t* oend = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, data_length, obegin);
1505
1506 assert(oend == obegin + length);
1507 *oend = 0;
1508
1509 out_buffer = buffer;
1510 out_length = length + 1;
1511
1512 return true;
1513 }
1514
1515 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
1516 {
1517 for (size_t i = 0; i < size; ++i)
1518 if (data[i] > 127)
1519 return i;
1520
1521 return size;
1522 }
1523
1524 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1525 {
1526 const uint8_t* data = static_cast<const uint8_t*>(contents);
1527 size_t data_length = size;
1528
1529 // get size of prefix that does not need utf8 conversion
1530 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
1531 assert(prefix_length <= data_length);
1532
1533 const uint8_t* postfix = data + prefix_length;
1534 size_t postfix_length = data_length - prefix_length;
1535
1536 // if no conversion is needed, just return the original buffer
1537 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1538
1539 // first pass: get length in utf8 units
1540 size_t length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0);
1541
1542 // allocate buffer of suitable length
1543 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
1544 if (!buffer) return false;
1545
1546 // second pass: convert latin1 input to utf8
1547 memcpy(buffer, data, prefix_length);
1548
1549 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
1550 uint8_t* oend = utf_decoder<utf8_writer>::decode_latin1_block(postfix, postfix_length, obegin + prefix_length);
1551
1552 assert(oend == obegin + length);
1553 *oend = 0;
1554
1555 out_buffer = buffer;
1556 out_length = length + 1;
1557
1558 return true;
1559 }
1560
1561 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
1562 {
1563 // fast path: no conversion required
1564 if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1565
1566 // source encoding is utf16
1567 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
1568 {
1569 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1570
1571 return (native_encoding == encoding) ?
1572 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
1573 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
1574 }
1575
1576 // source encoding is utf32
1577 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
1578 {
1579 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1580
1581 return (native_encoding == encoding) ?
1582 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
1583 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
1584 }
1585
1586 // source encoding is latin1
1587 if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
1588
1589 assert(!"Invalid encoding");
1590 return false;
1591 }
1592#endif
1593
1594 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
1595 {
1596 // get length in utf8 characters
1597 return utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);
1598 }
1599
1600 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
1601 {
1602 // convert to utf8
1603 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
1604 uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(str, length, begin);
1605
1606 assert(begin + size == end);
1607 (void)!end;
1608
1609 // zero-terminate
1610 buffer[size] = 0;
1611 }
1612
1613#ifndef PUGIXML_NO_STL
1614 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
1615 {
1616 // first pass: get length in utf8 characters
1617 size_t size = as_utf8_begin(str, length);
1618
1619 // allocate resulting string
1620 std::string result;
1621 result.resize(size);
1622
1623 // second pass: convert to utf8
1624 if (size > 0) as_utf8_end(&result[0], size, str, length);
1625
1626 return result;
1627 }
1628
1629 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
1630 {
1631 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
1632
1633 // first pass: get length in wchar_t units
1634 size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
1635
1636 // allocate resulting string
1637 std::basic_string<wchar_t> result;
1638 result.resize(length);
1639
1640 // second pass: convert to wchar_t
1641 if (length > 0)
1642 {
1643 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
1644 wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
1645
1646 assert(begin + length == end);
1647 (void)!end;
1648 }
1649
1650 return result;
1651 }
1652#endif
1653
1654 inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)
1655 {
1656 assert(target);
1657 size_t target_length = strlength(target);
1658
1659 // always reuse document buffer memory if possible
1660 if (!allocated) return target_length >= length;
1661
1662 // reuse heap memory if waste is not too great
1663 const size_t reuse_threshold = 32;
1664
1665 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
1666 }
1667
1668 PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
1669 {
1670 assert(header);
1671
1672 size_t source_length = strlength(source);
1673
1674 if (source_length == 0)
1675 {
1676 // empty string and null pointer are equivalent, so just deallocate old memory
1677 xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
1678
1679 if (header & header_mask) alloc->deallocate_string(dest);
1680
1681 // mark the string as not allocated
1682 dest = 0;
1683 header &= ~header_mask;
1684
1685 return true;
1686 }
1687 else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest))
1688 {
1689 // we can reuse old buffer, so just copy the new data (including zero terminator)
1690 memcpy(dest, source, (source_length + 1) * sizeof(char_t));
1691
1692 return true;
1693 }
1694 else
1695 {
1696 xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
1697
1698 // allocate new buffer
1699 char_t* buf = alloc->allocate_string(source_length + 1);
1700 if (!buf) return false;
1701
1702 // copy the string (including zero terminator)
1703 memcpy(buf, source, (source_length + 1) * sizeof(char_t));
1704
1705 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
1706 if (header & header_mask) alloc->deallocate_string(dest);
1707
1708 // the string is now allocated, so set the flag
1709 dest = buf;
1710 header |= header_mask;
1711
1712 return true;
1713 }
1714 }
1715
1716 struct gap
1717 {
1718 char_t* end;
1719 size_t size;
1720
1721 gap(): end(0), size(0)
1722 {
1723 }
1724
1725 // Push new gap, move s count bytes further (skipping the gap).
1726 // Collapse previous gap.
1727 void push(char_t*& s, size_t count)
1728 {
1729 if (end) // there was a gap already; collapse it
1730 {
1731 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
1732 assert(s >= end);
1733 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
1734 }
1735
1736 s += count; // end of current gap
1737
1738 // "merge" two gaps
1739 end = s;
1740 size += count;
1741 }
1742
1743 // Collapse all gaps, return past-the-end pointer
1744 char_t* flush(char_t* s)
1745 {
1746 if (end)
1747 {
1748 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
1749 assert(s >= end);
1750 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
1751
1752 return s - size;
1753 }
1754 else return s;
1755 }
1756 };
1757
 // Decodes one character/entity reference in place.
 // s points at the '&' that starts the reference; g is the gap tracker of the string being rewritten.
 // Recognized forms are numeric references (&#NNN; and &#xHHH;) and the named entities
 // &amp; &apos; &gt; &lt; &quot;. On success the decoded character is written at s and the
 // leftover characters of the reference are handed to g as a gap.
 // Returns the position at which the caller should continue scanning. For anything that is
 // not a complete, recognized reference the text is left unmodified and the returned pointer
 // is somewhere past the '&' (as far as the reference was examined).
1758 PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
1759 {
 // stre is the read cursor; s stays at the '&' until something is written
1760 char_t* stre = s + 1;
1761
1762 switch (*stre)
1763 {
1764 case '#': // &#...
1765 {
 // accumulated code point value
1766 unsigned int ucsc = 0;
1767
1768 if (stre[1] == 'x') // &#x... (hex code)
1769 {
1770 stre += 2;
1771
1772 char_t ch = *stre;
1773
 // "&#x;" has no digits: leave it alone
1774 if (ch == ';') return stre;
1775
1776 for (;;)
1777 {
 // unsigned comparison accepts exactly '0'..'9'
1778 if (static_cast<unsigned int>(ch - '0') <= 9)
1779 ucsc = 16 * ucsc + (ch - '0');
 // OR-ing with ' ' folds 'A'..'F' onto 'a'..'f'
1780 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
1781 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
1782 else if (ch == ';')
1783 break;
1784 else // cancel
1785 return stre;
1786
1787 ch = *++stre;
1788 }
1789
 // step over the ';'
1790 ++stre;
1791 }
1792 else // &#... (dec code)
1793 {
1794 char_t ch = *++stre;
1795
 // "&#;" has no digits: leave it alone
1796 if (ch == ';') return stre;
1797
1798 for (;;)
1799 {
1800 if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)
1801 ucsc = 10 * ucsc + (ch - '0');
1802 else if (ch == ';')
1803 break;
1804 else // cancel
1805 return stre;
1806
1807 ch = *++stre;
1808 }
1809
 // step over the ';'
1810 ++stre;
1811 }
1812
 // encode the code point at s in the native character type; s ends up just past what was written
1813 #ifdef PUGIXML_WCHAR_MODE
1814 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
1815 #else
1816 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
1817 #endif
1818
 // whatever remains of the reference text becomes a gap
1819 g.push(s, stre - s);
1820 return stre;
1821 }
1822
1823 case 'a': // &a
1824 {
1825 ++stre;
1826
1827 if (*stre == 'm') // &am
1828 {
1829 if (*++stre == 'p' && *++stre == ';') // &amp;
1830 {
1831 *s++ = '&';
1832 ++stre;
1833
1834 g.push(s, stre - s);
1835 return stre;
1836 }
1837 }
1838 else if (*stre == 'p') // &ap
1839 {
1840 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
1841 {
1842 *s++ = '\'';
1843 ++stre;
1844
1845 g.push(s, stre - s);
1846 return stre;
1847 }
1848 }
1849 break;
1850 }
1851
1852 case 'g': // &g
1853 {
1854 if (*++stre == 't' && *++stre == ';') // &gt;
1855 {
1856 *s++ = '>';
1857 ++stre;
1858
1859 g.push(s, stre - s);
1860 return stre;
1861 }
1862 break;
1863 }
1864
1865 case 'l': // &l
1866 {
1867 if (*++stre == 't' && *++stre == ';') // &lt;
1868 {
1869 *s++ = '<';
1870 ++stre;
1871
1872 g.push(s, stre - s);
1873 return stre;
1874 }
1875 break;
1876 }
1877
1878 case 'q': // &q
1879 {
1880 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
1881 {
1882 *s++ = '"';
1883 ++stre;
1884
1885 g.push(s, stre - s);
1886 return stre;
1887 }
1888 break;
1889 }
1890
1891 default:
1892 break;
1893 }
1894
 // not a recognized reference: nothing was rewritten, resume where examination stopped
1895 return stre;
1896 }
1897
1898 // Parser utilities
 // These macros rely on names available where they are expanded: s (current position),
 // endch, optmsk, cursor, alloc, ch, error_offset and error_status.
 // ENDSWITH: c equals e, or c is the zero terminator and endch equals e
1899 #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
 // SKIPWS: advance s over characters classified as ct_space
1900 #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
 // OPTSET: non-zero when any bit of OPT is set in optmsk
1901 #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
 // PUSHNODE: append a child of the given type under cursor and descend into it; fails with status_out_of_memory
1902 #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
 // POPNODE: move cursor back to its parent
1903 #define PUGI__POPNODE() { cursor = cursor->parent; }
 // SCANFOR: advance s until X holds or the zero terminator is reached
1904 #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
 // SCANWHILE: advance s while X holds (X itself must stop at the terminator)
1905 #define PUGI__SCANWHILE(X) { while (X) ++s; }
 // SCANWHILE_UNROLL: same result as SCANWHILE, with four steps per loop iteration
1906 #define PUGI__SCANWHILE_UNROLL(X) { while (X) { ++s; if (PUGI__UNLIKELY(!(X))) break; ++s; if (PUGI__UNLIKELY(!(X))) break; ++s; if (PUGI__UNLIKELY(!(X))) break; ++s; } }
 // ENDSEG: remember the character at s in ch, overwrite it with a terminator and step past it
1907 #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
 // THROW_ERROR: record error position and status, then return a null char_t* from the enclosing function
1908 #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
 // CHECK_ERROR: raise err when s has reached the zero terminator
1909 #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
1910
1911 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
1912 {
1913 gap g;
1914
1915 while (true)
1916 {
1918
1919 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1920 {
1921 *s++ = '\n'; // replace first one with 0x0a
1922
1923 if (*s == '\n') g.push(s, 1);
1924 }
1925 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
1926 {
1927 *g.flush(s) = 0;
1928
1929 return s + (s[2] == '>' ? 3 : 2);
1930 }
1931 else if (*s == 0)
1932 {
1933 return 0;
1934 }
1935 else ++s;
1936 }
1937 }
1938
1939 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
1940 {
1941 gap g;
1942
1943 while (true)
1944 {
1946
1947 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1948 {
1949 *s++ = '\n'; // replace first one with 0x0a
1950
1951 if (*s == '\n') g.push(s, 1);
1952 }
1953 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
1954 {
1955 *g.flush(s) = 0;
1956
1957 return s + 1;
1958 }
1959 else if (*s == 0)
1960 {
1961 return 0;
1962 }
1963 else ++s;
1964 }
1965 }
1966
 // Signature shared by all PCDATA converters: takes the start of the text, returns where parsing resumes.
1967 typedef char_t* (*strconv_pcdata_t)(char_t*);
1968
 // In-place PCDATA converter, specialized at compile time by three option types
 // (each exposing a boolean ::value):
 //   opt_trim   - strip trailing whitespace from the text,
 //   opt_eol    - turn "\r\n" and lone '\r' into '\n',
 //   opt_escape - decode '&' references through strconv_escape.
1969 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
1970 {
 // Processes text starting at s until '<' or the zero terminator, zero-terminating the result.
 // Returns the position after the '<' when one was found, otherwise the position of the terminator.
1971 static char_t* parse(char_t* s)
1972 {
1973 gap g;
1974
 // start of the text, used as the lower bound when trimming
1975 char_t* begin = s;
1976
1977 while (true)
1978 {
1980
1981 if (*s == '<') // PCDATA ends here
1982 {
 // collapse pending gaps; end is the logical end of the text
1983 char_t* end = g.flush(s);
1984
1985 if (opt_trim::value)
1986 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
1987 --end;
1988
1989 *end = 0;
1990
1991 return s + 1;
1992 }
1993 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1994 {
1995 *s++ = '\n'; // replace first one with 0x0a
1996
 // drop the LF of a CR LF pair
1997 if (*s == '\n') g.push(s, 1);
1998 }
1999 else if (opt_escape::value && *s == '&')
2000 {
2001 s = strconv_escape(s, g);
2002 }
2003 else if (*s == 0)
2004 {
 // input ended without '<': finish the text the same way
2005 char_t* end = g.flush(s);
2006
2007 if (opt_trim::value)
2008 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2009 --end;
2010
2011 *end = 0;
2012
2013 return s;
2014 }
2015 else ++s;
2016 }
2017 }
2018 };
2019
2021 {
2022 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2023
2024 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim)
2025 {
2034 default: assert(false); return 0; // should not get here
2035 }
2036 }
2037
 // Signature shared by all attribute value converters: (start of value, closing quote character).
2038 typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
2039
 // In-place attribute value converters. opt_escape (a type exposing a boolean ::value)
 // enables decoding of '&' references through strconv_escape.
 // All four functions scan from s up to end_quote, zero-terminate the converted value,
 // and return the position after the quote, or 0 if the input ends before the quote is found.
2040 template <typename opt_escape> struct strconv_attribute_impl
2041 {
 // Whitespace normalization: leading and trailing whitespace is removed and every
 // internal run of whitespace is replaced by a single space.
2042 static char_t* parse_wnorm(char_t* s, char_t end_quote)
2043 {
2044 gap g;
2045
2046 // trim leading whitespaces
2047 if (PUGI__IS_CHARTYPE(*s, ct_space))
2048 {
2049 char_t* str = s;
2050
2051 do ++str;
2052 while (PUGI__IS_CHARTYPE(*str, ct_space));
2053
2054 g.push(s, str - s);
2055 }
2056
2057 while (true)
2058 {
2060
2061 if (*s == end_quote)
2062 {
2063 char_t* str = g.flush(s);
2064
 // write the terminator, then keep writing terminators backwards over trailing whitespace
2065 do *str-- = 0;
2066 while (PUGI__IS_CHARTYPE(*str, ct_space));
2067
2068 return s + 1;
2069 }
2070 else if (PUGI__IS_CHARTYPE(*s, ct_space))
2071 {
 // first whitespace character of a run becomes a plain space
2072 *s++ = ' ';
2073
 // the rest of the run is removed
2074 if (PUGI__IS_CHARTYPE(*s, ct_space))
2075 {
2076 char_t* str = s + 1;
2077 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
2078
2079 g.push(s, str - s);
2080 }
2081 }
2082 else if (opt_escape::value && *s == '&')
2083 {
2084 s = strconv_escape(s, g);
2085 }
2086 else if (!*s)
2087 {
2088 return 0;
2089 }
2090 else ++s;
2091 }
2092 }
2093
 // Whitespace conversion: every whitespace character becomes a space; a "\r\n" pair becomes one space.
2094 static char_t* parse_wconv(char_t* s, char_t end_quote)
2095 {
2096 gap g;
2097
2098 while (true)
2099 {
2101
2102 if (*s == end_quote)
2103 {
2104 *g.flush(s) = 0;
2105
2106 return s + 1;
2107 }
2108 else if (PUGI__IS_CHARTYPE(*s, ct_space))
2109 {
2110 if (*s == '\r')
2111 {
2112 *s++ = ' ';
2113
 // drop the LF of a CR LF pair
2114 if (*s == '\n') g.push(s, 1);
2115 }
2116 else *s++ = ' ';
2117 }
2118 else if (opt_escape::value && *s == '&')
2119 {
2120 s = strconv_escape(s, g);
2121 }
2122 else if (!*s)
2123 {
2124 return 0;
2125 }
2126 else ++s;
2127 }
2128 }
2129
 // End-of-line normalization only: "\r\n" and lone '\r' become '\n'.
2130 static char_t* parse_eol(char_t* s, char_t end_quote)
2131 {
2132 gap g;
2133
2134 while (true)
2135 {
2137
2138 if (*s == end_quote)
2139 {
2140 *g.flush(s) = 0;
2141
2142 return s + 1;
2143 }
2144 else if (*s == '\r')
2145 {
2146 *s++ = '\n';
2147
 // drop the LF of a CR LF pair
2148 if (*s == '\n') g.push(s, 1);
2149 }
2150 else if (opt_escape::value && *s == '&')
2151 {
2152 s = strconv_escape(s, g);
2153 }
2154 else if (!*s)
2155 {
2156 return 0;
2157 }
2158 else ++s;
2159 }
2160 }
2161
 // No whitespace or end-of-line processing; only optional reference decoding.
2162 static char_t* parse_simple(char_t* s, char_t end_quote)
2163 {
2164 gap g;
2165
2166 while (true)
2167 {
2169
2170 if (*s == end_quote)
2171 {
2172 *g.flush(s) = 0;
2173
2174 return s + 1;
2175 }
2176 else if (opt_escape::value && *s == '&')
2177 {
2178 s = strconv_escape(s, g);
2179 }
2180 else if (!*s)
2181 {
2182 return 0;
2183 }
2184 else ++s;
2185 }
2186 }
2187 };
2188
2190 {
2191 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2192
2193 switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
2194 {
2211 default: assert(false); return 0; // should not get here
2212 }
2213 }
2214
2215 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2216 {
2217 xml_parse_result result;
2218 result.status = status;
2219 result.offset = offset;
2220
2221 return result;
2222 }
2223
2225 {
2228 xml_parse_status error_status;
2229
 // Initializes the alloc member from alloc_ and starts with no error recorded
 // (error_offset is null, error_status is status_ok).
2230 xml_parser(const xml_allocator& alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
2231 {
2232 }
2233
2234 // DOCTYPE consists of nested sections of the following possible types:
2235 // <!-- ... -->, <? ... ?>, "...", '...'
2236 // <![...]]>
2237 // <!...>
2238 // First group can not contain nested groups
2239 // Second group can contain nested groups of the same type
2240 // Third group can contain all other groups
2241 char_t* parse_doctype_primitive(char_t* s)
2242 {
2243 if (*s == '"' || *s == '\'')
2244 {
2245 // quoted string
2246 char_t ch = *s++;
2247 PUGI__SCANFOR(*s == ch);
2248 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2249
2250 s++;
2251 }
2252 else if (s[0] == '<' && s[1] == '?')
2253 {
2254 // <? ... ?>
2255 s += 2;
2256 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2257 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2258
2259 s += 2;
2260 }
2261 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2262 {
2263 s += 4;
2264 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2265 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2266
2267 s += 4;
2268 }
2269 else PUGI__THROW_ERROR(status_bad_doctype, s);
2270
2271 return s;
2272 }
2273
2274 char_t* parse_doctype_ignore(char_t* s)
2275 {
2276 assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2277 s++;
2278
2279 while (*s)
2280 {
2281 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2282 {
2283 // nested ignore section
2284 s = parse_doctype_ignore(s);
2285 if (!s) return s;
2286 }
2287 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2288 {
2289 // ignore section end
2290 s += 3;
2291
2292 return s;
2293 }
2294 else s++;
2295 }
2296
2297 PUGI__THROW_ERROR(status_bad_doctype, s);
2298 }
2299
2300 char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)
2301 {
2302 assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
2303 s++;
2304
2305 while (*s)
2306 {
2307 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
2308 {
2309 if (s[2] == '[')
2310 {
2311 // ignore
2312 s = parse_doctype_ignore(s);
2313 if (!s) return s;
2314 }
2315 else
2316 {
2317 // some control group
2318 s = parse_doctype_group(s, endch, false);
2319 if (!s) return s;
2320
2321 // skip >
2322 assert(*s == '>');
2323 s++;
2324 }
2325 }
2326 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
2327 {
2328 // unknown tag (forbidden), or some primitive group
2330 if (!s) return s;
2331 }
2332 else if (*s == '>')
2333 {
2334 return s;
2335 }
2336 else s++;
2337 }
2338
2339 if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
2340
2341 return s;
2342 }
2343
 // Parses a construct that begins with "<!": a comment, a CDATA section or a DOCTYPE declaration.
 // s points at the '!' on entry. cursor is the node new children are appended to, optmsk holds
 // the parse option bits, and endch is the character that the buffer's zero terminator stands in for.
 // Returns the position after the construct, or a null pointer with error_offset/error_status set.
2344 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
2345 {
2346 // parse node contents, starting with exclamation mark
2347 ++s;
2348
2349 if (*s == '-') // '<!-...'
2350 {
2351 ++s;
2352
2353 if (*s == '-') // '<!--...'
2354 {
2355 ++s;
2356
2357 if (PUGI__OPTSET(parse_comments))
2358 {
2359 PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
2360 cursor->value = s; // Save the offset.
2361 }
2362
 // line-ending conversion is only worth doing when the comment text is kept
2363 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
2364 {
2365 s = strconv_comment(s, endch);
2366
2367 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
2368 }
2369 else
2370 {
2371 // Scan for terminating '-->'.
2372 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
2373 PUGI__CHECK_ERROR(status_bad_comment, s);
2374
2375 if (PUGI__OPTSET(parse_comments))
2376 *s = 0; // Zero-terminate this segment at the first terminating '-'.
2377
2378 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
2379 }
2380 }
2381 else PUGI__THROW_ERROR(status_bad_comment, s);
2382 }
2383 else if (*s == '[')
2384 {
2385 // '<![CDATA[...'
2386 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
2387 {
2388 ++s;
2389
2390 if (PUGI__OPTSET(parse_cdata))
2391 {
2392 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
2393 cursor->value = s; // Save the offset.
2394
2395 if (PUGI__OPTSET(parse_eol))
2396 {
 // on success s is left on the second ']' of the closing sequence
2397 s = strconv_cdata(s, endch);
2398
2399 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
2400 }
2401 else
2402 {
2403 // Scan for terminating ']]>'.
2404 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
2405 PUGI__CHECK_ERROR(status_bad_cdata, s);
2406
2407 *s++ = 0; // Zero-terminate this segment.
2408 }
2409 }
2410 else // Flagged for discard, but we still have to scan for the terminator.
2411 {
2412 // Scan for terminating ']]>'.
2413 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
2414 PUGI__CHECK_ERROR(status_bad_cdata, s);
2415
2416 ++s;
2417 }
2418
 // all three paths above leave s on the second ']'
2419 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
2420 }
2421 else PUGI__THROW_ERROR(status_bad_cdata, s);
2422 }
2423 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
2424 {
 // step back onto the '<' that opened the declaration
2425 s -= 2;
2426
 // a DOCTYPE is only accepted when cursor has no parent
2427 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
2428
 // first character after "<!DOCTYPE" (9 characters)
2429 char_t* mark = s + 9;
2430
2431 s = parse_doctype_group(s, endch, true);
2432 if (!s) return s;
2433
 // terminate the doctype text at the closing '>' (if it is present in the buffer)
2434 assert((*s == 0 && endch == '>') || *s == '>');
2435 if (*s) *s++ = 0;
2436
2437 if (PUGI__OPTSET(parse_doctype))
2438 {
 // the stored value starts after the whitespace that follows the keyword
2439 while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
2440
2441 PUGI__PUSHNODE(node_doctype);
2442
2443 cursor->value = mark;
2444
2445 PUGI__POPNODE();
2446 }
2447 }
 // input ended right after "<!": choose the error by the character the terminator replaced
2448 else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
2449 else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
2450 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
2451
2452 return s;
2453 }
2454
2455 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
2456 {
2457 // load into registers
2458 xml_node_struct* cursor = ref_cursor;
2459 char_t ch = 0;
2460
2461 // parse node contents, starting with question mark
2462 ++s;
2463
2464 // read PI target
2465 char_t* target = s;
2466
2467 if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
2468
2470 PUGI__CHECK_ERROR(status_bad_pi, s);
2471
2472 // determine node type; stricmp / strcasecmp is not portable
2473 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
2474
2475 if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
2476 {
2477 if (declaration)
2478 {
2479 // disallow non top-level declarations
2480 if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
2481
2482 PUGI__PUSHNODE(node_declaration);
2483 }
2484 else
2485 {
2486 PUGI__PUSHNODE(node_pi);
2487 }
2488
2489 cursor->name = target;
2490
2491 PUGI__ENDSEG();
2492
2493 // parse value/attributes
2494 if (ch == '?')
2495 {
2496 // empty node
2497 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
2498 s += (*s == '>');
2499
2500 PUGI__POPNODE();
2501 }
2502 else if (PUGI__IS_CHARTYPE(ch, ct_space))
2503 {
2504 PUGI__SKIPWS();
2505
2506 // scan for tag end
2507 char_t* value = s;
2508
2509 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
2510 PUGI__CHECK_ERROR(status_bad_pi, s);
2511
2512 if (declaration)
2513 {
2514 // replace ending ? with / so that 'element' terminates properly
2515 *s = '/';
2516
2517 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
2518 s = value;
2519 }
2520 else
2521 {
2522 // store value and step over >
2523 cursor->value = value;
2524 PUGI__POPNODE();
2525
2526 PUGI__ENDSEG();
2527
2528 s += (*s == '>');
2529 }
2530 }
2531 else PUGI__THROW_ERROR(status_bad_pi, s);
2532 }
2533 else
2534 {
2535 // scan for tag end
2536 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
2537 PUGI__CHECK_ERROR(status_bad_pi, s);
2538
2539 s += (s[1] == '>' ? 2 : 1);
2540 }
2541
2542 // store from registers
2543 ref_cursor = cursor;
2544
2545 return s;
2546 }
2547
// Main parsing loop: walks the zero-terminated buffer starting at s and builds nodes under root.
// After a '<' it dispatches on the next character: element start, closing tag ('/'),
// '?' (parse_question) or '!' (parse_exclamation); anything else is handled as PCDATA.
//   s      - start of the data to parse
//   root   - node that receives the parsed children; parsing must end with cursor == root
//   optmsk - parse option mask, consulted through PUGI__OPTSET and used to pick the string converters
//   endch  - character that originally occupied the last buffer position (parse() replaces it with 0),
//            which is why several branches accept '*s == 0' when endch is the expected terminator
// Returns s when the loop completes; a null result from parse_question / parse_exclamation is returned as-is.
// Errors are reported through PUGI__THROW_ERROR / PUGI__CHECK_ERROR.
2548 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
2549 {
2550 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
2551 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
2552
2553 char_t ch = 0;
2554 xml_node_struct* cursor = root;
2555 char_t* mark = s;
2556
2557 while (*s != 0)
2558 {
2559 if (*s == '<')
2560 {
2561 ++s;
2562
// Also entered by the 'goto LOC_TAG' at the end of the PCDATA branch, with s just past a '<'.
2563 LOC_TAG:
2564 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
2565 {
2566 PUGI__PUSHNODE(node_element); // Append a new node to the tree.
2567
2568 cursor->name = s;
2569
2570 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
2571 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
2572
2573 if (ch == '>')
2574 {
2575 // end of tag
2576 }
2577 else if (PUGI__IS_CHARTYPE(ch, ct_space))
2578 {
// Also entered by 'goto LOC_ATTRIBUTES' after parse_question leaves cursor on a declaration node.
2579 LOC_ATTRIBUTES:
2580 while (true)
2581 {
2582 PUGI__SKIPWS(); // Eat any whitespace.
2583
2584 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
2585 {
2586 xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute.
2587 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
2588
2589 a->name = s; // Save the offset.
2590
2591 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
2592 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2593
2594 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
2595 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2596
2597 if (PUGI__IS_CHARTYPE(ch, ct_space))
2598 {
2599 PUGI__SKIPWS(); // Eat any whitespace.
2600 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2601
2602 ch = *s;
2603 ++s;
2604 }
2605
2606 if (ch == '=') // '<... #=...'
2607 {
2608 PUGI__SKIPWS(); // Eat any whitespace.
2609
2610 if (*s == '"' || *s == '\'') // '<... #="...'
2611 {
2612 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
2613 ++s; // Step over the quote.
2614 a->value = s; // Save the offset.
2615
2616 s = strconv_attribute(s, ch);
2617
2618 if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
2619
2620 // After this line the loop continues from the start;
2621 // Whitespaces, / and > are ok, symbols and EOF are wrong,
2622 // everything else will be detected
2623 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
2624 }
2625 else PUGI__THROW_ERROR(status_bad_attribute, s);
2626 }
2627 else PUGI__THROW_ERROR(status_bad_attribute, s);
2628 }
2629 else if (*s == '/')
2630 {
2631 ++s;
2632
2633 if (*s == '>')
2634 {
2635 PUGI__POPNODE();
2636 s++;
2637 break;
2638 }
2639 else if (*s == 0 && endch == '>')
2640 {
2641 PUGI__POPNODE();
2642 break;
2643 }
2644 else PUGI__THROW_ERROR(status_bad_start_element, s);
2645 }
2646 else if (*s == '>')
2647 {
2648 ++s;
2649
2650 break;
2651 }
2652 else if (*s == 0 && endch == '>')
2653 {
2654 break;
2655 }
2656 else PUGI__THROW_ERROR(status_bad_start_element, s);
2657 }
2658
2659 // !!!
2660 }
2661 else if (ch == '/') // '<#.../'
2662 {
2663 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
2664
2665 PUGI__POPNODE(); // Pop.
2666
2667 s += (*s == '>');
2668 }
2669 else if (ch == 0)
2670 {
2671 // we stepped over null terminator, backtrack & handle closing tag
2672 --s;
2673
2674 if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
2675 }
2676 else PUGI__THROW_ERROR(status_bad_start_element, s);
2677 }
2678 else if (*s == '/')
2679 {
2680 ++s;
2681
// Closing tag: compare it character by character against the name of the current node.
2682 char_t* name = cursor->name;
2683 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
2684
2685 while (PUGI__IS_CHARTYPE(*s, ct_symbol))
2686 {
2687 if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
2688 }
2689
// Leftover characters in 'name' mean the closing tag is shorter than the open tag's name.
// If the only leftover is endch and the input ended, the tag was cut off by the buffer end.
2690 if (*name)
2691 {
2692 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
2693 else PUGI__THROW_ERROR(status_end_element_mismatch, s);
2694 }
2695
2696 PUGI__POPNODE(); // Pop.
2697
2698 PUGI__SKIPWS();
2699
2700 if (*s == 0)
2701 {
2702 if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
2703 }
2704 else
2705 {
2706 if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
2707 ++s;
2708 }
2709 }
2710 else if (*s == '?') // '<?...'
2711 {
2712 s = parse_question(s, cursor, optmsk, endch);
2713 if (!s) return s;
2714
2715 assert(cursor);
// The node type is stored in the header as (type - 1) under xml_memory_page_type_mask.
2716 if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES;
2717 }
2718 else if (*s == '!') // '<!...'
2719 {
2720 s = parse_exclamation(s, cursor, optmsk, endch);
2721 if (!s) return s;
2722 }
2723 else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
2724 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
2725 }
2726 else
2727 {
2728 mark = s; // Save this offset while searching for a terminator.
2729
2730 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
2731
2732 if (*s == '<' || !*s)
2733 {
2734 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
2735 assert(mark != s);
2736
// Whitespace-only text: drop it unless a whitespace-preserving option asks for it.
2737 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
2738 {
2739 continue;
2740 }
2741 else if (PUGI__OPTSET(parse_ws_pcdata_single))
2742 {
// Keep it only when a closing tag follows immediately and the current node has no children yet.
2743 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
2744 }
2745 }
2746
// Without trimming, the PCDATA value starts at the saved offset, leading whitespace included.
2747 if (!PUGI__OPTSET(parse_trim_pcdata))
2748 s = mark;
2749
// Text directly under a parentless node is stored only when parse_fragment is set.
2750 if (cursor->parent || PUGI__OPTSET(parse_fragment))
2751 {
2752 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
2753 cursor->value = s; // Save the offset.
2754
2755 s = strconv_pcdata(s);
2756
2757 PUGI__POPNODE(); // Pop since this is a standalone.
2758
2759 if (!*s) break;
2760 }
2761 else
2762 {
2763 PUGI__SCANFOR(*s == '<'); // '...<'
2764 if (!*s) break;
2765
2766 ++s;
2767 }
2768
2769 // We're after '<'
2770 goto LOC_TAG;
2771 }
2772 }
2773
2774 // check that last tag is closed
2775 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
2776
2777 return s;
2778 }
2779
2780 #ifdef PUGIXML_WCHAR_MODE
2781 static char_t* parse_skip_bom(char_t* s)
2782 {
2783 unsigned int bom = 0xfeff;
2784 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
2785 }
2786 #else
2787 static char_t* parse_skip_bom(char_t* s)
2788 {
2789 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
2790 }
2791 #endif
2792
2793 static bool has_element_node_siblings(xml_node_struct* node)
2794 {
2795 while (node)
2796 {
2797 xml_node_type type = static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1);
2798 if (type == node_element) return true;
2799
2800 node = node->next_sibling;
2801 }
2802
2803 return false;
2804 }
2805
2806 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
2807 {
2808 // allocator object is a part of document object
2809 xml_allocator& alloc = *static_cast<xml_allocator*>(xmldoc);
2810
2811 // early-out for empty documents
2812 if (length == 0)
2813 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
2814
2815 // get last child of the root before parsing
2816 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c : 0;
2817
2818 // create parser on stack
2819 xml_parser parser(alloc);
2820
2821 // save last character and make buffer zero-terminated (speeds up parsing)
2822 char_t endch = buffer[length - 1];
2823 buffer[length - 1] = 0;
2824
2825 // skip BOM to make sure it does not end up as part of parse output
2826 char_t* buffer_data = parse_skip_bom(buffer);
2827
2828 // perform actual parsing
2829 parser.parse_tree(buffer_data, root, optmsk, endch);
2830
2831 // update allocator state
2832 alloc = parser.alloc;
2833
2834 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
2835 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
2836
2837 if (result)
2838 {
2839 // since we removed last character, we have to handle the only possible false positive (stray <)
2840 if (endch == '<')
2841 return make_parse_result(status_unrecognized_tag, length - 1);
2842
2843 // check if there are any element nodes parsed
2844 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling : root->first_child;
2845
2846 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
2847 return make_parse_result(status_no_document_element, length - 1);
2848 }
2849 else
2850 {
2851 // roll back offset if it occurs on a null terminator in the source buffer
2852 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
2853 result.offset--;
2854 }
2855
2856 return result;
2857 }
2858 };
2859
2860 // Output facilities
// NOTE(review): the signature line of this function is absent from this listing (the embedded
// numbering jumps from 2860 to 2862), so its name and return type cannot be read here.
// The visible body returns the wchar encoding in PUGIXML_WCHAR_MODE builds and encoding_utf8 otherwise.
2862 {
2863 #ifdef PUGIXML_WCHAR_MODE
2864 return get_wchar_encoding();
2865 #else
2866 return encoding_utf8;
2867 #endif
2868 }
2869
2870 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
2871 {
2872 // replace wchar encoding with utf implementation
2873 if (encoding == encoding_wchar) return get_wchar_encoding();
2874
2875 // replace utf16 encoding with utf16 with specific endianness
2876 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2877
2878 // replace utf32 encoding with utf32 with specific endianness
2879 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2880
2881 // only do autodetection if no explicit encoding is requested
2882 if (encoding != encoding_auto) return encoding;
2883
2884 // assume utf8 encoding
2885 return encoding_utf8;
2886 }
2887
2888#ifdef PUGIXML_WCHAR_MODE
2889 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
2890 {
2891 assert(length > 0);
2892
2893 // discard last character if it's the lead of a surrogate pair
2894 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
2895 }
2896
2897 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
2898 {
2899 // only endian-swapping is required
2900 if (need_endian_swap_utf(encoding, get_wchar_encoding()))
2901 {
2902 convert_wchar_endian_swap(r_char, data, length);
2903
2904 return length * sizeof(char_t);
2905 }
2906
2907 // convert to utf8
2908 if (encoding == encoding_utf8)
2909 {
2910 uint8_t* dest = r_u8;
2911 uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(data, length, dest);
2912
2913 return static_cast<size_t>(end - dest);
2914 }
2915
2916 // convert to utf16
2917 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2918 {
2919 uint16_t* dest = r_u16;
2920
2921 // convert to native utf16
2923
2924 // swap if necessary
2925 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2926
2927 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2928
2929 return static_cast<size_t>(end - dest) * sizeof(uint16_t);
2930 }
2931
2932 // convert to utf32
2933 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2934 {
2935 uint32_t* dest = r_u32;
2936
2937 // convert to native utf32
2939
2940 // swap if necessary
2941 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2942
2943 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2944
2945 return static_cast<size_t>(end - dest) * sizeof(uint32_t);
2946 }
2947
2948 // convert to latin1
2949 if (encoding == encoding_latin1)
2950 {
2951 uint8_t* dest = r_u8;
2953
2954 return static_cast<size_t>(end - dest);
2955 }
2956
2957 assert(!"Invalid encoding");
2958 return 0;
2959 }
2960#else
2961 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
2962 {
2963 assert(length > 4);
2964
2965 for (size_t i = 1; i <= 4; ++i)
2966 {
2967 uint8_t ch = static_cast<uint8_t>(data[length - i]);
2968
2969 // either a standalone character or a leading one
2970 if ((ch & 0xc0) != 0x80) return length - i;
2971 }
2972
2973 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
2974 return length;
2975 }
2976
2977 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
2978 {
2979 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2980 {
2981 uint16_t* dest = r_u16;
2982
2983 // convert to native utf16
2984 uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2985
2986 // swap if necessary
2987 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2988
2989 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2990
2991 return static_cast<size_t>(end - dest) * sizeof(uint16_t);
2992 }
2993
2994 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2995 {
2996 uint32_t* dest = r_u32;
2997
2998 // convert to native utf32
2999 uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
3000
3001 // swap if necessary
3002 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3003
3004 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
3005
3006 return static_cast<size_t>(end - dest) * sizeof(uint32_t);
3007 }
3008
3009 if (encoding == encoding_latin1)
3010 {
3011 uint8_t* dest = r_u8;
3012 uint8_t* end = utf_decoder<latin1_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
3013
3014 return static_cast<size_t>(end - dest);
3015 }
3016
3017 assert(!"Invalid encoding");
3018 return 0;
3019 }
3020#endif
3021
// Buffered front end for an xml_writer: characters are collected in `buffer` and handed to the
// underlying writer in chunks, converted to the target encoding when needed.
// NOTE(review): the embedded listing numbers skip in several places inside this class
// (3022, 3024-3025, 3030, 3033, 3040, 3049, 3072, 3176, 3182, 3184, 3186, 3190-3194), so the class
// header and several statements/declarations are not visible here. Gaps are marked below.
3023 {
3026
3027 public:
// Binds to the destination writer and resolves the user encoding via get_write_encoding.
3028 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
3029 {
// NOTE(review): constructor body line (3030) missing from this listing.
3031 }
3032
// NOTE(review): the header of this member (3033) is missing; it only calls flush(),
// presumably the destructor - confirm.
3034 {
3035 flush();
3036 }
3037
// Empties the internal buffer.
// NOTE(review): one statement (3040) is missing before the reset; presumably it writes out
// the buffered characters - confirm.
3038 void flush()
3039 {
3041 bufsize = 0;
3042 }
3043
// Writes `size` characters starting at `data` to the underlying writer, converting if required.
3044 void flush(const char_t* data, size_t size)
3045 {
3046 if (size == 0) return;
3047
3048 // fast path, just write data
// NOTE(review): the condition guarding the fast path (3049) is missing from this listing.
3050 writer.write(data, size * sizeof(char_t));
3051 else
3052 {
3053 // convert chunk
3054 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3055 assert(result <= sizeof(scratch));
3056
3057 // write data
3058 writer.write(scratch.data_u8, result);
3059 }
3060 }
3061
// Appends `length` characters. If they do not fit, the buffer is flushed first; input longer
// than the whole buffer is written out directly or in converted chunks, and only the remaining
// tail is copied into the buffer.
3062 void write(const char_t* data, size_t length)
3063 {
3064 if (bufsize + length > bufcapacity)
3065 {
3066 // flush the remaining buffer contents
3067 flush();
3068
3069 // handle large chunks
3070 if (length > bufcapacity)
3071 {
// NOTE(review): the condition of this inner block (3072) is missing from this listing.
3073 {
3074 // fast path, can just write data chunk
3075 writer.write(data, length * sizeof(char_t));
3076 return;
3077 }
3078
3079 // need to convert in suitable chunks
3080 while (length > bufcapacity)
3081 {
3082 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3083 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3084 size_t chunk_size = get_valid_length(data, bufcapacity);
3085
3086 // convert chunk and write
3087 flush(data, chunk_size);
3088
3089 // iterate
3090 data += chunk_size;
3091 length -= chunk_size;
3092 }
3093
3094 // small tail is copied below
3095 bufsize = 0;
3096 }
3097 }
3098
3099 memcpy(buffer + bufsize, data, length * sizeof(char_t));
3100 bufsize += length;
3101 }
3102
// Appends a zero-terminated string.
3103 void write(const char_t* data)
3104 {
3105 write(data, strlength(data));
3106 }
3107
// The overloads below append one to six individual characters; each flushes first when the
// characters would not fit into the remaining buffer space.
3108 void write(char_t d0)
3109 {
3110 if (bufsize + 1 > bufcapacity) flush();
3111
3112 buffer[bufsize + 0] = d0;
3113 bufsize += 1;
3114 }
3115
3116 void write(char_t d0, char_t d1)
3117 {
3118 if (bufsize + 2 > bufcapacity) flush();
3119
3120 buffer[bufsize + 0] = d0;
3121 buffer[bufsize + 1] = d1;
3122 bufsize += 2;
3123 }
3124
3125 void write(char_t d0, char_t d1, char_t d2)
3126 {
3127 if (bufsize + 3 > bufcapacity) flush();
3128
3129 buffer[bufsize + 0] = d0;
3130 buffer[bufsize + 1] = d1;
3131 buffer[bufsize + 2] = d2;
3132 bufsize += 3;
3133 }
3134
3135 void write(char_t d0, char_t d1, char_t d2, char_t d3)
3136 {
3137 if (bufsize + 4 > bufcapacity) flush();
3138
3139 buffer[bufsize + 0] = d0;
3140 buffer[bufsize + 1] = d1;
3141 buffer[bufsize + 2] = d2;
3142 buffer[bufsize + 3] = d3;
3143 bufsize += 4;
3144 }
3145
3146 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3147 {
3148 if (bufsize + 5 > bufcapacity) flush();
3149
3150 buffer[bufsize + 0] = d0;
3151 buffer[bufsize + 1] = d1;
3152 buffer[bufsize + 2] = d2;
3153 buffer[bufsize + 3] = d3;
3154 buffer[bufsize + 4] = d4;
3155 bufsize += 5;
3156 }
3157
3158 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3159 {
3160 if (bufsize + 6 > bufcapacity) flush();
3161
3162 buffer[bufsize + 0] = d0;
3163 buffer[bufsize + 1] = d1;
3164 buffer[bufsize + 2] = d2;
3165 buffer[bufsize + 3] = d3;
3166 buffer[bufsize + 4] = d4;
3167 buffer[bufsize + 5] = d5;
3168 bufsize += 6;
3169 }
3170
3171 // utf8 maximum expansion: x4 (-> utf32)
3172 // utf16 maximum expansion: x2 (-> utf32)
3173 // utf32 maximum expansion: x1
// NOTE(review): several lines of this enum are missing from the listing (3176, 3182, 3184).
// The visible part shows a byte budget of PUGIXML_MEMORY_OUTPUT_STACK (when defined) or 10240,
// and bufcapacity derived from it by dividing by (sizeof(char_t) + 4).
3174 enum
3175 {
3177 #ifdef PUGIXML_MEMORY_OUTPUT_STACK
3178 PUGIXML_MEMORY_OUTPUT_STACK
3179 #else
3180 10240
3181 #endif
3183 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
3185
3187
// NOTE(review): the declaration of `buffer` (3186) and the members of this union (3190-3194)
// are missing from the listing; flush() above uses it as `scratch` with members data_char,
// data_u8, data_u16 and data_u32.
3188 union
3189 {
3195
3196 xml_writer& writer;
3197 size_t bufsize;
3198 xml_encoding encoding;
3199 };
3200
3202 {
3203 while (*s)
3204 {
3205 const char_t* prev = s;
3206
3207 // While *s is a usual symbol
3208 while (!PUGI__IS_CHARTYPEX(*s, type)) ++s;
3209
3210 writer.write(prev, static_cast<size_t>(s - prev));
3211
3212 switch (*s)
3213 {
3214 case 0: break;
3215 case '&':
3216 writer.write('&', 'a', 'm', 'p', ';');
3217 ++s;
3218 break;
3219 case '<':
3220 writer.write('&', 'l', 't', ';');
3221 ++s;
3222 break;
3223 case '>':
3224 writer.write('&', 'g', 't', ';');
3225 ++s;
3226 break;
3227 case '"':
3228 writer.write('&', 'q', 'u', 'o', 't', ';');
3229 ++s;
3230 break;
3231 default: // s is not a usual symbol
3232 {
3233 unsigned int ch = static_cast<unsigned int>(*s++);
3234 assert(ch < 32);
3235
3236 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
3237 }
3238 }
3239 }
3240 }
3241
3242 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3243 {
3244 if (flags & format_no_escapes)
3245 writer.write(s);
3246 else
3247 text_output_escaped(writer, s, type);
3248 }
3249
3250 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
3251 {
3252 do
3253 {
3254 writer.write('<', '!', '[', 'C', 'D');
3255 writer.write('A', 'T', 'A', '[');
3256
3257 const char_t* prev = s;
3258
3259 // look for ]]> sequence - we can't output it as is since it terminates CDATA
3260 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3261
3262 // skip ]] if we stopped at ]]>, > will go to the next CDATA section
3263 if (*s) s += 2;
3264
3265 writer.write(prev, static_cast<size_t>(s - prev));
3266
3267 writer.write(']', ']', '>');
3268 }
3269 while (*s);
3270 }
3271
3272 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, const xml_node& node, unsigned int flags)
3273 {
3274 const char_t* default_name = PUGIXML_TEXT(":anonymous");
3275
3276 for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute())
3277 {
3278 writer.write(' ');
3279 writer.write(a.name()[0] ? a.name() : default_name);
3280 writer.write('=', '"');
3281
3282 text_output(writer, a.value(), ctx_special_attr, flags);
3283
3284 writer.write('"');
3285 }
3286 }
3287
3288 PUGI__FN void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth)
3289 {
3290 const char_t* default_name = PUGIXML_TEXT(":anonymous");
3291
3292 if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
3293 for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
3294
3295 switch (node.type())
3296 {
3297 case node_document:
3298 {
3299 for (xml_node n = node.first_child(); n; n = n.next_sibling())
3300 node_output(writer, n, indent, flags, depth);
3301 break;
3302 }
3303
3304 case node_element:
3305 {
3306 const char_t* name = node.name()[0] ? node.name() : default_name;
3307
3308 writer.write('<');
3309 writer.write(name);
3310
3311 node_output_attributes(writer, node, flags);
3312
3313 if (flags & format_raw)
3314 {
3315 if (!node.first_child())
3316 writer.write(' ', '/', '>');
3317 else
3318 {
3319 writer.write('>');
3320
3321 for (xml_node n = node.first_child(); n; n = n.next_sibling())
3322 node_output(writer, n, indent, flags, depth + 1);
3323
3324 writer.write('<', '/');
3325 writer.write(name);
3326 writer.write('>');
3327 }
3328 }
3329 else if (!node.first_child())
3330 writer.write(' ', '/', '>', '\n');
3331 else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata))
3332 {
3333 writer.write('>');
3334
3335 if (node.first_child().type() == node_pcdata)
3336 text_output(writer, node.first_child().value(), ctx_special_pcdata, flags);
3337 else
3338 text_output_cdata(writer, node.first_child().value());
3339
3340 writer.write('<', '/');
3341 writer.write(name);
3342 writer.write('>', '\n');
3343 }
3344 else
3345 {
3346 writer.write('>', '\n');
3347
3348 for (xml_node n = node.first_child(); n; n = n.next_sibling())
3349 node_output(writer, n, indent, flags, depth + 1);
3350
3351 if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
3352 for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
3353
3354 writer.write('<', '/');
3355 writer.write(name);
3356 writer.write('>', '\n');
3357 }
3358
3359 break;
3360 }
3361
3362 case node_pcdata:
3363 text_output(writer, node.value(), ctx_special_pcdata, flags);
3364 if ((flags & format_raw) == 0) writer.write('\n');
3365 break;
3366
3367 case node_cdata:
3368 text_output_cdata(writer, node.value());
3369 if ((flags & format_raw) == 0) writer.write('\n');
3370 break;
3371
3372 case node_comment:
3373 writer.write('<', '!', '-', '-');
3374 writer.write(node.value());
3375 writer.write('-', '-', '>');
3376 if ((flags & format_raw) == 0) writer.write('\n');
3377 break;
3378
3379 case node_pi:
3380 case node_declaration:
3381 writer.write('<', '?');
3382 writer.write(node.name()[0] ? node.name() : default_name);
3383
3384 if (node.type() == node_declaration)
3385 {
3386 node_output_attributes(writer, node, flags);
3387 }
3388 else if (node.value()[0])
3389 {
3390 writer.write(' ');
3391 writer.write(node.value());
3392 }
3393
3394 writer.write('?', '>');
3395 if ((flags & format_raw) == 0) writer.write('\n');
3396 break;
3397
3398 case node_doctype:
3399 writer.write('<', '!', 'D', 'O', 'C');
3400 writer.write('T', 'Y', 'P', 'E');
3401
3402 if (node.value()[0])
3403 {
3404 writer.write(' ');
3405 writer.write(node.value());
3406 }
3407
3408 writer.write('>');
3409 if ((flags & format_raw) == 0) writer.write('\n');
3410 break;
3411
3412 default:
3413 assert(!"Invalid node type");
3414 }
3415 }
3416
3417 inline bool has_declaration(const xml_node& node)
3418 {
3419 for (xml_node child = node.first_child(); child; child = child.next_sibling())
3420 {
3421 xml_node_type type = child.type();
3422
3423 if (type == node_declaration) return true;
3424 if (type == node_element) return false;
3425 }
3426
3427 return false;
3428 }
3429
3430 inline bool allow_insert_child(xml_node_type parent, xml_node_type child)
3431 {
3432 if (parent != node_document && parent != node_element) return false;
3433 if (child == node_document || child == node_null) return false;
3434 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
3435
3436 return true;
3437 }
3438
3439 PUGI__FN bool allow_move(const xml_node& parent, const xml_node& child)
3440 {
3441 // check that child can be a child of parent
3442 if (!allow_insert_child(parent.type(), child.type()))
3443 return false;
3444
3445 // check that node is not moved between documents
3446 if (parent.root() != child.root())
3447 return false;
3448
3449 // check that new parent is not in the child subtree
3450 xml_node cur = parent;
3451
3452 while (cur)
3453 {
3454 if (cur == child)
3455 return false;
3456
3457 cur = cur.parent();
3458 }
3459
3460 return true;
3461 }
3462
3463 PUGI__FN void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip)
3464 {
3465 assert(dest.type() == source.type());
3466
3467 switch (source.type())
3468 {
3469 case node_element:
3470 {
3471 dest.set_name(source.name());
3472
3473 for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
3474 dest.append_attribute(a.name()).set_value(a.value());
3475
3476 for (xml_node c = source.first_child(); c; c = c.next_sibling())
3477 {
3478 if (c == skip) continue;
3479
3480 xml_node cc = dest.append_child(c.type());
3481 assert(cc);
3482
3483 recursive_copy_skip(cc, c, skip);
3484 }
3485
3486 break;
3487 }
3488
3489 case node_pcdata:
3490 case node_cdata:
3491 case node_comment:
3492 case node_doctype:
3493 dest.set_value(source.value());
3494 break;
3495
3496 case node_pi:
3497 dest.set_name(source.name());
3498 dest.set_value(source.value());
3499 break;
3500
3501 case node_declaration:
3502 {
3503 dest.set_name(source.name());
3504
3505 for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
3506 dest.append_attribute(a.name()).set_value(a.value());
3507
3508 break;
3509 }
3510
3511 default:
3512 assert(!"Invalid node type");
3513 }
3514 }
3515
3516 inline bool is_text_node(xml_node_struct* node)
3517 {
3518 xml_node_type type = static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1);
3519
3520 return type == node_pcdata || type == node_cdata;
3521 }
3522
3523 // get value with conversion functions
3524 PUGI__FN int get_integer_base(const char_t* value)
3525 {
3526 const char_t* s = value;
3527
3528 while (PUGI__IS_CHARTYPE(*s, ct_space))
3529 s++;
3530
3531 if (*s == '-')
3532 s++;
3533
3534 return (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) ? 16 : 10;
3535 }
3536
3537 PUGI__FN int get_value_int(const char_t* value, int def)
3538 {
3539 if (!value) return def;
3540
3541 int base = get_integer_base(value);
3542
3543 #ifdef PUGIXML_WCHAR_MODE
3544 return static_cast<int>(wcstol(value, 0, base));
3545 #else
3546 return static_cast<int>(strtol(value, 0, base));
3547 #endif
3548 }
3549
3550 PUGI__FN unsigned int get_value_uint(const char_t* value, unsigned int def)
3551 {
3552 if (!value) return def;
3553
3554 int base = get_integer_base(value);
3555
3556 #ifdef PUGIXML_WCHAR_MODE
3557 return static_cast<unsigned int>(wcstoul(value, 0, base));
3558 #else
3559 return static_cast<unsigned int>(strtoul(value, 0, base));
3560 #endif
3561 }
3562
3563 PUGI__FN double get_value_double(const char_t* value, double def)
3564 {
3565 if (!value) return def;
3566
3567 #ifdef PUGIXML_WCHAR_MODE
3568 return wcstod(value, 0);
3569 #else
3570 return strtod(value, 0);
3571 #endif
3572 }
3573
3574 PUGI__FN float get_value_float(const char_t* value, float def)
3575 {
3576 if (!value) return def;
3577
3578 #ifdef PUGIXML_WCHAR_MODE
3579 return static_cast<float>(wcstod(value, 0));
3580 #else
3581 return static_cast<float>(strtod(value, 0));
3582 #endif
3583 }
3584
3585 PUGI__FN bool get_value_bool(const char_t* value, bool def)
3586 {
3587 if (!value) return def;
3588
3589 // only look at first char
3590 char_t first = *value;
3591
3592 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
3593 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
3594 }
3595
3596#ifdef PUGIXML_HAS_LONG_LONG
3597 PUGI__FN long long get_value_llong(const char_t* value, long long def)
3598 {
3599 if (!value) return def;
3600
3601 int base = get_integer_base(value);
3602
3603 #ifdef PUGIXML_WCHAR_MODE
3604 #ifdef PUGI__MSVC_CRT_VERSION
3605 return _wcstoi64(value, 0, base);
3606 #else
3607 return wcstoll(value, 0, base);
3608 #endif
3609 #else
3610 #ifdef PUGI__MSVC_CRT_VERSION
3611 return _strtoi64(value, 0, base);
3612 #else
3613 return strtoll(value, 0, base);
3614 #endif
3615 #endif
3616 }
3617
3618 PUGI__FN unsigned long long get_value_ullong(const char_t* value, unsigned long long def)
3619 {
3620 if (!value) return def;
3621
3622 int base = get_integer_base(value);
3623
3624 #ifdef PUGIXML_WCHAR_MODE
3625 #ifdef PUGI__MSVC_CRT_VERSION
3626 return _wcstoui64(value, 0, base);
3627 #else
3628 return wcstoull(value, 0, base);
3629 #endif
3630 #else
3631 #ifdef PUGI__MSVC_CRT_VERSION
3632 return _strtoui64(value, 0, base);
3633 #else
3634 return strtoull(value, 0, base);
3635 #endif
3636 #endif
3637 }
3638#endif
3639
3640 // set value with conversion functions
3641 PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128])
3642 {
3643 #ifdef PUGIXML_WCHAR_MODE
3644 char_t wbuf[128];
3645 impl::widen_ascii(wbuf, buf);
3646
3647 return strcpy_insitu(dest, header, header_mask, wbuf);
3648 #else
3649 return strcpy_insitu(dest, header, header_mask, buf);
3650 #endif
3651 }
3652
3653 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value)
3654 {
3655 char buf[128];
3656 sprintf(buf, "%d", value);
3657
3658 return set_value_buffer(dest, header, header_mask, buf);
3659 }
3660
3661 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value)
3662 {
3663 char buf[128];
3664 sprintf(buf, "%u", value);
3665
3666 return set_value_buffer(dest, header, header_mask, buf);
3667 }
3668
3669 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value)
3670 {
3671 char buf[128];
3672 sprintf(buf, "%g", value);
3673
3674 return set_value_buffer(dest, header, header_mask, buf);
3675 }
3676
3677 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value)
3678 {
3679 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
3680 }
3681
3682#ifdef PUGIXML_HAS_LONG_LONG
3683 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, long long value)
3684 {
3685 char buf[128];
3686 sprintf(buf, "%lld", value);
3687
3688 return set_value_buffer(dest, header, header_mask, buf);
3689 }
3690
3691 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned long long value)
3692 {
3693 char buf[128];
3694 sprintf(buf, "%llu", value);
3695
3696 return set_value_buffer(dest, header, header_mask, buf);
3697 }
3698#endif
3699
3700 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
3701 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
3702 {
3703 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
3704 // there are 64-bit versions of fseek/ftell, let's use them
3705 typedef __int64 length_type;
3706
3707 _fseeki64(file, 0, SEEK_END);
3708 length_type length = _ftelli64(file);
3709 _fseeki64(file, 0, SEEK_SET);
3710 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__)
3711 // there are 64-bit versions of fseek/ftell, let's use them
3712 typedef off64_t length_type;
3713
3714 fseeko64(file, 0, SEEK_END);
3715 length_type length = ftello64(file);
3716 fseeko64(file, 0, SEEK_SET);
3717 #else
3718 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
3719 typedef long length_type;
3720
3721 fseek(file, 0, SEEK_END);
3722 length_type length = ftell(file);
3723 fseek(file, 0, SEEK_SET);
3724 #endif
3725
3726 // check for I/O errors
3727 if (length < 0) return status_io_error;
3728
3729 // check for overflow
3730 size_t result = static_cast<size_t>(length);
3731
3732 if (static_cast<length_type>(result) != length) return status_out_of_memory;
3733
3734 // finalize
3735 out_result = result;
3736
3737 return status_ok;
3738 }
3739
3740 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
3741 {
3742 // We only need to zero-terminate if encoding conversion does not do it for us
3743 #ifdef PUGIXML_WCHAR_MODE
3744 xml_encoding wchar_encoding = get_wchar_encoding();
3745
3746 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
3747 {
3748 size_t length = size / sizeof(char_t);
3749
3750 static_cast<char_t*>(buffer)[length] = 0;
3751 return (length + 1) * sizeof(char_t);
3752 }
3753 #else
3754 if (encoding == encoding_utf8)
3755 {
3756 static_cast<char*>(buffer)[size] = 0;
3757 return size + 1;
3758 }
3759 #endif
3760
3761 return size;
3762 }
3763
3764 PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
3765 {
3766 if (!file) return make_parse_result(status_file_not_found);
3767
3768 // get file size (can result in I/O errors)
3769 size_t size = 0;
3770 xml_parse_status size_status = get_file_size(file, size);
3771
3772 if (size_status != status_ok)
3773 {
3774 fclose(file);
3775 return make_parse_result(size_status);
3776 }
3777
3778 size_t max_suffix_size = sizeof(char_t);
3779
3780 // allocate buffer for the whole file
3781 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
3782
3783 if (!contents)
3784 {
3785 fclose(file);
3786 return make_parse_result(status_out_of_memory);
3787 }
3788
3789 // read file in memory
3790 size_t read_size = fread(contents, 1, size, file);
3791 fclose(file);
3792
3793 if (read_size != size)
3794 {
3795 xml_memory::deallocate(contents);
3796 return make_parse_result(status_io_error);
3797 }
3798
3799 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
3800
3801 return doc.load_buffer_inplace_own(contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding);
3802 }
3803
3804#ifndef PUGIXML_NO_STL
3805 template <typename T> struct xml_stream_chunk
3806 {
3808 {
3809 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
3810
3811 return new (memory) xml_stream_chunk();
3812 }
3813
3814 static void destroy(void* ptr)
3815 {
3816 xml_stream_chunk* chunk = static_cast<xml_stream_chunk*>(ptr);
3817
3818 // free chunk chain
3819 while (chunk)
3820 {
3821 xml_stream_chunk* next = chunk->next;
3823 chunk = next;
3824 }
3825 }
3826
3828 {
3829 }
3830
3832 size_t size;
3833
3835 };
3836
3837 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
3838 {
3840
3841 // read file to a chunk list
3842 size_t total = 0;
3843 xml_stream_chunk<T>* last = 0;
3844
3845 while (!stream.eof())
3846 {
3847 // allocate new chunk
3849 if (!chunk) return status_out_of_memory;
3850
3851 // append chunk to list
3852 if (last) last = last->next = chunk;
3853 else chunks.data = last = chunk;
3854
3855 // read data to chunk
3856 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
3857 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
3858
3859 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
3860 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
3861
3862 // guard against huge files (chunk size is small enough to make this overflow check work)
3863 if (total + chunk->size < total) return status_out_of_memory;
3864 total += chunk->size;
3865 }
3866
3867 size_t max_suffix_size = sizeof(char_t);
3868
3869 // copy chunk list to a contiguous buffer
3870 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
3871 if (!buffer) return status_out_of_memory;
3872
3873 char* write = buffer;
3874
3875 for (xml_stream_chunk<T>* chunk = static_cast<xml_stream_chunk<T>*>(chunks.data); chunk; chunk = chunk->next)
3876 {
3877 assert(write + chunk->size <= buffer + total);
3878 memcpy(write, chunk->data, chunk->size);
3879 write += chunk->size;
3880 }
3881
3882 assert(write == buffer + total);
3883
3884 // return buffer
3885 *out_buffer = buffer;
3886 *out_size = total;
3887
3888 return status_ok;
3889 }
3890
3891 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
3892 {
3893 // get length of remaining data in stream
3894 typename std::basic_istream<T>::pos_type pos = stream.tellg();
3895 stream.seekg(0, std::ios::end);
3896 std::streamoff length = stream.tellg() - pos;
3897 stream.seekg(pos);
3898
3899 if (stream.fail() || pos < 0) return status_io_error;
3900
3901 // guard against huge files
3902 size_t read_length = static_cast<size_t>(length);
3903
3904 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
3905
3906 size_t max_suffix_size = sizeof(char_t);
3907
3908 // read stream data into memory (guard against stream exceptions with buffer holder)
3909 buffer_holder buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
3910 if (!buffer.data) return status_out_of_memory;
3911
3912 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
3913
3914 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
3915 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
3916
3917 // return buffer
3918 size_t actual_length = static_cast<size_t>(stream.gcount());
3919 assert(actual_length <= read_length);
3920
3921 *out_buffer = buffer.release();
3922 *out_size = actual_length * sizeof(T);
3923
3924 return status_ok;
3925 }
3926
3927 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
3928 {
3929 void* buffer = 0;
3930 size_t size = 0;
3931 xml_parse_status status = status_ok;
3932
3933 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
3934 if (stream.fail()) return make_parse_result(status_io_error);
3935
3936 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
3937 if (stream.tellg() < 0)
3938 {
3939 stream.clear(); // clear error flags that could be set by a failing tellg
3940 status = load_stream_data_noseek(stream, &buffer, &size);
3941 }
3942 else
3943 status = load_stream_data_seek(stream, &buffer, &size);
3944
3945 if (status != status_ok) return make_parse_result(status);
3946
3947 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
3948
3949 return doc.load_buffer_inplace_own(buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding);
3950 }
3951#endif
3952
3953#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && !defined(__STRICT_ANSI__))
3954 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
3955 {
3956 return _wfopen(path, mode);
3957 }
3958#else
3959 PUGI__FN char* convert_path_heap(const wchar_t* str)
3960 {
3961 assert(str);
3962
3963 // first pass: get length in utf8 characters
3964 size_t length = strlength_wide(str);
3965 size_t size = as_utf8_begin(str, length);
3966
3967 // allocate resulting string
3968 char* result = static_cast<char*>(xml_memory::allocate(size + 1));
3969 if (!result) return 0;
3970
3971 // second pass: convert to utf8
3972 as_utf8_end(result, size, str, length);
3973
3974 return result;
3975 }
3976
3977 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
3978 {
3979 // there is no standard function to open wide paths, so our best bet is to try utf8 path
3980 char* path_utf8 = convert_path_heap(path);
3981 if (!path_utf8) return 0;
3982
3983 // convert mode to ASCII (we mirror _wfopen interface)
3984 char mode_ascii[4] = {0};
3985 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
3986
3987 // try to open the utf8 path
3988 FILE* result = fopen(path_utf8, mode_ascii);
3989
3990 // free dummy buffer
3991 xml_memory::deallocate(path_utf8);
3992
3993 return result;
3994 }
3995#endif
3996
3997 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
3998 {
3999 if (!file) return false;
4000
4001 xml_writer_file writer(file);
4002 doc.save(writer, indent, flags, encoding);
4003
4004 int result = ferror(file);
4005
4006 fclose(file);
4007
4008 return result == 0;
4009 }
4010
4011 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4012 {
4013 // check input buffer
4014 assert(contents || size == 0);
4015
4016 // get actual encoding
4017 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4018
4019 // get private buffer
4020 char_t* buffer = 0;
4021 size_t length = 0;
4022
4023 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4024
4025 // delete original buffer if we performed a conversion
4026 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4027
4028 // store buffer for offset_debug
4029 doc->buffer = buffer;
4030
4031 // parse
4032 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4033
4034 // remember encoding
4035 res.encoding = buffer_encoding;
4036
4037 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4038 if (own || buffer != contents) *out_buffer = buffer;
4039
4040 return res;
4041 }
4043
4044namespace pugi
4045{
4046 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
4047 {
4048 }
4049
4050 PUGI__FN void xml_writer_file::write(const void* data, size_t size)
4051 {
4052 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
4053 (void)!result; // unfortunately we can't do proper error handling here
4054 }
4055
4056#ifndef PUGIXML_NO_STL
4057 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
4058 {
4059 }
4060
4061 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
4062 {
4063 }
4064
4065 PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
4066 {
4067 if (narrow_stream)
4068 {
4069 assert(!wide_stream);
4070 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
4071 }
4072 else
4073 {
4074 assert(wide_stream);
4075 assert(size % sizeof(wchar_t) == 0);
4076
4077 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
4078 }
4079 }
4080#endif
4081
4082 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
4083 {
4084 }
4085
4086 PUGI__FN xml_tree_walker::~xml_tree_walker()
4087 {
4088 }
4089
4090 PUGI__FN int xml_tree_walker::depth() const
4091 {
4092 return _depth;
4093 }
4094
4095 PUGI__FN bool xml_tree_walker::begin(xml_node&)
4096 {
4097 return true;
4098 }
4099
4100 PUGI__FN bool xml_tree_walker::end(xml_node&)
4101 {
4102 return true;
4103 }
4104
4105 PUGI__FN xml_attribute::xml_attribute(): _attr(0)
4106 {
4107 }
4108
4109 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
4110 {
4111 }
4112
4113 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
4114 {
4115 }
4116
4117 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
4118 {
4119 return _attr ? unspecified_bool_xml_attribute : 0;
4120 }
4121
4122 PUGI__FN bool xml_attribute::operator!() const
4123 {
4124 return !_attr;
4125 }
4126
4127 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
4128 {
4129 return (_attr == r._attr);
4130 }
4131
4132 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
4133 {
4134 return (_attr != r._attr);
4135 }
4136
4137 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
4138 {
4139 return (_attr < r._attr);
4140 }
4141
4142 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
4143 {
4144 return (_attr > r._attr);
4145 }
4146
4147 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
4148 {
4149 return (_attr <= r._attr);
4150 }
4151
4152 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
4153 {
4154 return (_attr >= r._attr);
4155 }
4156
4157 PUGI__FN xml_attribute xml_attribute::next_attribute() const
4158 {
4159 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
4160 }
4161
4162 PUGI__FN xml_attribute xml_attribute::previous_attribute() const
4163 {
4164 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
4165 }
4166
4167 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
4168 {
4169 return (_attr && _attr->value) ? _attr->value : def;
4170 }
4171
4172 PUGI__FN int xml_attribute::as_int(int def) const
4173 {
4174 return impl::get_value_int(_attr ? _attr->value : 0, def);
4175 }
4176
4177 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
4178 {
4179 return impl::get_value_uint(_attr ? _attr->value : 0, def);
4180 }
4181
4182 PUGI__FN double xml_attribute::as_double(double def) const
4183 {
4184 return impl::get_value_double(_attr ? _attr->value : 0, def);
4185 }
4186
4187 PUGI__FN float xml_attribute::as_float(float def) const
4188 {
4189 return impl::get_value_float(_attr ? _attr->value : 0, def);
4190 }
4191
4192 PUGI__FN bool xml_attribute::as_bool(bool def) const
4193 {
4194 return impl::get_value_bool(_attr ? _attr->value : 0, def);
4195 }
4196
4197#ifdef PUGIXML_HAS_LONG_LONG
4198 PUGI__FN long long xml_attribute::as_llong(long long def) const
4199 {
4200 return impl::get_value_llong(_attr ? _attr->value : 0, def);
4201 }
4202
4203 PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
4204 {
4205 return impl::get_value_ullong(_attr ? _attr->value : 0, def);
4206 }
4207#endif
4208
4209 PUGI__FN bool xml_attribute::empty() const
4210 {
4211 return !_attr;
4212 }
4213
4214 PUGI__FN const char_t* xml_attribute::name() const
4215 {
4216 return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT("");
4217 }
4218
4219 PUGI__FN const char_t* xml_attribute::value() const
4220 {
4221 return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT("");
4222 }
4223
4224 PUGI__FN size_t xml_attribute::hash_value() const
4225 {
4226 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
4227 }
4228
4229 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
4230 {
4231 return _attr;
4232 }
4233
4234 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
4235 {
4236 set_value(rhs);
4237 return *this;
4238 }
4239
4240 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
4241 {
4242 set_value(rhs);
4243 return *this;
4244 }
4245
4246 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
4247 {
4248 set_value(rhs);
4249 return *this;
4250 }
4251
4252 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
4253 {
4254 set_value(rhs);
4255 return *this;
4256 }
4257
4258 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
4259 {
4260 set_value(rhs);
4261 return *this;
4262 }
4263
4264#ifdef PUGIXML_HAS_LONG_LONG
4265 PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
4266 {
4267 set_value(rhs);
4268 return *this;
4269 }
4270
4271 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
4272 {
4273 set_value(rhs);
4274 return *this;
4275 }
4276#endif
4277
4278 PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
4279 {
4280 if (!_attr) return false;
4281
4282 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs);
4283 }
4284
4285 PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
4286 {
4287 if (!_attr) return false;
4288
4289 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
4290 }
4291
4292 PUGI__FN bool xml_attribute::set_value(int rhs)
4293 {
4294 if (!_attr) return false;
4295
4296 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
4297 }
4298
4299 PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
4300 {
4301 if (!_attr) return false;
4302
4303 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
4304 }
4305
4306 PUGI__FN bool xml_attribute::set_value(double rhs)
4307 {
4308 if (!_attr) return false;
4309
4310 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
4311 }
4312
4313 PUGI__FN bool xml_attribute::set_value(bool rhs)
4314 {
4315 if (!_attr) return false;
4316
4317 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
4318 }
4319
4320#ifdef PUGIXML_HAS_LONG_LONG
4321 PUGI__FN bool xml_attribute::set_value(long long rhs)
4322 {
4323 if (!_attr) return false;
4324
4325 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
4326 }
4327
4328 PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
4329 {
4330 if (!_attr) return false;
4331
4332 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
4333 }
4334#endif
4335
4336#ifdef __BORLANDC__
4337 PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
4338 {
4339 return (bool)lhs && rhs;
4340 }
4341
4342 PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
4343 {
4344 return (bool)lhs || rhs;
4345 }
4346#endif
4347
4348 PUGI__FN xml_node::xml_node(): _root(0)
4349 {
4350 }
4351
4352 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
4353 {
4354 }
4355
4356 PUGI__FN static void unspecified_bool_xml_node(xml_node***)
4357 {
4358 }
4359
4360 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
4361 {
4362 return _root ? unspecified_bool_xml_node : 0;
4363 }
4364
4365 PUGI__FN bool xml_node::operator!() const
4366 {
4367 return !_root;
4368 }
4369
4370 PUGI__FN xml_node::iterator xml_node::begin() const
4371 {
4372 return iterator(_root ? _root->first_child : 0, _root);
4373 }
4374
4375 PUGI__FN xml_node::iterator xml_node::end() const
4376 {
4377 return iterator(0, _root);
4378 }
4379
4380 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
4381 {
4382 return attribute_iterator(_root ? _root->first_attribute : 0, _root);
4383 }
4384
4385 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
4386 {
4387 return attribute_iterator(0, _root);
4388 }
4389
4390 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
4391 {
4392 return xml_object_range<xml_node_iterator>(begin(), end());
4393 }
4394
4395 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
4396 {
4397 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
4398 }
4399
4400 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
4401 {
4402 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
4403 }
4404
4405 PUGI__FN bool xml_node::operator==(const xml_node& r) const
4406 {
4407 return (_root == r._root);
4408 }
4409
4410 PUGI__FN bool xml_node::operator!=(const xml_node& r) const
4411 {
4412 return (_root != r._root);
4413 }
4414
4415 PUGI__FN bool xml_node::operator<(const xml_node& r) const
4416 {
4417 return (_root < r._root);
4418 }
4419
4420 PUGI__FN bool xml_node::operator>(const xml_node& r) const
4421 {
4422 return (_root > r._root);
4423 }
4424
4425 PUGI__FN bool xml_node::operator<=(const xml_node& r) const
4426 {
4427 return (_root <= r._root);
4428 }
4429
4430 PUGI__FN bool xml_node::operator>=(const xml_node& r) const
4431 {
4432 return (_root >= r._root);
4433 }
4434
4435 PUGI__FN bool xml_node::empty() const
4436 {
4437 return !_root;
4438 }
4439
4440 PUGI__FN const char_t* xml_node::name() const
4441 {
4442 return (_root && _root->name) ? _root->name : PUGIXML_TEXT("");
4443 }
4444
4445 PUGI__FN xml_node_type xml_node::type() const
4446 {
4447 return _root ? static_cast<xml_node_type>((_root->header & impl::xml_memory_page_type_mask) + 1) : node_null;
4448 }
4449
4450 PUGI__FN const char_t* xml_node::value() const
4451 {
4452 return (_root && _root->value) ? _root->value : PUGIXML_TEXT("");
4453 }
4454
4455 PUGI__FN xml_node xml_node::child(const char_t* name_) const
4456 {
4457 if (!_root) return xml_node();
4458
4459 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
4460 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
4461
4462 return xml_node();
4463 }
4464
4465 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
4466 {
4467 if (!_root) return xml_attribute();
4468
4469 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
4470 if (i->name && impl::strequal(name_, i->name))
4471 return xml_attribute(i);
4472
4473 return xml_attribute();
4474 }
4475
4476 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
4477 {
4478 if (!_root) return xml_node();
4479
4480 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
4481 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
4482
4483 return xml_node();
4484 }
4485
4486 PUGI__FN xml_node xml_node::next_sibling() const
4487 {
4488 if (!_root) return xml_node();
4489
4490 if (_root->next_sibling) return xml_node(_root->next_sibling);
4491 else return xml_node();
4492 }
4493
4494 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
4495 {
4496 if (!_root) return xml_node();
4497
4498 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
4499 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
4500
4501 return xml_node();
4502 }
4503
4504 PUGI__FN xml_node xml_node::previous_sibling() const
4505 {
4506 if (!_root) return xml_node();
4507
4508 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
4509 else return xml_node();
4510 }
4511
4512 PUGI__FN xml_node xml_node::parent() const
4513 {
4514 return _root ? xml_node(_root->parent) : xml_node();
4515 }
4516
4517 PUGI__FN xml_node xml_node::root() const
4518 {
4519 if (!_root) return xml_node();
4520
4521 impl::xml_memory_page* page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
4522
4523 return xml_node(static_cast<impl::xml_document_struct*>(page->allocator));
4524 }
4525
4526 PUGI__FN xml_text xml_node::text() const
4527 {
4528 return xml_text(_root);
4529 }
4530
4531 PUGI__FN const char_t* xml_node::child_value() const
4532 {
4533 if (!_root) return PUGIXML_TEXT("");
4534
4535 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
4536 if (i->value && impl::is_text_node(i))
4537 return i->value;
4538
4539 return PUGIXML_TEXT("");
4540 }
4541
4542 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
4543 {
4544 return child(name_).child_value();
4545 }
4546
4547 PUGI__FN xml_attribute xml_node::first_attribute() const
4548 {
4549 return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
4550 }
4551
4552 PUGI__FN xml_attribute xml_node::last_attribute() const
4553 {
4554 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
4555 }
4556
4557 PUGI__FN xml_node xml_node::first_child() const
4558 {
4559 return _root ? xml_node(_root->first_child) : xml_node();
4560 }
4561
4562 PUGI__FN xml_node xml_node::last_child() const
4563 {
4564 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
4565 }
4566
4567 PUGI__FN bool xml_node::set_name(const char_t* rhs)
4568 {
4569 switch (type())
4570 {
4571 case node_pi:
4572 case node_declaration:
4573 case node_element:
4574 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs);
4575
4576 default:
4577 return false;
4578 }
4579 }
4580
4581 PUGI__FN bool xml_node::set_value(const char_t* rhs)
4582 {
4583 switch (type())
4584 {
4585 case node_pi:
4586 case node_cdata:
4587 case node_pcdata:
4588 case node_comment:
4589 case node_doctype:
4590 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs);
4591
4592 default:
4593 return false;
4594 }
4595 }
4596
4597 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
4598 {
4599 if (type() != node_element && type() != node_declaration) return xml_attribute();
4600
4601 xml_attribute a(impl::append_new_attribute(_root, impl::get_allocator(_root)));
4602
4603 a.set_name(name_);
4604
4605 return a;
4606 }
4607
4608 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
4609 {
4610 if (type() != node_element && type() != node_declaration) return xml_attribute();
4611
4612 xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
4613 if (!a) return xml_attribute();
4614
4615 xml_attribute_struct* head = _root->first_attribute;
4616
4617 if (head)
4618 {
4619 a._attr->prev_attribute_c = head->prev_attribute_c;
4620 head->prev_attribute_c = a._attr;
4621 }
4622 else
4623 a._attr->prev_attribute_c = a._attr;
4624
4625 a._attr->next_attribute = head;
4626 _root->first_attribute = a._attr;
4627
4628 a.set_name(name_);
4629
4630 return a;
4631 }
4632
4633 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
4634 {
4635 if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
4636
4637 // check that attribute belongs to *this
4638 xml_attribute_struct* cur = attr._attr;
4639
4640 while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
4641
4642 if (cur != _root->first_attribute) return xml_attribute();
4643
4644 xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
4645 if (!a) return xml_attribute();
4646
4647 if (attr._attr->prev_attribute_c->next_attribute)
4648 attr._attr->prev_attribute_c->next_attribute = a._attr;
4649 else
4650 _root->first_attribute = a._attr;
4651
4652 a._attr->prev_attribute_c = attr._attr->prev_attribute_c;
4653 a._attr->next_attribute = attr._attr;
4654 attr._attr->prev_attribute_c = a._attr;
4655
4656 a.set_name(name_);
4657
4658 return a;
4659 }
4660
4661 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
4662 {
4663 if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
4664
4665 // check that attribute belongs to *this
4666 xml_attribute_struct* cur = attr._attr;
4667
4668 while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
4669
4670 if (cur != _root->first_attribute) return xml_attribute();
4671
4672 xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
4673 if (!a) return xml_attribute();
4674
4675 if (attr._attr->next_attribute)
4676 attr._attr->next_attribute->prev_attribute_c = a._attr;
4677 else
4678 _root->first_attribute->prev_attribute_c = a._attr;
4679
4680 a._attr->next_attribute = attr._attr->next_attribute;
4681 a._attr->prev_attribute_c = attr._attr;
4682 attr._attr->next_attribute = a._attr;
4683
4684 a.set_name(name_);
4685
4686 return a;
4687 }
4688
4689 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
4690 {
4691 if (!proto) return xml_attribute();
4692
4693 xml_attribute result = append_attribute(proto.name());
4694 result.set_value(proto.value());
4695
4696 return result;
4697 }
4698
4699 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
4700 {
4701 if (!proto) return xml_attribute();
4702
4703 xml_attribute result = prepend_attribute(proto.name());
4704 result.set_value(proto.value());
4705
4706 return result;
4707 }
4708
4709 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
4710 {
4711 if (!proto) return xml_attribute();
4712
4713 xml_attribute result = insert_attribute_after(proto.name(), attr);
4714 result.set_value(proto.value());
4715
4716 return result;
4717 }
4718
4719 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
4720 {
4721 if (!proto) return xml_attribute();
4722
4723 xml_attribute result = insert_attribute_before(proto.name(), attr);
4724 result.set_value(proto.value());
4725
4726 return result;
4727 }
4728
4729 PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
4730 {
4731 if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
4732
4733 xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
4734 if (!n) return xml_node();
4735
4736 impl::append_node(n._root, _root);
4737
4738 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
4739
4740 return n;
4741 }
4742
4743 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
4744 {
4745 if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
4746
4747 xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
4748 if (!n) return xml_node();
4749
4750 impl::prepend_node(n._root, _root);
4751
4752 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
4753
4754 return n;
4755 }
4756
4757 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
4758 {
4759 if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
4760 if (!node._root || node._root->parent != _root) return xml_node();
4761
4762 xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
4763 if (!n) return xml_node();
4764
4765 impl::insert_node_before(n._root, node._root);
4766
4767 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
4768
4769 return n;
4770 }
4771
4772 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
4773 {
4774 if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
4775 if (!node._root || node._root->parent != _root) return xml_node();
4776
4777 xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
4778 if (!n) return xml_node();
4779
4780 impl::insert_node_after(n._root, node._root);
4781
4782 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
4783
4784 return n;
4785 }
4786
4787 PUGI__FN xml_node xml_node::append_child(const char_t* name_)
4788 {
4789 xml_node result = append_child(node_element);
4790
4791 result.set_name(name_);
4792
4793 return result;
4794 }
4795
4796 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
4797 {
4798 xml_node result = prepend_child(node_element);
4799
4800 result.set_name(name_);
4801
4802 return result;
4803 }
4804
4805 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
4806 {
4807 xml_node result = insert_child_after(node_element, node);
4808
4809 result.set_name(name_);
4810
4811 return result;
4812 }
4813
4814 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
4815 {
4816 xml_node result = insert_child_before(node_element, node);
4817
4818 result.set_name(name_);
4819
4820 return result;
4821 }
4822
4823 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
4824 {
4825 xml_node result = append_child(proto.type());
4826
4827 if (result) impl::recursive_copy_skip(result, proto, result);
4828
4829 return result;
4830 }
4831
4832 PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
4833 {
4834 xml_node result = prepend_child(proto.type());
4835
4836 if (result) impl::recursive_copy_skip(result, proto, result);
4837
4838 return result;
4839 }
4840
4841 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
4842 {
4843 xml_node result = insert_child_after(proto.type(), node);
4844
4845 if (result) impl::recursive_copy_skip(result, proto, result);
4846
4847 return result;
4848 }
4849
4850 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
4851 {
4852 xml_node result = insert_child_before(proto.type(), node);
4853
4854 if (result) impl::recursive_copy_skip(result, proto, result);
4855
4856 return result;
4857 }
4858
4859 PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
4860 {
4861 if (!impl::allow_move(*this, moved)) return xml_node();
4862
4863 impl::remove_node(moved._root);
4864 impl::append_node(moved._root, _root);
4865
4866 return moved;
4867 }
4868
4869 PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
4870 {
4871 if (!impl::allow_move(*this, moved)) return xml_node();
4872
4873 impl::remove_node(moved._root);
4874 impl::prepend_node(moved._root, _root);
4875
4876 return moved;
4877 }
4878
4879 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
4880 {
4881 if (!impl::allow_move(*this, moved)) return xml_node();
4882 if (!node._root || node._root->parent != _root) return xml_node();
4883 if (moved._root == node._root) return xml_node();
4884
4885 impl::remove_node(moved._root);
4886 impl::insert_node_after(moved._root, node._root);
4887
4888 return moved;
4889 }
4890
4891 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
4892 {
4893 if (!impl::allow_move(*this, moved)) return xml_node();
4894 if (!node._root || node._root->parent != _root) return xml_node();
4895 if (moved._root == node._root) return xml_node();
4896
4897 impl::remove_node(moved._root);
4898 impl::insert_node_before(moved._root, node._root);
4899
4900 return moved;
4901 }
4902
4903 PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
4904 {
4905 return remove_attribute(attribute(name_));
4906 }
4907
4908 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
4909 {
4910 if (!_root || !a._attr) return false;
4911
4912 // check that attribute belongs to *this
4913 xml_attribute_struct* attr = a._attr;
4914
4915 while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c;
4916
4917 if (attr != _root->first_attribute) return false;
4918
4919 if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c;
4920 else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c;
4921
4922 if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute;
4923 else _root->first_attribute = a._attr->next_attribute;
4924
4925 impl::destroy_attribute(a._attr, impl::get_allocator(_root));
4926
4927 return true;
4928 }
4929
4930 PUGI__FN bool xml_node::remove_child(const char_t* name_)
4931 {
4932 return remove_child(child(name_));
4933 }
4934
4935 PUGI__FN bool xml_node::remove_child(const xml_node& n)
4936 {
4937 if (!_root || !n._root || n._root->parent != _root) return false;
4938
4939 impl::remove_node(n._root);
4940
4941 impl::destroy_node(n._root, impl::get_allocator(_root));
4942
4943 return true;
4944 }
4945
4946 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
4947 {
4948 // append_buffer is only valid for elements/documents
4949 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
4950
4951 // get document node
4952 impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(root()._root);
4953 assert(doc);
4954
4955 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
4956 impl::xml_memory_page* page = 0;
4957 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));
4958 (void)page;
4959
4960 if (!extra) return impl::make_parse_result(status_out_of_memory);
4961
4962 // save name; name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
4963 char_t* rootname = _root->name;
4964 _root->name = 0;
4965
4966 // parse
4967 char_t* buffer = 0;
4968 xml_parse_result res = impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &buffer);
4969
4970 // restore name
4971 _root->name = rootname;
4972
4973 // add extra buffer to the list
4974 extra->buffer = buffer;
4975 extra->next = doc->extra_buffers;
4976 doc->extra_buffers = extra;
4977
4978 return res;
4979 }
4980
4981 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
4982 {
4983 if (!_root) return xml_node();
4984
4985 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
4986 if (i->name && impl::strequal(name_, i->name))
4987 {
4988 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
4989 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value : PUGIXML_TEXT("")))
4990 return xml_node(i);
4991 }
4992
4993 return xml_node();
4994 }
4995
4996 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
4997 {
4998 if (!_root) return xml_node();
4999
5000 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5001 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
5002 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value : PUGIXML_TEXT("")))
5003 return xml_node(i);
5004
5005 return xml_node();
5006 }
5007
5008#ifndef PUGIXML_NO_STL
5009 PUGI__FN string_t xml_node::path(char_t delimiter) const
5010 {
5011 xml_node cursor = *this; // Make a copy.
5012
5013 string_t result = cursor.name();
5014
5015 while (cursor.parent())
5016 {
5017 cursor = cursor.parent();
5018
5019 string_t temp = cursor.name();
5020 temp += delimiter;
5021 temp += result;
5022 result.swap(temp);
5023 }
5024
5025 return result;
5026 }
5027#endif
5028
5029 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
5030 {
5031 xml_node found = *this; // Current search context.
5032
5033 if (!_root || !path_ || !path_[0]) return found;
5034
5035 if (path_[0] == delimiter)
5036 {
5037 // Absolute path; e.g. '/foo/bar'
5038 found = found.root();
5039 ++path_;
5040 }
5041
5042 const char_t* path_segment = path_;
5043
5044 while (*path_segment == delimiter) ++path_segment;
5045
5046 const char_t* path_segment_end = path_segment;
5047
5048 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
5049
5050 if (path_segment == path_segment_end) return found;
5051
5052 const char_t* next_segment = path_segment_end;
5053
5054 while (*next_segment == delimiter) ++next_segment;
5055
5056 if (*path_segment == '.' && path_segment + 1 == path_segment_end)
5057 return found.first_element_by_path(next_segment, delimiter);
5058 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
5059 return found.parent().first_element_by_path(next_segment, delimiter);
5060 else
5061 {
5062 for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
5063 {
5064 if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
5065 {
5066 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
5067
5068 if (subsearch) return subsearch;
5069 }
5070 }
5071
5072 return xml_node();
5073 }
5074 }
5075
5076 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
5077 {
5078 walker._depth = -1;
5079
5080 xml_node arg_begin = *this;
5081 if (!walker.begin(arg_begin)) return false;
5082
5083 xml_node cur = first_child();
5084
5085 if (cur)
5086 {
5087 ++walker._depth;
5088
5089 do
5090 {
5091 xml_node arg_for_each = cur;
5092 if (!walker.for_each(arg_for_each))
5093 return false;
5094
5095 if (cur.first_child())
5096 {
5097 ++walker._depth;
5098 cur = cur.first_child();
5099 }
5100 else if (cur.next_sibling())
5101 cur = cur.next_sibling();
5102 else
5103 {
5104 // Borland C++ workaround
5105 while (!cur.next_sibling() && cur != *this && !cur.parent().empty())
5106 {
5107 --walker._depth;
5108 cur = cur.parent();
5109 }
5110
5111 if (cur != *this)
5112 cur = cur.next_sibling();
5113 }
5114 }
5115 while (cur && cur != *this);
5116 }
5117
5118 assert(walker._depth == -1);
5119
5120 xml_node arg_end = *this;
5121 return walker.end(arg_end);
5122 }
5123
5124 PUGI__FN size_t xml_node::hash_value() const
5125 {
5126 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
5127 }
5128
5129 PUGI__FN xml_node_struct* xml_node::internal_object() const
5130 {
5131 return _root;
5132 }
5133
5134 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
5135 {
5136 if (!_root) return;
5137
5138 impl::xml_buffered_writer buffered_writer(writer, encoding);
5139
5140 impl::node_output(buffered_writer, *this, indent, flags, depth);
5141 }
5142
5143#ifndef PUGIXML_NO_STL
5144 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
5145 {
5146 xml_writer_stream writer(stream);
5147
5148 print(writer, indent, flags, encoding, depth);
5149 }
5150
5151 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
5152 {
5153 xml_writer_stream writer(stream);
5154
5155 print(writer, indent, flags, encoding_wchar, depth);
5156 }
5157#endif
5158
5159 PUGI__FN ptrdiff_t xml_node::offset_debug() const
5160 {
5161 xml_node_struct* r = root()._root;
5162
5163 if (!r) return -1;
5164
5165 const char_t* buffer = static_cast<impl::xml_document_struct*>(r)->buffer;
5166
5167 if (!buffer) return -1;
5168
5169 switch (type())
5170 {
5171 case node_document:
5172 return 0;
5173
5174 case node_element:
5175 case node_declaration:
5176 case node_pi:
5177 return (_root->header & impl::xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer;
5178
5179 case node_pcdata:
5180 case node_cdata:
5181 case node_comment:
5182 case node_doctype:
5183 return (_root->header & impl::xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer;
5184
5185 default:
5186 return -1;
5187 }
5188 }
5189
5190#ifdef __BORLANDC__
5191 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
5192 {
5193 return (bool)lhs && rhs;
5194 }
5195
5196 PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
5197 {
5198 return (bool)lhs || rhs;
5199 }
5200#endif
5201
5202 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
5203 {
5204 }
5205
5206 PUGI__FN xml_node_struct* xml_text::_data() const
5207 {
5208 if (!_root || impl::is_text_node(_root)) return _root;
5209
5210 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
5211 if (impl::is_text_node(node))
5212 return node;
5213
5214 return 0;
5215 }
5216
5217 PUGI__FN xml_node_struct* xml_text::_data_new()
5218 {
5219 xml_node_struct* d = _data();
5220 if (d) return d;
5221
5222 return xml_node(_root).append_child(node_pcdata).internal_object();
5223 }
5224
5225 PUGI__FN xml_text::xml_text(): _root(0)
5226 {
5227 }
5228
5229 PUGI__FN static void unspecified_bool_xml_text(xml_text***)
5230 {
5231 }
5232
5233 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
5234 {
5235 return _data() ? unspecified_bool_xml_text : 0;
5236 }
5237
5238 PUGI__FN bool xml_text::operator!() const
5239 {
5240 return !_data();
5241 }
5242
5243 PUGI__FN bool xml_text::empty() const
5244 {
5245 return _data() == 0;
5246 }
5247
5248 PUGI__FN const char_t* xml_text::get() const
5249 {
5250 xml_node_struct* d = _data();
5251
5252 return (d && d->value) ? d->value : PUGIXML_TEXT("");
5253 }
5254
5255 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
5256 {
5257 xml_node_struct* d = _data();
5258
5259 return (d && d->value) ? d->value : def;
5260 }
5261
5262 PUGI__FN int xml_text::as_int(int def) const
5263 {
5264 xml_node_struct* d = _data();
5265
5266 return impl::get_value_int(d ? d->value : 0, def);
5267 }
5268
5269 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
5270 {
5271 xml_node_struct* d = _data();
5272
5273 return impl::get_value_uint(d ? d->value : 0, def);
5274 }
5275
5276 PUGI__FN double xml_text::as_double(double def) const
5277 {
5278 xml_node_struct* d = _data();
5279
5280 return impl::get_value_double(d ? d->value : 0, def);
5281 }
5282
5283 PUGI__FN float xml_text::as_float(float def) const
5284 {
5285 xml_node_struct* d = _data();
5286
5287 return impl::get_value_float(d ? d->value : 0, def);
5288 }
5289
5290 PUGI__FN bool xml_text::as_bool(bool def) const
5291 {
5292 xml_node_struct* d = _data();
5293
5294 return impl::get_value_bool(d ? d->value : 0, def);
5295 }
5296
5297#ifdef PUGIXML_HAS_LONG_LONG
5298 PUGI__FN long long xml_text::as_llong(long long def) const
5299 {
5300 xml_node_struct* d = _data();
5301
5302 return impl::get_value_llong(d ? d->value : 0, def);
5303 }
5304
5305 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
5306 {
5307 xml_node_struct* d = _data();
5308
5309 return impl::get_value_ullong(d ? d->value : 0, def);
5310 }
5311#endif
5312
5313 PUGI__FN bool xml_text::set(const char_t* rhs)
5314 {
5315 xml_node_struct* dn = _data_new();
5316
5317 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
5318 }
5319
5320 PUGI__FN bool xml_text::set(int rhs)
5321 {
5322 xml_node_struct* dn = _data_new();
5323
5324 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
5325 }
5326
5327 PUGI__FN bool xml_text::set(unsigned int rhs)
5328 {
5329 xml_node_struct* dn = _data_new();
5330
5331 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
5332 }
5333
5334 PUGI__FN bool xml_text::set(double rhs)
5335 {
5336 xml_node_struct* dn = _data_new();
5337
5338 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
5339 }
5340
5341 PUGI__FN bool xml_text::set(bool rhs)
5342 {
5343 xml_node_struct* dn = _data_new();
5344
5345 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
5346 }
5347
5348#ifdef PUGIXML_HAS_LONG_LONG
5349 PUGI__FN bool xml_text::set(long long rhs)
5350 {
5351 xml_node_struct* dn = _data_new();
5352
5353 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
5354 }
5355
5356 PUGI__FN bool xml_text::set(unsigned long long rhs)
5357 {
5358 xml_node_struct* dn = _data_new();
5359
5360 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
5361 }
5362#endif
5363
5364 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
5365 {
5366 set(rhs);
5367 return *this;
5368 }
5369
5370 PUGI__FN xml_text& xml_text::operator=(int rhs)
5371 {
5372 set(rhs);
5373 return *this;
5374 }
5375
5376 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
5377 {
5378 set(rhs);
5379 return *this;
5380 }
5381
5382 PUGI__FN xml_text& xml_text::operator=(double rhs)
5383 {
5384 set(rhs);
5385 return *this;
5386 }
5387
5388 PUGI__FN xml_text& xml_text::operator=(bool rhs)
5389 {
5390 set(rhs);
5391 return *this;
5392 }
5393
5394#ifdef PUGIXML_HAS_LONG_LONG
5395 PUGI__FN xml_text& xml_text::operator=(long long rhs)
5396 {
5397 set(rhs);
5398 return *this;
5399 }
5400
5401 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
5402 {
5403 set(rhs);
5404 return *this;
5405 }
5406#endif
5407
5408 PUGI__FN xml_node xml_text::data() const
5409 {
5410 return xml_node(_data());
5411 }
5412
5413#ifdef __BORLANDC__
5414 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
5415 {
5416 return (bool)lhs && rhs;
5417 }
5418
5419 PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
5420 {
5421 return (bool)lhs || rhs;
5422 }
5423#endif
5424
5425 PUGI__FN xml_node_iterator::xml_node_iterator()
5426 {
5427 }
5428
5429 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
5430 {
5431 }
5432
5433 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
5434 {
5435 }
5436
5437 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
5438 {
5439 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
5440 }
5441
5442 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
5443 {
5444 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
5445 }
5446
5447 PUGI__FN xml_node& xml_node_iterator::operator*() const
5448 {
5449 assert(_wrap._root);
5450 return _wrap;
5451 }
5452
5453 PUGI__FN xml_node* xml_node_iterator::operator->() const
5454 {
5455 assert(_wrap._root);
5456 return const_cast<xml_node*>(&_wrap); // BCC32 workaround
5457 }
5458
5459 PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
5460 {
5461 assert(_wrap._root);
5462 _wrap._root = _wrap._root->next_sibling;
5463 return *this;
5464 }
5465
5466 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
5467 {
5468 xml_node_iterator temp = *this;
5469 ++*this;
5470 return temp;
5471 }
5472
5473 PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
5474 {
5475 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
5476 return *this;
5477 }
5478
5479 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
5480 {
5481 xml_node_iterator temp = *this;
5482 --*this;
5483 return temp;
5484 }
5485
5486 PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
5487 {
5488 }
5489
5490 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
5491 {
5492 }
5493
5494 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
5495 {
5496 }
5497
5498 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
5499 {
5500 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
5501 }
5502
5503 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
5504 {
5505 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
5506 }
5507
5508 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
5509 {
5510 assert(_wrap._attr);
5511 return _wrap;
5512 }
5513
5514 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
5515 {
5516 assert(_wrap._attr);
5517 return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround
5518 }
5519
5520 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
5521 {
5522 assert(_wrap._attr);
5523 _wrap._attr = _wrap._attr->next_attribute;
5524 return *this;
5525 }
5526
5527 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
5528 {
5529 xml_attribute_iterator temp = *this;
5530 ++*this;
5531 return temp;
5532 }
5533
5534 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
5535 {
5536 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
5537 return *this;
5538 }
5539
5540 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
5541 {
5542 xml_attribute_iterator temp = *this;
5543 --*this;
5544 return temp;
5545 }
5546
5547 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
5548 {
5549 }
5550
5551 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
5552 {
5553 }
5554
5555 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
5556 {
5557 }
5558
5559 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
5560 {
5561 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
5562 }
5563
5564 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
5565 {
5566 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
5567 }
5568
5569 PUGI__FN xml_node& xml_named_node_iterator::operator*() const
5570 {
5571 assert(_wrap._root);
5572 return _wrap;
5573 }
5574
5575 PUGI__FN xml_node* xml_named_node_iterator::operator->() const
5576 {
5577 assert(_wrap._root);
5578 return const_cast<xml_node*>(&_wrap); // BCC32 workaround
5579 }
5580
5581 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
5582 {
5583 assert(_wrap._root);
5584 _wrap = _wrap.next_sibling(_name);
5585 return *this;
5586 }
5587
5588 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
5589 {
5590 xml_named_node_iterator temp = *this;
5591 ++*this;
5592 return temp;
5593 }
5594
5595 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
5596 {
5597 if (_wrap._root)
5598 _wrap = _wrap.previous_sibling(_name);
5599 else
5600 {
5601 _wrap = _parent.last_child();
5602
5603 if (!impl::strequal(_wrap.name(), _name))
5604 _wrap = _wrap.previous_sibling(_name);
5605 }
5606
5607 return *this;
5608 }
5609
5610 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
5611 {
5612 xml_named_node_iterator temp = *this;
5613 --*this;
5614 return temp;
5615 }
5616
5617 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
5618 {
5619 }
5620
5621 PUGI__FN xml_parse_result::operator bool() const
5622 {
5623 return status == status_ok;
5624 }
5625
5626 PUGI__FN const char* xml_parse_result::description() const
5627 {
5628 switch (status)
5629 {
5630 case status_ok: return "No error";
5631
5632 case status_file_not_found: return "File was not found";
5633 case status_io_error: return "Error reading from file/stream";
5634 case status_out_of_memory: return "Could not allocate memory";
5635 case status_internal_error: return "Internal error occurred";
5636
5637 case status_unrecognized_tag: return "Could not determine tag type";
5638
5639 case status_bad_pi: return "Error parsing document declaration/processing instruction";
5640 case status_bad_comment: return "Error parsing comment";
5641 case status_bad_cdata: return "Error parsing CDATA section";
5642 case status_bad_doctype: return "Error parsing document type declaration";
5643 case status_bad_pcdata: return "Error parsing PCDATA section";
5644 case status_bad_start_element: return "Error parsing start element tag";
5645 case status_bad_attribute: return "Error parsing element attribute";
5646 case status_bad_end_element: return "Error parsing end element tag";
5647 case status_end_element_mismatch: return "Start-end tags mismatch";
5648
5649 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
5650
5651 case status_no_document_element: return "No document element found";
5652
5653 default: return "Unknown error";
5654 }
5655 }
5656
5657 PUGI__FN xml_document::xml_document(): _buffer(0)
5658 {
5659 create();
5660 }
5661
5662 PUGI__FN xml_document::~xml_document()
5663 {
5664 destroy();
5665 }
5666
5667 PUGI__FN void xml_document::reset()
5668 {
5669 destroy();
5670 create();
5671 }
5672
5673 PUGI__FN void xml_document::reset(const xml_document& proto)
5674 {
5675 reset();
5676
5677 for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
5678 append_copy(cur);
5679 }
5680
5681 PUGI__FN void xml_document::create()
5682 {
5683 assert(!_root);
5684
5685 // initialize sentinel page
5686 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment <= sizeof(_memory));
5687
5688 // align upwards to page boundary
5689 void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));
5690
5691 // prepare page structure
5692 impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory);
5693 assert(page);
5694
5695 page->busy_size = impl::xml_memory_page_size;
5696
5697 // allocate new root
5698 _root = new (page->data) impl::xml_document_struct(page);
5699 _root->prev_sibling_c = _root;
5700
5701 // setup sentinel page
5702 page->allocator = static_cast<impl::xml_document_struct*>(_root);
5703 }
5704
5705 PUGI__FN void xml_document::destroy()
5706 {
5707 assert(_root);
5708
5709 // destroy static storage
5710 if (_buffer)
5711 {
5712 impl::xml_memory::deallocate(_buffer);
5713 _buffer = 0;
5714 }
5715
5716 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
5717 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
5718 {
5719 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
5720 }
5721
5722 // destroy dynamic storage, leave sentinel page (it's in static memory)
5723 impl::xml_memory_page* root_page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
5724 assert(root_page && !root_page->prev && !root_page->memory);
5725
5726 for (impl::xml_memory_page* page = root_page->next; page; )
5727 {
5728 impl::xml_memory_page* next = page->next;
5729
5730 impl::xml_allocator::deallocate_page(page);
5731
5732 page = next;
5733 }
5734
5735 _root = 0;
5736 }
5737
5738#ifndef PUGIXML_NO_STL
5739 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
5740 {
5741 reset();
5742
5743 return impl::load_stream_impl(*this, stream, options, encoding);
5744 }
5745
5746 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
5747 {
5748 reset();
5749
5750 return impl::load_stream_impl(*this, stream, options, encoding_wchar);
5751 }
5752#endif
5753
5754 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
5755 {
5756 // Force native encoding (skip autodetection)
5757 #ifdef PUGIXML_WCHAR_MODE
5758 xml_encoding encoding = encoding_wchar;
5759 #else
5760 xml_encoding encoding = encoding_utf8;
5761 #endif
5762
5763 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
5764 }
5765
5766 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
5767 {
5768 reset();
5769
5770 FILE* file = fopen(path_, "rb");
5771
5772 return impl::load_file_impl(*this, file, options, encoding);
5773 }
5774
5775 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
5776 {
5777 reset();
5778
5779 FILE* file = impl::open_file_wide(path_, L"rb");
5780
5781 return impl::load_file_impl(*this, file, options, encoding);
5782 }
5783
5784 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
5785 {
5786 reset();
5787
5788 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
5789 }
5790
5791 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
5792 {
5793 reset();
5794
5795 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
5796 }
5797
5798 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
5799 {
5800 reset();
5801
5802 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
5803 }
5804
5805 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
5806 {
5807 impl::xml_buffered_writer buffered_writer(writer, encoding);
5808
5809 if ((flags & format_write_bom) && encoding != encoding_latin1)
5810 {
5811 // BOM always represents the codepoint U+FEFF, so just write it in native encoding
5812 #ifdef PUGIXML_WCHAR_MODE
5813 unsigned int bom = 0xfeff;
5814 buffered_writer.write(static_cast<wchar_t>(bom));
5815 #else
5816 buffered_writer.write('\xef', '\xbb', '\xbf');
5817 #endif
5818 }
5819
5820 if (!(flags & format_no_declaration) && !impl::has_declaration(*this))
5821 {
5822 buffered_writer.write(PUGIXML_TEXT("<?xml version=\"1.0\""));
5823 if (encoding == encoding_latin1) buffered_writer.write(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
5824 buffered_writer.write('?', '>');
5825 if (!(flags & format_raw)) buffered_writer.write('\n');
5826 }
5827
5828 impl::node_output(buffered_writer, *this, indent, flags, 0);
5829 }
5830
5831#ifndef PUGIXML_NO_STL
5832 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
5833 {
5834 xml_writer_stream writer(stream);
5835
5836 save(writer, indent, flags, encoding);
5837 }
5838
5839 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
5840 {
5841 xml_writer_stream writer(stream);
5842
5843 save(writer, indent, flags, encoding_wchar);
5844 }
5845#endif
5846
5847 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
5848 {
5849 FILE* file = fopen(path_, (flags & format_save_file_text) ? "w" : "wb");
5850 return impl::save_file_impl(*this, file, indent, flags, encoding);
5851 }
5852
5853 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
5854 {
5855 FILE* file = impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb");
5856 return impl::save_file_impl(*this, file, indent, flags, encoding);
5857 }
5858
5859 PUGI__FN xml_node xml_document::document_element() const
5860 {
5861 assert(_root);
5862
5863 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5864 if ((i->header & impl::xml_memory_page_type_mask) + 1 == node_element)
5865 return xml_node(i);
5866
5867 return xml_node();
5868 }
5869
5870#ifndef PUGIXML_NO_STL
5871 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
5872 {
5873 assert(str);
5874
5875 return impl::as_utf8_impl(str, impl::strlength_wide(str));
5876 }
5877
5878 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
5879 {
5880 return impl::as_utf8_impl(str.c_str(), str.size());
5881 }
5882
5883 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
5884 {
5885 assert(str);
5886
5887 return impl::as_wide_impl(str, strlen(str));
5888 }
5889
5890 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
5891 {
5892 return impl::as_wide_impl(str.c_str(), str.size());
5893 }
5894#endif
5895
5896 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
5897 {
5898 impl::xml_memory::allocate = allocate;
5899 impl::xml_memory::deallocate = deallocate;
5900 }
5901
5902 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
5903 {
5904 return impl::xml_memory::allocate;
5905 }
5906
5907 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
5908 {
5909 return impl::xml_memory::deallocate;
5910 }
5911}
5912
5913#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
5914namespace std
5915{
5916 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
5917 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
5918 {
5919 return std::bidirectional_iterator_tag();
5920 }
5921
5922 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
5923 {
5924 return std::bidirectional_iterator_tag();
5925 }
5926
5927 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
5928 {
5929 return std::bidirectional_iterator_tag();
5930 }
5931}
5932#endif
5933
5934#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
5935namespace std
5936{
5937 // Workarounds for (non-standard) iterator category detection
5938 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
5939 {
5940 return std::bidirectional_iterator_tag();
5941 }
5942
5943 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
5944 {
5945 return std::bidirectional_iterator_tag();
5946 }
5947
5948 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
5949 {
5950 return std::bidirectional_iterator_tag();
5951 }
5952}
5953#endif
5954
5955#ifndef PUGIXML_NO_XPATH
5956
5957// STL replacements
5960 {
5961 template <typename T> bool operator()(const T& lhs, const T& rhs) const
5962 {
5963 return lhs == rhs;
5964 }
5965 };
5966
5968 {
5969 template <typename T> bool operator()(const T& lhs, const T& rhs) const
5970 {
5971 return lhs != rhs;
5972 }
5973 };
5974
// Comparison functor: reports whether lhs orders before rhs according to operator<.
struct less
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		bool ordered_before = lhs < rhs;

		return ordered_before;
	}
};
5982
5984 {
5985 template <typename T> bool operator()(const T& lhs, const T& rhs) const
5986 {
5987 return lhs <= rhs;
5988 }
5989 };
5990
// Exchanges the values of lhs and rhs through a temporary copy (one copy construction, two assignments).
template <typename T> void swap(T& lhs, T& rhs)
{
	T saved_lhs = lhs;

	lhs = rhs;
	rhs = saved_lhs;
}
5997
// Returns an iterator to the first element of [begin, end) that no other element precedes according to pred.
// pred(a, b) must return true when a orders before b.
// For an empty range the function returns end (previously it stepped past end and read out of bounds).
template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
{
	// nothing to compare: begin + 1 would already be past the end of the range
	if (begin == end) return end;

	I result = begin;

	for (I it = begin + 1; it != end; ++it)
		if (pred(*it, *result))
			result = it;

	return result;
}
6008
// Reverses [begin, end) in place by swapping elements pairwise from both ends towards the middle.
template <typename I> void reverse(I begin, I end)
{
	while (end - begin > 1)
	{
		--end;
		swap(*begin, *end);
		++begin;
	}
}
6013
// Collapses every run of consecutive equal elements in [begin, end) to its first element, in place.
// Elements are compared with operator!=. Returns the past-the-end iterator of the compacted range.
template <typename I> I unique(I begin, I end)
{
	// fast skip head: nothing has to move while neighbours differ
	while (end - begin > 1 && *begin != *(begin + 1)) ++begin;

	if (begin == end) return begin;

	// last element that has been kept so far
	I last_kept = begin;

	// keep every element that differs from the last kept one
	for (I read = begin + 1; read != end; ++read)
	{
		if (*read != *last_kept)
		{
			++last_kept;
			*last_kept = *read;
		}
	}

	// past-the-end (last_kept points to a live element)
	return last_kept + 1;
}
6036
// Copies [begin, end) so that the copy ends at target, walking from the last element to the first.
template <typename I> void copy_backwards(I begin, I end, I target)
{
	while (end != begin)
	{
		--end;
		--target;

		*target = *end;
	}
}
6041
// Sorts the non-empty range [begin, end) with insertion sort, ordering elements by pred.
// The unnamed T* parameter is only used to deduce the element type T.
template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
{
	assert(begin != end);

	for (I cur = begin + 1; cur != end; ++cur)
	{
		T value = *cur;

		if (pred(value, *begin))
		{
			// new minimum: shift the whole sorted prefix right by one and put the value in front
			copy_backwards(begin, cur, cur + 1);
			*begin = value;

			continue;
		}

		// shift larger elements right; the scan stops at *begin at the latest because value does not precede it
		I slot = cur;

		for (; pred(value, *(slot - 1)); --slot)
			*slot = *(slot - 1);

		// fill hole with element
		*slot = value;
	}
}
6072
// std variant for elements with ==
// Three-way partition of [begin, end) around the value at middle: elements ordered before the pivot by pred
// end up on the left, elements equal to it (operator==) in the centre, the rest on the right.
// The bounds of the equal range are returned through out_eqbeg / out_eqend.
template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
{
	I eq_first = middle, eq_last = middle + 1;

	// expand equal range
	while (eq_first != begin && *(eq_first - 1) == *eq_first) --eq_first;
	while (eq_last != end && *eq_last == *eq_first) ++eq_last;

	// process outer elements
	I lt_last = eq_first, gt_first = eq_last;

	for (;;)
	{
		// find the element from the right side that belongs to the left one
		while (gt_first != end)
		{
			if (!pred(*eq_first, *gt_first))
			{
				if (!(*gt_first == *eq_first)) break;

				// equal to the pivot: append it to the equal range
				swap(*gt_first, *eq_last);
				++eq_last;
			}

			++gt_first;
		}

		// find the element from the left side that belongs to the right one
		while (lt_last != begin)
		{
			if (!pred(*(lt_last - 1), *eq_first))
			{
				if (!(*eq_first == *(lt_last - 1))) break;

				// equal to the pivot: prepend it to the equal range
				--eq_first;
				swap(*(lt_last - 1), *eq_first);
			}

			--lt_last;
		}

		bool right_done = (gt_first == end);
		bool left_done = (lt_last == begin);

		// scanned all elements
		if (right_done && left_done)
		{
			*out_eqbeg = eq_first;
			*out_eqend = eq_last;
			return;
		}

		// make room for elements by moving equal area
		if (right_done)
		{
			--lt_last;
			--eq_first;

			if (lt_last != eq_first) swap(*lt_last, *eq_first);

			--eq_last;
			swap(*eq_first, *eq_last);
		}
		else if (left_done)
		{
			if (eq_last != gt_first) swap(*eq_first, *eq_last);

			++eq_last;

			swap(*gt_first, *eq_first);
			++gt_first;
			++eq_first;
		}
		else
		{
			// one misplaced element on each side: exchange them
			--lt_last;
			swap(*gt_first, *lt_last);
			++gt_first;
		}
	}
}
6126
// Orders the three referenced elements by pred so that *middle ends up holding their median.
template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
{
	// order first/middle
	if (pred(*middle, *first))
	{
		swap(*middle, *first);
	}

	// move the largest of the three to last
	if (pred(*last, *middle))
	{
		swap(*last, *middle);
	}

	// the previous swap may have broken the first/middle order
	if (pred(*middle, *first))
	{
		swap(*middle, *first);
	}
}
6133
// Moves an approximate median of the range first..last (last is a valid element) into *middle.
template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
{
	// median of three for small chunks
	if (last - first <= 40)
	{
		median3(first, middle, last, pred);
		return;
	}

	// median of nine: medians of three spread-out triples, then the median of those
	size_t step = (last - first + 1) / 8;

	median3(first, first + step, first + 2 * step, pred);
	median3(middle - step, middle, middle + step, pred);
	median3(last - 2 * step, last - step, last, pred);
	median3(first + step, middle, last - step, pred);
}
6152
// Sorts [begin, end) by pred: three-way quicksort for ranges of more than 32 elements,
// finished by insertion sort on the remaining small chunk.
template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
{
	// sort large chunks
	while (end - begin > 32)
	{
		// find median element
		I pivot = begin + (end - begin) / 2;
		median(begin, pivot, end - 1, pred);

		// partition in three chunks (< = >)
		I equal_first, equal_last;
		partition(begin, pivot, end, pred, &equal_first, &equal_last);

		// recurse into the smaller side and keep looping on the larger one
		bool left_is_larger = (equal_first - begin > end - equal_last);

		if (left_is_larger)
		{
			sort(equal_last, end, pred);
			end = equal_first;
		}
		else
		{
			sort(begin, equal_first, pred);
			begin = equal_last;
		}
	}

	// insertion sort small chunk
	if (begin != end) insertion_sort(begin, end, pred, &*begin);
}
6183
6184// Allocator used for AST and evaluation stacks
6187 {
6189 size_t capacity;
6190
6191 char data[
6192 #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
6193 PUGIXML_MEMORY_XPATH_PAGE_SIZE
6194 #else
6195 4096
6196 #endif
6197 ];
6198 };
6199
6201 {
6204
6205 public:
6206 #ifdef PUGIXML_NO_EXCEPTIONS
6207 jmp_buf* error_handler;
6208 #endif
6209
6210 xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
6211 {
6212 #ifdef PUGIXML_NO_EXCEPTIONS
6213 error_handler = 0;
6214 #endif
6215 }
6216
6217 void* allocate_nothrow(size_t size)
6218 {
6219 // align size so that we're able to store pointers in subsequent blocks
6220 size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
6221
6222 if (_root_size + size <= _root->capacity)
6223 {
6224 void* buf = _root->data + _root_size;
6225 _root_size += size;
6226 return buf;
6227 }
6228 else
6229 {
6230 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
6231 size_t block_capacity_base = sizeof(_root->data);
6232 size_t block_capacity_req = size + block_capacity_base / 4;
6233 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
6234
6235 size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
6236
6237 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
6238 if (!block) return 0;
6239
6240 block->next = _root;
6241 block->capacity = block_capacity;
6242
6243 _root = block;
6244 _root_size = size;
6245
6246 return block->data;
6247 }
6248 }
6249
6250 void* allocate(size_t size)
6251 {
6252 void* result = allocate_nothrow(size);
6253
6254 if (!result)
6255 {
6256 #ifdef PUGIXML_NO_EXCEPTIONS
6257 assert(error_handler);
6258 longjmp(*error_handler, 1);
6259 #else
6260 throw std::bad_alloc();
6261 #endif
6262 }
6263
6264 return result;
6265 }
6266
6267 void* reallocate(void* ptr, size_t old_size, size_t new_size)
6268 {
6269 // align size so that we're able to store pointers in subsequent blocks
6270 old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
6271 new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
6272
6273 // we can only reallocate the last object
6274 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data + _root_size);
6275
6276 // adjust root size so that we have not allocated the object at all
6277 bool only_object = (_root_size == old_size);
6278
6279 if (ptr) _root_size -= old_size;
6280
6281 // allocate a new version (this will obviously reuse the memory if possible)
6282 void* result = allocate(new_size);
6283 assert(result);
6284
6285 // we have a new block
6286 if (result != ptr && ptr)
6287 {
6288 // copy old data
6289 assert(new_size >= old_size);
6290 memcpy(result, ptr, old_size);
6291
6292 // free the previous page if it had no other objects
6293 if (only_object)
6294 {
6295 assert(_root->data == result);
6296 assert(_root->next);
6297
6299
6300 if (next)
6301 {
6302 // deallocate the whole page, unless it was the first one
6304 _root->next = next;
6305 }
6306 }
6307 }
6308
6309 return result;
6310 }
6311
6312 void revert(const xpath_allocator& state)
6313 {
6314 // free all new pages
6316
6317 while (cur != state._root)
6318 {
6319 xpath_memory_block* next = cur->next;
6320
6322
6323 cur = next;
6324 }
6325
6326 // restore state
6327 _root = state._root;
6328 _root_size = state._root_size;
6329 }
6330
6331 void release()
6332 {
6334 assert(cur);
6335
6336 while (cur->next)
6337 {
6338 xpath_memory_block* next = cur->next;
6339
6341
6342 cur = next;
6343 }
6344 }
6345 };
6346
6361
6367
6369 {
6374
6375 #ifdef PUGIXML_NO_EXCEPTIONS
6376 jmp_buf error_handler;
6377 #endif
6378
6380 {
6381 blocks[0].next = blocks[1].next = 0;
6382 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
6383
6384 stack.result = &result;
6385 stack.temp = &temp;
6386
6387 #ifdef PUGIXML_NO_EXCEPTIONS
6388 result.error_handler = temp.error_handler = &error_handler;
6389 #endif
6390 }
6391
6393 {
6394 result.release();
6395 temp.release();
6396 }
6397 };
6399
6400// String class
6403 {
6404 const char_t* _buffer;
6406
6407 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
6408 {
6409 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
6410 assert(result);
6411
6412 memcpy(result, string, length * sizeof(char_t));
6413 result[length] = 0;
6414
6415 return result;
6416 }
6417
6418 static char_t* duplicate_string(const char_t* string, xpath_allocator* alloc)
6419 {
6420 return duplicate_string(string, strlength(string), alloc);
6421 }
6422
6423 public:
6424 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false)
6425 {
6426 }
6427
6428 explicit xpath_string(const char_t* str, xpath_allocator* alloc)
6429 {
6430 bool empty_ = (*str == 0);
6431
6432 _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(str, alloc);
6433 _uses_heap = !empty_;
6434 }
6435
6436 explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap)
6437 {
6438 }
6439
6440 xpath_string(const char_t* begin, const char_t* end, xpath_allocator* alloc)
6441 {
6442 assert(begin <= end);
6443
6444 bool empty_ = (begin == end);
6445
6446 _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(begin, static_cast<size_t>(end - begin), alloc);
6447 _uses_heap = !empty_;
6448 }
6449
6450 void append(const xpath_string& o, xpath_allocator* alloc)
6451 {
6452 // skip empty sources
6453 if (!*o._buffer) return;
6454
6455 // fast append for constant empty target and constant source
6456 if (!*_buffer && !_uses_heap && !o._uses_heap)
6457 {
6458 _buffer = o._buffer;
6459 }
6460 else
6461 {
6462 // need to make heap copy
6463 size_t target_length = strlength(_buffer);
6464 size_t source_length = strlength(o._buffer);
6465 size_t result_length = target_length + source_length;
6466
6467 // allocate new buffer
6468 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
6469 assert(result);
6470
6471 // append first string to the new buffer in case there was no reallocation
6472 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
6473
6474 // append second string to the new buffer
6475 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
6476 result[result_length] = 0;
6477
6478 // finalize
6479 _buffer = result;
6480 _uses_heap = true;
6481 }
6482 }
6483
6484 const char_t* c_str() const
6485 {
6486 return _buffer;
6487 }
6488
6489 size_t length() const
6490 {
6491 return strlength(_buffer);
6492 }
6493
6494 char_t* data(xpath_allocator* alloc)
6495 {
6496 // make private heap copy
6497 if (!_uses_heap)
6498 {
6500 _uses_heap = true;
6501 }
6502
6503 return const_cast<char_t*>(_buffer);
6504 }
6505
6506 bool empty() const
6507 {
6508 return *_buffer == 0;
6509 }
6510
6511 bool operator==(const xpath_string& o) const
6512 {
6513 return strequal(_buffer, o._buffer);
6514 }
6515
6516 bool operator!=(const xpath_string& o) const
6517 {
6518 return !strequal(_buffer, o._buffer);
6519 }
6520
6521 bool uses_heap() const
6522 {
6523 return _uses_heap;
6524 }
6525 };
6526
6528 {
6529 return xpath_string(str, false);
6530 }
6532
6534 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
6535 {
6536 while (*pattern && *string == *pattern)
6537 {
6538 string++;
6539 pattern++;
6540 }
6541
6542 return *pattern == 0;
6543 }
6544
6545 PUGI__FN const char_t* find_char(const char_t* s, char_t c)
6546 {
6547 #ifdef PUGIXML_WCHAR_MODE
6548 return wcschr(s, c);
6549 #else
6550 return strchr(s, c);
6551 #endif
6552 }
6553
6554 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
6555 {
6556 #ifdef PUGIXML_WCHAR_MODE
6557 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
6558 return (*p == 0) ? s : wcsstr(s, p);
6559 #else
6560 return strstr(s, p);
6561 #endif
6562 }
6563
6564 // Converts symbol to lower case, if it is an ASCII one
6565 PUGI__FN char_t tolower_ascii(char_t ch)
6566 {
6567 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
6568 }
6569
6571 {
6572 if (na.attribute())
6573 return xpath_string_const(na.attribute().value());
6574 else
6575 {
6576 const xml_node& n = na.node();
6577
6578 switch (n.type())
6579 {
6580 case node_pcdata:
6581 case node_cdata:
6582 case node_comment:
6583 case node_pi:
6584 return xpath_string_const(n.value());
6585
6586 case node_document:
6587 case node_element:
6588 {
6589 xpath_string result;
6590
6591 xml_node cur = n.first_child();
6592
6593 while (cur && cur != n)
6594 {
6595 if (cur.type() == node_pcdata || cur.type() == node_cdata)
6596 result.append(xpath_string_const(cur.value()), alloc);
6597
6598 if (cur.first_child())
6599 cur = cur.first_child();
6600 else if (cur.next_sibling())
6601 cur = cur.next_sibling();
6602 else
6603 {
6604 while (!cur.next_sibling() && cur != n)
6605 cur = cur.parent();
6606
6607 if (cur != n) cur = cur.next_sibling();
6608 }
6609 }
6610
6611 return result;
6612 }
6613
6614 default:
6615 return xpath_string();
6616 }
6617 }
6618 }
6619
6620 PUGI__FN unsigned int node_height(xml_node n)
6621 {
6622 unsigned int result = 0;
6623
6624 while (n)
6625 {
6626 ++result;
6627 n = n.parent();
6628 }
6629
6630 return result;
6631 }
6632
6633 PUGI__FN bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)
6634 {
6635 // normalize heights
6636 for (unsigned int i = rh; i < lh; i++) ln = ln.parent();
6637 for (unsigned int j = lh; j < rh; j++) rn = rn.parent();
6638
6639 // one node is the ancestor of the other
6640 if (ln == rn) return lh < rh;
6641
6642 // find common ancestor
6643 while (ln.parent() != rn.parent())
6644 {
6645 ln = ln.parent();
6646 rn = rn.parent();
6647 }
6648
6649 // there is no common ancestor (the shared parent is null), nodes are from different documents
6650 if (!ln.parent()) return ln < rn;
6651
6652 // determine sibling order
6653 for (; ln; ln = ln.next_sibling())
6654 if (ln == rn)
6655 return true;
6656
6657 return false;
6658 }
6659
6660 PUGI__FN bool node_is_ancestor(xml_node parent, xml_node node)
6661 {
6662 while (node && node != parent) node = node.parent();
6663
6664 return parent && node == parent;
6665 }
6666
6667 PUGI__FN const void* document_order(const xpath_node& xnode)
6668 {
6669 xml_node_struct* node = xnode.node().internal_object();
6670
6671 if (node)
6672 {
6673 if (node->name && (node->header & xml_memory_page_name_allocated_mask) == 0) return node->name;
6674 if (node->value && (node->header & xml_memory_page_value_allocated_mask) == 0) return node->value;
6675 return 0;
6676 }
6677
6678 xml_attribute_struct* attr = xnode.attribute().internal_object();
6679
6680 if (attr)
6681 {
6682 if ((attr->header & xml_memory_page_name_allocated_mask) == 0) return attr->name;
6683 if ((attr->header & xml_memory_page_value_allocated_mask) == 0) return attr->value;
6684 return 0;
6685 }
6686
6687 return 0;
6688 }
6689
6691 {
6692 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
6693 {
6694 // optimized document order based check
6695 const void* lo = document_order(lhs);
6696 const void* ro = document_order(rhs);
6697
6698 if (lo && ro) return lo < ro;
6699
6700 // slow comparison
6701 xml_node ln = lhs.node(), rn = rhs.node();
6702
6703 // compare attributes
6704 if (lhs.attribute() && rhs.attribute())
6705 {
6706 // shared parent
6707 if (lhs.parent() == rhs.parent())
6708 {
6709 // determine sibling order
6710 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
6711 if (a == rhs.attribute())
6712 return true;
6713
6714 return false;
6715 }
6716
6717 // compare attribute parents
6718 ln = lhs.parent();
6719 rn = rhs.parent();
6720 }
6721 else if (lhs.attribute())
6722 {
6723 // attributes go after the parent element
6724 if (lhs.parent() == rhs.node()) return false;
6725
6726 ln = lhs.parent();
6727 }
6728 else if (rhs.attribute())
6729 {
6730 // attributes go after the parent element
6731 if (rhs.parent() == lhs.node()) return true;
6732
6733 rn = rhs.parent();
6734 }
6735
6736 if (ln == rn) return false;
6737
6738 unsigned int lh = node_height(ln);
6739 unsigned int rh = node_height(rn);
6740
6741 return node_is_before(ln, lh, rn, rh);
6742 }
6743 };
6744
6746 {
6747 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
6748 {
6749 if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
6750 else return rhs.attribute() ? false : lhs.node() < rhs.node();
6751 }
6752 };
6753
6755 {
6756 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
6757 union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
6758 u[0].i = 0x7fc00000;
6759 return u[0].f;
6760 #else
6761 // fallback
6762 const volatile double zero = 0.0;
6763 return zero / zero;
6764 #endif
6765 }
6766
6767 PUGI__FN bool is_nan(double value)
6768 {
6769 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
6770 return !!_isnan(value);
6771 #elif defined(fpclassify) && defined(FP_NAN)
6772 return fpclassify(value) == FP_NAN;
6773 #else
6774 // fallback
6775 const volatile double v = value;
6776 return v != v;
6777 #endif
6778 }
6779
6780 PUGI__FN const char_t* convert_number_to_string_special(double value)
6781 {
6782 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
6783 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
6784 if (_isnan(value)) return PUGIXML_TEXT("NaN");
6785 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
6786 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
6787 switch (fpclassify(value))
6788 {
6789 case FP_NAN:
6790 return PUGIXML_TEXT("NaN");
6791
6792 case FP_INFINITE:
6793 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
6794
6795 case FP_ZERO:
6796 return PUGIXML_TEXT("0");
6797
6798 default:
6799 return 0;
6800 }
6801 #else
6802 // fallback
6803 const volatile double v = value;
6804
6805 if (v == 0) return PUGIXML_TEXT("0");
6806 if (v != v) return PUGIXML_TEXT("NaN");
6807 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
6808 return 0;
6809 #endif
6810 }
6811
6813 {
6814 return (value != 0 && !is_nan(value));
6815 }
6816
6817 PUGI__FN void truncate_zeros(char* begin, char* end)
6818 {
6819 while (begin != end && end[-1] == '0') end--;
6820
6821 *end = 0;
6822 }
6823
6824 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
6825#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
6826 PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
6827 {
6828 // get base values
6829 int sign, exponent;
6830 _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
6831
6832 // truncate redundant zeros
6833 truncate_zeros(buffer, buffer + strlen(buffer));
6834
6835 // fill results
6836 *out_mantissa = buffer;
6837 *out_exponent = exponent;
6838 }
6839#else
6840 PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
6841 {
6842 // get a scientific notation value with IEEE DBL_DIG decimals
6843 sprintf(buffer, "%.*e", DBL_DIG, value);
6844 assert(strlen(buffer) < buffer_size);
6845 (void)!buffer_size;
6846
6847 // get the exponent (possibly negative)
6848 char* exponent_string = strchr(buffer, 'e');
6849 assert(exponent_string);
6850
6851 int exponent = atoi(exponent_string + 1);
6852
6853 // extract mantissa string: skip sign
6854 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
6855 assert(mantissa[0] != '0' && mantissa[1] == '.');
6856
6857 // divide mantissa by 10 to eliminate integer part
6858 mantissa[1] = mantissa[0];
6859 mantissa++;
6860 exponent++;
6861
6862 // remove extra mantissa digits and zero-terminate mantissa
6863 truncate_zeros(mantissa, exponent_string);
6864
6865 // fill results
6866 *out_mantissa = mantissa;
6867 *out_exponent = exponent;
6868 }
6869#endif
6870
6872 {
6873 // try special number conversion
6874 const char_t* special = convert_number_to_string_special(value);
6875 if (special) return xpath_string_const(special);
6876
6877 // get mantissa + exponent form
6878 char mantissa_buffer[32];
6879
6880 char* mantissa;
6881 int exponent;
6882 convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
6883
6884 // allocate a buffer of suitable length for the number
6885 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
6886 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
6887 assert(result);
6888
6889 // make the number!
6890 char_t* s = result;
6891
6892 // sign
6893 if (value < 0) *s++ = '-';
6894
6895 // integer part
6896 if (exponent <= 0)
6897 {
6898 *s++ = '0';
6899 }
6900 else
6901 {
6902 while (exponent > 0)
6903 {
6904 assert(*mantissa == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*mantissa) - '0') <= 9);
6905 *s++ = *mantissa ? *mantissa++ : '0';
6906 exponent--;
6907 }
6908 }
6909
6910 // fractional part
6911 if (*mantissa)
6912 {
6913 // decimal point
6914 *s++ = '.';
6915
6916 // extra zeroes from negative exponent
6917 while (exponent < 0)
6918 {
6919 *s++ = '0';
6920 exponent++;
6921 }
6922
6923 // extra mantissa digits
6924 while (*mantissa)
6925 {
6926 assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
6927 *s++ = *mantissa++;
6928 }
6929 }
6930
6931 // zero-terminate
6932 assert(s < result + result_size);
6933 *s = 0;
6934
6935 return xpath_string(result, true);
6936 }
6937
6938 PUGI__FN bool check_string_to_number_format(const char_t* string)
6939 {
6940 // parse leading whitespace
6941 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
6942
6943 // parse sign
6944 if (*string == '-') ++string;
6945
6946 if (!*string) return false;
6947
6948 // if there is no integer part, there should be a decimal part with at least one digit
6949 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
6950
6951 // parse integer part
6952 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
6953
6954 // parse decimal part
6955 if (*string == '.')
6956 {
6957 ++string;
6958
6959 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
6960 }
6961
6962 // parse trailing whitespace
6963 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
6964
6965 return *string == 0;
6966 }
6967
6968 PUGI__FN double convert_string_to_number(const char_t* string)
6969 {
6970 // check string format
6971 if (!check_string_to_number_format(string)) return gen_nan();
6972
6973 // parse string
6974 #ifdef PUGIXML_WCHAR_MODE
6975 return wcstod(string, 0);
6976 #else
6977 return atof(string);
6978 #endif
6979 }
6980
6981 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
6982 {
6983 size_t length = static_cast<size_t>(end - begin);
6984 char_t* scratch = buffer;
6985
6986 if (length >= sizeof(buffer) / sizeof(buffer[0]))
6987 {
6988 // need to make dummy on-heap copy
6989 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
6990 if (!scratch) return false;
6991 }
6992
6993 // copy string to zero-terminated buffer and perform conversion
6994 memcpy(scratch, begin, length * sizeof(char_t));
6995 scratch[length] = 0;
6996
6997 *out_result = convert_string_to_number(scratch);
6998
6999 // free dummy buffer
7000 if (scratch != buffer) xml_memory::deallocate(scratch);
7001
7002 return true;
7003 }
7004
7005 PUGI__FN double round_nearest(double value)
7006 {
7007 return floor(value + 0.5);
7008 }
7009
7010 PUGI__FN double round_nearest_nzero(double value)
7011 {
7012 // same as round_nearest, but returns -0 for [-0.5, -0]
7013 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
7014 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
7015 }
7016
7017 PUGI__FN const char_t* qualified_name(const xpath_node& node)
7018 {
7019 return node.attribute() ? node.attribute().name() : node.node().name();
7020 }
7021
7022 PUGI__FN const char_t* local_name(const xpath_node& node)
7023 {
7024 const char_t* name = qualified_name(node);
7025 const char_t* p = find_char(name, ':');
7026
7027 return p ? p + 1 : name;
7028 }
7029
// Predicate passed to find_attribute() by the namespace_uri helpers: it selects the
// namespace declaration attribute ("xmlns" or "xmlns:<prefix>") that applies to a name.
// NOTE(review): the struct declaration line and the declaration of prefix_length
// appear to be missing from this extract - confirm against the upstream source.
7031 {
7032 const char_t* prefix;
7034
// Splits the name at its first ':'; prefix stays null (and prefix_length 0) when there is none.
7035 namespace_uri_predicate(const char_t* name)
7036 {
7037 const char_t* pos = find_char(name, ':');
7038
7039 prefix = pos ? name : 0;
7040 prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
7041 }
7042
// Only attributes whose name starts with "xmlns" are considered; with a prefix the rest
// must be ':' followed by that prefix, without a prefix the name must be exactly "xmlns".
7043 bool operator()(const xml_attribute& a) const
7044 {
7045 const char_t* name = a.name();
7046
7047 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
7048
7049 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
7050 }
7051 };
7052
7053 PUGI__FN const char_t* namespace_uri(const xml_node& node)
7054 {
7055 namespace_uri_predicate pred = node.name();
7056
7057 xml_node p = node;
7058
7059 while (p)
7060 {
7061 xml_attribute a = p.find_attribute(pred);
7062
7063 if (a) return a.value();
7064
7065 p = p.parent();
7066 }
7067
7068 return PUGIXML_TEXT("");
7069 }
7070
7071 PUGI__FN const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
7072 {
7073 namespace_uri_predicate pred = attr.name();
7074
7075 // Default namespace does not apply to attributes
7076 if (!pred.prefix) return PUGIXML_TEXT("");
7077
7078 xml_node p = parent;
7079
7080 while (p)
7081 {
7082 xml_attribute a = p.find_attribute(pred);
7083
7084 if (a) return a.value();
7085
7086 p = p.parent();
7087 }
7088
7089 return PUGIXML_TEXT("");
7090 }
7091
7092 PUGI__FN const char_t* namespace_uri(const xpath_node& node)
7093 {
7094 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
7095 }
7096
7097 PUGI__FN void normalize_space(char_t* buffer)
7098 {
7099 char_t* write = buffer;
7100
7101 for (char_t* it = buffer; *it; )
7102 {
7103 char_t ch = *it++;
7104
7105 if (PUGI__IS_CHARTYPE(ch, ct_space))
7106 {
7107 // replace whitespace sequence with single space
7108 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
7109
7110 // avoid leading spaces
7111 if (write != buffer) *write++ = ' ';
7112 }
7113 else *write++ = ch;
7114 }
7115
7116 // remove trailing space
7117 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
7118
7119 // zero-terminate
7120 *write = 0;
7121 }
7122
7123 PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to)
7124 {
7125 size_t to_length = strlength(to);
7126
7127 char_t* write = buffer;
7128
7129 while (*buffer)
7130 {
7131 PUGI__DMC_VOLATILE char_t ch = *buffer++;
7132
7133 const char_t* pos = find_char(from, ch);
7134
7135 if (!pos)
7136 *write++ = ch; // do not process
7137 else if (static_cast<size_t>(pos - from) < to_length)
7138 *write++ = to[pos - from]; // replace
7139 }
7140
7141 // zero-terminate
7142 *write = 0;
7143 }
7144
// Variable holding a boolean value.
// name must stay the last member: new_xpath_variable allocates extra characters past the
// end of the object and copies the full zero-terminated name into this array.
// NOTE(review): the constructor declaration line appears to be missing from this extract.
7145 struct xpath_variable_boolean: xpath_variable
7146 {
7148 {
7149 }
7150
7151 bool value;
7152 char_t name[1];
7153 };
7154
// Variable holding a numeric (double) value.
// name must stay the last member: new_xpath_variable allocates extra characters past the
// end of the object and copies the full zero-terminated name into this array.
// NOTE(review): the constructor declaration line appears to be missing from this extract.
7155 struct xpath_variable_number: xpath_variable
7156 {
7158 {
7159 }
7160
7161 double value;
7162 char_t name[1];
7163 };
7164
// Variable holding a string value through a char_t pointer.
// name must stay the last member: new_xpath_variable allocates extra characters past the
// end of the object and copies the full zero-terminated name into this array.
// NOTE(review): several lines (constructor declaration and the members between the
// constructor and 'value') appear to be missing from this extract; ownership of 'value'
// cannot be determined from what is visible here.
7165 struct xpath_variable_string: xpath_variable
7166 {
7168 {
7169 }
7170
7175
7176 char_t* value;
7177 char_t name[1];
7178 };
7179
// Variable holding a node set value.
// name must stay the last member: new_xpath_variable allocates extra characters past the
// end of the object and copies the full zero-terminated name into this array.
7180 struct xpath_variable_node_set: xpath_variable
7181 {
7182 xpath_node_set value;
7183 char_t name[1];
7184 };
7185
// Default-constructed node set with static storage duration.
// NOTE(review): presumably returned by reference where a node set is requested but none
// is available - confirm against the use sites, which are outside this extract.
7186 static const xpath_node_set dummy_node_set;
7187
7188 PUGI__FN unsigned int hash_string(const char_t* str)
7189 {
7190 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
7191 unsigned int result = 0;
7192
7193 while (*str)
7194 {
7195 result += static_cast<unsigned int>(*str++);
7196 result += result << 10;
7197 result ^= result >> 6;
7198 }
7199
7200 result += result << 3;
7201 result ^= result >> 11;
7202 result += result << 15;
7203
7204 return result;
7205 }
7206
7207 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
7208 {
7209 size_t length = strlength(name);
7210 if (length == 0) return 0; // empty variable names are invalid
7211
7212 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
7213 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
7214 if (!memory) return 0;
7215
7216 T* result = new (memory) T();
7217
7218 memcpy(result->name, name, (length + 1) * sizeof(char_t));
7219
7220 return result;
7221 }
7222
7223 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
7224 {
7225 switch (type)
7226 {
7227 case xpath_type_node_set:
7228 return new_xpath_variable<xpath_variable_node_set>(name);
7229
7230 case xpath_type_number:
7231 return new_xpath_variable<xpath_variable_number>(name);
7232
7233 case xpath_type_string:
7234 return new_xpath_variable<xpath_variable_string>(name);
7235
7236 case xpath_type_boolean:
7237 return new_xpath_variable<xpath_variable_boolean>(name);
7238
7239 default:
7240 return 0;
7241 }
7242 }
7243
// Destroys a variable of concrete type T by invoking its destructor explicitly.
// NOTE(review): a line after the destructor call appears to be missing from this extract;
// presumably it releases the storage obtained in new_xpath_variable - confirm upstream.
7244 template <typename T> PUGI__FN void delete_xpath_variable(T* var)
7245 {
7246 var->~T();
7248 }
7249
// Destroys a variable through its concrete type, selected by 'type'; the base pointer is
// downcast with static_cast, so 'type' has to match the type the variable was created with.
// An unrecognised type triggers an assertion.
// NOTE(review): the statements for the node_set and boolean cases appear to be missing
// from this extract; as shown, those cases only break.
7250 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
7251 {
7252 switch (type)
7253 {
7254 case xpath_type_node_set:
7256 break;
7257
7258 case xpath_type_number:
7259 delete_xpath_variable(static_cast<xpath_variable_number*>(var));
7260 break;
7261
7262 case xpath_type_string:
7263 delete_xpath_variable(static_cast<xpath_variable_string*>(var));
7264 break;
7265
7266 case xpath_type_boolean:
7268 break;
7269
7270 default:
7271 assert(!"Invalid variable type");
7272 }
7273 }
7274
7275 PUGI__FN xpath_variable* get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end)
7276 {
7277 size_t length = static_cast<size_t>(end - begin);
7278 char_t* scratch = buffer;
7279
7280 if (length >= sizeof(buffer) / sizeof(buffer[0]))
7281 {
7282 // need to make dummy on-heap copy
7283 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
7284 if (!scratch) return 0;
7285 }
7286
7287 // copy string to zero-terminated buffer and perform lookup
7288 memcpy(scratch, begin, length * sizeof(char_t));
7289 scratch[length] = 0;
7290
7291 xpath_variable* result = set->get(scratch);
7292
7293 // free dummy buffer
7294 if (scratch != buffer) xml_memory::deallocate(scratch);
7295
7296 return result;
7297 }
7299
7300// Internal node set class
7302 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
7303 {
7304 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
7305
7306 if (type == xpath_node_set::type_unsorted)
7307 {
7308 sort(begin, end, document_order_comparator());
7309
7310 type = xpath_node_set::type_sorted;
7311 }
7312
7313 if (type != order) reverse(begin, end);
7314
7315 return order;
7316 }
7317
7318 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
7319 {
7320 if (begin == end) return xpath_node();
7321
7322 switch (type)
7323 {
7324 case xpath_node_set::type_sorted:
7325 return *begin;
7326
7327 case xpath_node_set::type_sorted_reverse:
7328 return *(end - 1);
7329
7330 case xpath_node_set::type_unsorted:
7331 return *min_element(begin, end, document_order_comparator());
7332
7333 default:
7334 assert(!"Invalid node set type");
7335 return xpath_node();
7336 }
7337 }
7338
7340 {
7341 xpath_node_set::type_t _type;
7342
7343 xpath_node* _begin;
7344 xpath_node* _end;
7345 xpath_node* _eos;
7346
7347 public:
7348 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
7349 {
7350 }
7351
7352 xpath_node* begin() const
7353 {
7354 return _begin;
7355 }
7356
7357 xpath_node* end() const
7358 {
7359 return _end;
7360 }
7361
7362 bool empty() const
7363 {
7364 return _begin == _end;
7365 }
7366
7367 size_t size() const
7368 {
7369 return static_cast<size_t>(_end - _begin);
7370 }
7371
7372 xpath_node first() const
7373 {
7374 return xpath_first(_begin, _end, _type);
7375 }
7376
7377 void push_back(const xpath_node& node, xpath_allocator* alloc)
7378 {
7379 if (_end == _eos)
7380 {
7381 size_t capacity = static_cast<size_t>(_eos - _begin);
7382
7383 // get new capacity (1.5x rule)
7384 size_t new_capacity = capacity + capacity / 2 + 1;
7385
7386 // reallocate the old array or allocate a new one
7387 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
7388 assert(data);
7389
7390 // finalize
7391 _begin = data;
7392 _end = data + capacity;
7393 _eos = data + new_capacity;
7394 }
7395
7396 *_end++ = node;
7397 }
7398
7399 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
7400 {
7401 size_t size_ = static_cast<size_t>(_end - _begin);
7402 size_t capacity = static_cast<size_t>(_eos - _begin);
7403 size_t count = static_cast<size_t>(end_ - begin_);
7404
7405 if (size_ + count > capacity)
7406 {
7407 // reallocate the old array or allocate a new one
7408 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
7409 assert(data);
7410
7411 // finalize
7412 _begin = data;
7413 _end = data + size_;
7414 _eos = data + size_ + count;
7415 }
7416
7417 memcpy(_end, begin_, count * sizeof(xpath_node));
7418 _end += count;
7419 }
7420
7421 void sort_do()
7422 {
7423 _type = xpath_sort(_begin, _end, _type, false);
7424 }
7425
7426 void truncate(xpath_node* pos)
7427 {
7428 assert(_begin <= pos && pos <= _end);
7429
7430 _end = pos;
7431 }
7432
// Removes duplicate nodes by shrinking _end to the result of unique() over the stored range.
// NOTE(review): the method's declaration line and the statement controlled by the 'if'
// below appear to be missing from this extract (as shown, the 'if' would govern the
// unique() call); presumably the missing statement sorts an unsorted set first - confirm
// against the upstream source.
7434 {
7435 if (_type == xpath_node_set::type_unsorted)
7437
7438 _end = unique(_begin, _end);
7439 }
7440
7441 xpath_node_set::type_t type() const
7442 {
7443 return _type;
7444 }
7445
7446 void set_type(xpath_node_set::type_t value)
7447 {
7448 _type = value;
7449 }
7450 };
7452
// Evaluation context: the context node together with a position and a size
// (apply_predicate passes a 1-based index and the size of the range being filtered).
// NOTE(review): the struct declaration line and the declarations of the 'position' and
// 'size' members appear to be missing from this extract.
7455 {
7456 xpath_node n;
7458
7459 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
7460 {
7461 }
7462 };
7463
7494
// Non-owning view of lexeme text as a [begin, end) character range.
// NOTE(review): the struct declaration line and the constructor declaration line appear
// to be missing from this extract.
7496 {
7497 const char_t* begin;
7498 const char_t* end;
7499
7501 {
7502 }
7503
// Compares the range against a zero-terminated string using strequalrange with the range length.
7504 bool operator==(const char_t* other) const
7505 {
7506 size_t length = static_cast<size_t>(end - begin);
7507
7508 return strequalrange(other, begin, length);
7509 }
7510 };
7511
7513 {
7514 const char_t* _cur;
7515 const char_t* _cur_lexeme_pos;
7517
7519
7520 public:
7521 explicit xpath_lexer(const char_t* query): _cur(query)
7522 {
7523 next();
7524 }
7525
7526 const char_t* state() const
7527 {
7528 return _cur;
7529 }
7530
// Scans one lexeme: skips leading whitespace, remembers where the lexeme starts in
// _cur_lexeme_pos, consumes the characters of one token (selected by its first character)
// and finally stores the position just past it in _cur.
// NOTE(review): many statements appear to be missing from this extract - in particular the
// assignments that record the lexeme type and most assignments to _cur_lexeme_contents -
// so several branches below look empty. Confirm against the upstream source before editing.
7531 void next()
7532 {
7533 const char_t* cur = _cur;
7534
7535 while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
7536
7537 // save lexeme position for error reporting
7538 _cur_lexeme_pos = cur;
7539
7540 switch (*cur)
7541 {
// end of input: nothing is consumed
7542 case 0:
7544 break;
7545
// '>' or '>='
7546 case '>':
7547 if (*(cur+1) == '=')
7548 {
7549 cur += 2;
7551 }
7552 else
7553 {
7554 cur += 1;
7556 }
7557 break;
7558
// '<' or '<='
7559 case '<':
7560 if (*(cur+1) == '=')
7561 {
7562 cur += 2;
7564 }
7565 else
7566 {
7567 cur += 1;
7569 }
7570 break;
7571
// '!=' ; a lone '!' consumes nothing in the visible code
7572 case '!':
7573 if (*(cur+1) == '=')
7574 {
7575 cur += 2;
7577 }
7578 else
7579 {
7581 }
7582 break;
7583
7584 case '=':
7585 cur += 1;
7587
7588 break;
7589
7590 case '+':
7591 cur += 1;
7593
7594 break;
7595
7596 case '-':
7597 cur += 1;
7599
7600 break;
7601
7602 case '*':
7603 cur += 1;
7605
7606 break;
7607
7608 case '|':
7609 cur += 1;
7611
7612 break;
7613
// '$' followed by a name, optionally qualified as prefix:name
// NOTE(review): the condition opening the block below is missing from this extract
7614 case '$':
7615 cur += 1;
7616
7618 {
7620
7621 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
7622
7623 if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
7624 {
7625 cur++; // :
7626
7627 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
7628 }
7629
7631
7633 }
7634 else
7635 {
7637 }
7638
7639 break;
7640
7641 case '(':
7642 cur += 1;
7644
7645 break;
7646
7647 case ')':
7648 cur += 1;
7650
7651 break;
7652
7653 case '[':
7654 cur += 1;
7656
7657 break;
7658
7659 case ']':
7660 cur += 1;
7662
7663 break;
7664
7665 case ',':
7666 cur += 1;
7668
7669 break;
7670
// '/' or '//'
7671 case '/':
7672 if (*(cur+1) == '/')
7673 {
7674 cur += 2;
7676 }
7677 else
7678 {
7679 cur += 1;
7681 }
7682 break;
7683
// '..', a number that starts with '.', or a single '.'
7684 case '.':
7685 if (*(cur+1) == '.')
7686 {
7687 cur += 2;
7689 }
7690 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
7691 {
7692 _cur_lexeme_contents.begin = cur; // .
7693
7694 ++cur;
7695
7696 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
7697
7699
7701 }
7702 else
7703 {
7704 cur += 1;
7706 }
7707 break;
7708
7709 case '@':
7710 cur += 1;
7712
7713 break;
7714
// quoted string: scan up to the matching quote character; hitting the end of input
// first means the string is unterminated
7715 case '"':
7716 case '\'':
7717 {
7718 char_t terminator = *cur;
7719
7720 ++cur;
7721
7723 while (*cur && *cur != terminator) cur++;
7725
7726 if (!*cur)
7728 else
7729 {
7730 cur += 1;
7732 }
7733
7734 break;
7735 }
7736
// '::' ; a lone ':' consumes nothing in the visible code
7737 case ':':
7738 if (*(cur+1) == ':')
7739 {
7740 cur += 2;
7742 }
7743 else
7744 {
7746 }
7747 break;
7748
7749 default:
// number: digits with an optional '.' and further digits
7750 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
7751 {
7753
7754 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
7755
7756 if (*cur == '.')
7757 {
7758 cur++;
7759
7760 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
7761 }
7762
7764
7766 }
// name: symbol characters, optionally followed by ':*' or ':' and more symbol characters
7767 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
7768 {
7770
7771 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
7772
7773 if (cur[0] == ':')
7774 {
7775 if (cur[1] == '*') // namespace test ncname:*
7776 {
7777 cur += 2; // :*
7778 }
7779 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
7780 {
7781 cur++; // :
7782
7783 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
7784 }
7785 }
7786
7788
7790 }
7791 else
7792 {
7794 }
7795 }
7796
// commit the new scan position
7797 _cur = cur;
7798 }
7799
// Returns _cur_lexeme, the type of the most recently scanned lexeme.
// NOTE(review): the declaration line of this accessor appears to be missing from this extract.
7801 {
7802 return _cur_lexeme;
7803 }
7804
7805 const char_t* current_pos() const
7806 {
7807 return _cur_lexeme_pos;
7808 }
7809
7816 };
7817
// Kinds of XPath AST nodes; the trailing comment on each enumerator shows the expression
// form it stands for in terms of the node's left/right/third operands.
// NOTE(review): the enum declaration line, its closing line and several enumerators
// appear to be missing from this extract (gaps in the original numbering).
7819 {
7821 ast_op_or, // left or right
7822 ast_op_and, // left and right
7823 ast_op_equal, // left = right
7824 ast_op_not_equal, // left != right
7825 ast_op_less, // left < right
7826 ast_op_greater, // left > right
7827 ast_op_less_or_equal, // left <= right
7828 ast_op_greater_or_equal, // left >= right
7829 ast_op_add, // left + right
7830 ast_op_subtract, // left - right
7831 ast_op_multiply, // left * right
7832 ast_op_divide, // left / right
7833 ast_op_mod, // left mod right
7834 ast_op_negate, // -left (unary minus)
7835 ast_op_union, // left | right
7836 ast_predicate, // apply predicate to set; next points to next predicate
7837 ast_filter, // select * from left where right
7838 ast_filter_posinv, // select * from left where right; proximity position invariant
7839 ast_string_constant, // string constant
7840 ast_number_constant, // number constant
7841 ast_variable, // variable
7842 ast_func_last, // last()
7843 ast_func_position, // position()
7844 ast_func_count, // count(left)
7845 ast_func_id, // id(left)
7846 ast_func_local_name_0, // local-name()
7847 ast_func_local_name_1, // local-name(left)
7848 ast_func_namespace_uri_0, // namespace-uri()
7849 ast_func_namespace_uri_1, // namespace-uri(left)
7851 ast_func_name_1, // name(left)
7853 ast_func_string_1, // string(left)
7854 ast_func_concat, // concat(left, right, siblings)
7855 ast_func_starts_with, // starts-with(left, right)
7856 ast_func_contains, // contains(left, right)
7857 ast_func_substring_before, // substring-before(left, right)
7858 ast_func_substring_after, // substring-after(left, right)
7859 ast_func_substring_2, // substring(left, right)
7860 ast_func_substring_3, // substring(left, right, third)
7861 ast_func_string_length_0, // string-length()
7862 ast_func_string_length_1, // string-length(left)
7863 ast_func_normalize_space_0, // normalize-space()
7864 ast_func_normalize_space_1, // normalize-space(left)
7865 ast_func_translate, // translate(left, right, third)
7866 ast_func_boolean, // boolean(left)
7867 ast_func_not, // not(left)
7868 ast_func_true, // true()
7869 ast_func_false, // false()
7870 ast_func_lang, // lang(left)
7872 ast_func_number_1, // number(left)
7873 ast_func_sum, // sum(left)
7874 ast_func_floor, // floor(left)
7875 ast_func_ceiling, // ceiling(left)
7876 ast_func_round, // round(left)
7877 ast_step, // process set left with step
7878 ast_step_root // select root node
7880
7897
7910
// Maps an axis value to a distinct type so that the axis can be passed as a
// template/tag argument (step_fill and step_do read it back through T::axis).
7911 template <axis_t N> struct axis_to_type
7912 {
7913 static const axis_t axis;
7914 };
7915
// Out-of-class definition of the static member; its value is the template argument.
7916 template <axis_t N> const axis_t axis_to_type<N>::axis = N;
7917
7919 {
7920 private:
7921 // node type
7922 char _type;
7924
7925 // for ast_step / ast_predicate
7926 char _axis;
7927 char _test;
7928
7929 // tree node structure
7933
7934 union
7935 {
7936 // value for ast_string_constant
7937 const char_t* string;
7938 // value for ast_number_constant
7939 double number;
7940 // variable for ast_variable
7941 xpath_variable* variable;
7942 // node test for ast_step (node name/namespace/node type/pi target)
7943 const char_t* nodetest;
7945
7948
// Evaluates an equality-style comparison (comp is applied to booleans, numbers or strings)
// between two sub-expressions, choosing the comparison mode from their static result types:
//  - neither is a node set: compare as booleans if either is boolean, else as numbers if
//    either is a number, else as strings;
//  - both are node sets: true if comp holds for the string values of any pair of nodes;
//  - exactly one is a node set: the operands are swapped if needed so the node set is on the
//    right, then the other operand is compared as boolean, or against each node's
//    number/string value until comp holds.
// NOTE(review): several lines appear to be missing from this extract (gaps in the original
// numbering inside the braces); presumably allocator-capture declarations like the one
// visible in compare_rel - confirm upstream.
7949 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
7950 {
7951 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
7952
7953 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
7954 {
7955 if (lt == xpath_type_boolean || rt == xpath_type_boolean)
7956 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
7957 else if (lt == xpath_type_number || rt == xpath_type_number)
7958 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
7959 else if (lt == xpath_type_string || rt == xpath_type_string)
7960 {
7962
7963 xpath_string ls = lhs->eval_string(c, stack);
7964 xpath_string rs = rhs->eval_string(c, stack);
7965
7966 return comp(ls, rs);
7967 }
7968 }
7969 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
7970 {
7972
7973 xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
7974 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
7975
// existential semantics: any matching pair is enough
7976 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
7977 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
7978 {
7980
7981 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
7982 return true;
7983 }
7984
7985 return false;
7986 }
7987 else
7988 {
// normalize so that the node set operand is rhs
7989 if (lt == xpath_type_node_set)
7990 {
7991 swap(lhs, rhs);
7992 swap(lt, rt);
7993 }
7994
7995 if (lt == xpath_type_boolean)
7996 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
7997 else if (lt == xpath_type_number)
7998 {
8000
8001 double l = lhs->eval_number(c, stack);
8002 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
8003
8004 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
8005 {
8007
8008 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
8009 return true;
8010 }
8011
8012 return false;
8013 }
8014 else if (lt == xpath_type_string)
8015 {
8017
8018 xpath_string l = lhs->eval_string(c, stack);
8019 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
8020
8021 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
8022 {
8024
8025 if (comp(l, string_value(*ri, stack.result)))
8026 return true;
8027 }
8028
8029 return false;
8030 }
8031 }
8032
// reached only when the type combination matched none of the branches above
8033 assert(!"Wrong types");
8034 return false;
8035 }
8036
// Evaluates a relational comparison between two sub-expressions; values are always
// compared as numbers:
//  - neither is a node set: compare the two numeric values;
//  - both are node sets: true if comp holds for the numeric string values of any pair;
//  - exactly one is a node set: true if comp holds between the other operand's numeric
//    value and the numeric string value of any node (operand order is preserved).
// NOTE(review): several lines appear to be missing from this extract (gaps in the original
// numbering inside the braces); presumably allocator-capture declarations like the visible
// 'crii' one - confirm upstream.
8037 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
8038 {
8039 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
8040
8041 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
8042 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
8043 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
8044 {
8046
8047 xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
8048 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
8049
8050 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
8051 {
8053
// the left value is converted once per left node and reused for the inner loop
8054 double l = convert_string_to_number(string_value(*li, stack.result).c_str());
8055
8056 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
8057 {
8058 xpath_allocator_capture crii(stack.result);
8059
8060 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
8061 return true;
8062 }
8063 }
8064
8065 return false;
8066 }
8067 else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
8068 {
8070
8071 double l = lhs->eval_number(c, stack);
8072 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
8073
8074 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
8075 {
8077
8078 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
8079 return true;
8080 }
8081
8082 return false;
8083 }
8084 else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
8085 {
8087
8088 xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
8089 double r = rhs->eval_number(c, stack);
8090
8091 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
8092 {
8094
8095 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
8096 return true;
8097 }
8098
8099 return false;
8100 }
8101 else
8102 {
8103 assert(!"Wrong types");
8104 return false;
8105 }
8106 }
8107
8108 void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
8109 {
8110 assert(ns.size() >= first);
8111
8112 size_t i = 1;
8113 size_t size = ns.size() - first;
8114
8115 xpath_node* last = ns.begin() + first;
8116
8117 // remove_if... or well, sort of
8118 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
8119 {
8120 xpath_context c(*it, i, size);
8121
8122 if (expr->rettype() == xpath_type_number)
8123 {
8124 if (expr->eval_number(c, stack) == i)
8125 *last++ = *it;
8126 }
8127 else if (expr->eval_boolean(c, stack))
8128 *last++ = *it;
8129 }
8130
8131 ns.truncate(last);
8132 }
8133
8134 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack)
8135 {
8136 if (ns.size() == first) return;
8137
8138 for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
8139 {
8140 apply_predicate(ns, first, pred->_left, stack);
8141 }
8142 }
8143
// Appends attribute 'a' (owned by 'parent') to 'ns' if it passes this step's node test.
// Null attributes and namespace declaration attributes are never added.
// NOTE(review): one 'case' label appears to be missing from this extract (before the
// starts_with test); presumably the namespace-wildcard node test - confirm upstream.
8144 void step_push(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& parent, xpath_allocator* alloc)
8145 {
8146 if (!a) return;
8147
8148 const char_t* name = a.name();
8149
8150 // There are no attribute nodes corresponding to attributes that declare namespaces
8151 // That is, "xmlns:..." or "xmlns"
8152 if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;
8153
8154 switch (_test)
8155 {
// exact name match
8156 case nodetest_name:
8157 if (strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent), alloc);
8158 break;
8159
// tests that accept every attribute
8160 case nodetest_type_node:
8161 case nodetest_all:
8162 ns.push_back(xpath_node(a, parent), alloc);
8163 break;
8164
// name prefix match
8166 if (starts_with(name, _data.nodetest))
8167 ns.push_back(xpath_node(a, parent), alloc);
8168 break;
8169
// every other node test rejects attributes
8170 default:
8171 ;
8172 }
8173 }
8174
// Appends node 'n' to 'ns' if it passes this step's node test; null nodes are ignored.
// NOTE(review): two 'case' labels appear to be missing from this extract (before the
// node_comment test and before the starts_with test) - confirm upstream.
// NOTE(review): the switch is over the node test (_test), yet the assertion text in the
// default branch says "Unknown axis".
8175 void step_push(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc)
8176 {
8177 if (!n) return;
8178
8179 switch (_test)
8180 {
// element with exactly this name
8181 case nodetest_name:
8182 if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n, alloc);
8183 break;
8184
// any node
8185 case nodetest_type_node:
8186 ns.push_back(n, alloc);
8187 break;
8188
// comment nodes
8190 if (n.type() == node_comment)
8191 ns.push_back(n, alloc);
8192 break;
8193
// text nodes: both PCDATA and CDATA
8194 case nodetest_type_text:
8195 if (n.type() == node_pcdata || n.type() == node_cdata)
8196 ns.push_back(n, alloc);
8197 break;
8198
// any processing instruction
8199 case nodetest_type_pi:
8200 if (n.type() == node_pi)
8201 ns.push_back(n, alloc);
8202 break;
8203
// processing instruction with exactly this name
8204 case nodetest_pi:
8205 if (n.type() == node_pi && strequal(n.name(), _data.nodetest))
8206 ns.push_back(n, alloc);
8207 break;
8208
// any element
8209 case nodetest_all:
8210 if (n.type() == node_element)
8211 ns.push_back(n, alloc);
8212 break;
8213
// element whose name starts with the stored node test string
8215 if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
8216 ns.push_back(n, alloc);
8217 break;
8218
8219 default:
8220 assert(!"Unknown axis");
8221 }
8222 }
8223
// Collects into 'ns' the nodes reachable from node 'n' along the axis T::axis, offering
// each candidate to step_push (which applies the node test). The last argument is a tag
// that only carries the axis type.
// NOTE(review): several 'case' labels appear to be missing from this extract (gaps in the
// original numbering before some braces: the *_or_self variants and the sibling axes,
// judging by the code that follows them) - confirm upstream.
8224 template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc, T)
8225 {
8226 const axis_t axis = T::axis;
8227
8228 switch (axis)
8229 {
8230 case axis_attribute:
8231 {
8232 for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
8233 step_push(ns, a, n, alloc);
8234
8235 break;
8236 }
8237
8238 case axis_child:
8239 {
8240 for (xml_node c = n.first_child(); c; c = c.next_sibling())
8241 step_push(ns, c, alloc);
8242
8243 break;
8244 }
8245
8246 case axis_descendant:
8248 {
8249 if (axis == axis_descendant_or_self)
8250 step_push(ns, n, alloc);
8251
// iterative pre-order walk of the subtree below n
8252 xml_node cur = n.first_child();
8253
8254 while (cur && cur != n)
8255 {
8256 step_push(ns, cur, alloc);
8257
8258 if (cur.first_child())
8259 cur = cur.first_child();
8260 else if (cur.next_sibling())
8261 cur = cur.next_sibling();
8262 else
8263 {
// climb until a node with a next sibling is found, or until we are back at n
8264 while (!cur.next_sibling() && cur != n)
8265 cur = cur.parent();
8266
8267 if (cur != n) cur = cur.next_sibling();
8268 }
8269 }
8270
8271 break;
8272 }
8273
// siblings after n, in forward order
8275 {
8276 for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
8277 step_push(ns, c, alloc);
8278
8279 break;
8280 }
8281
// siblings before n, nearest first
8283 {
8284 for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
8285 step_push(ns, c, alloc);
8286
8287 break;
8288 }
8289
8290 case axis_following:
8291 {
8292 xml_node cur = n;
8293
8294 // exit from this node so that we don't include descendants
8295 while (cur && !cur.next_sibling()) cur = cur.parent();
8296 cur = cur.next_sibling();
8297
// pre-order walk over everything after n; step_push ignores a null cur
8298 for (;;)
8299 {
8300 step_push(ns, cur, alloc);
8301
8302 if (cur.first_child())
8303 cur = cur.first_child();
8304 else if (cur.next_sibling())
8305 cur = cur.next_sibling();
8306 else
8307 {
8308 while (cur && !cur.next_sibling()) cur = cur.parent();
8309 cur = cur.next_sibling();
8310
8311 if (!cur) break;
8312 }
8313 }
8314
8315 break;
8316 }
8317
8318 case axis_preceding:
8319 {
8320 xml_node cur = n;
8321
8322 while (cur && !cur.previous_sibling()) cur = cur.parent();
8323 cur = cur.previous_sibling();
8324
// reverse walk: descend to the last leaf first, then move backwards; ancestors of n
// are skipped on the way up
8325 for (;;)
8326 {
8327 if (cur.last_child())
8328 cur = cur.last_child();
8329 else
8330 {
8331 // leaf node, can't be ancestor
8332 step_push(ns, cur, alloc);
8333
8334 if (cur.previous_sibling())
8335 cur = cur.previous_sibling();
8336 else
8337 {
8338 do
8339 {
8340 cur = cur.parent();
8341 if (!cur) break;
8342
8343 if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc);
8344 }
8345 while (!cur.previous_sibling());
8346
8347 cur = cur.previous_sibling();
8348
8349 if (!cur) break;
8350 }
8351 }
8352 }
8353
8354 break;
8355 }
8356
8357 case axis_ancestor:
8359 {
8360 if (axis == axis_ancestor_or_self)
8361 step_push(ns, n, alloc);
8362
// walk up the parent chain, nearest ancestor first
8363 xml_node cur = n.parent();
8364
8365 while (cur)
8366 {
8367 step_push(ns, cur, alloc);
8368
8369 cur = cur.parent();
8370 }
8371
8372 break;
8373 }
8374
8375 case axis_self:
8376 {
8377 step_push(ns, n, alloc);
8378
8379 break;
8380 }
8381
8382 case axis_parent:
8383 {
8384 if (n.parent()) step_push(ns, n.parent(), alloc);
8385
8386 break;
8387 }
8388
8389 default:
8390 assert(!"Unimplemented axis");
8391 }
8392 }
8393
// Collects into 'ns' the nodes reachable from attribute 'a' (owned by node 'p') along the
// axis T::axis. 'v' is the axis tag; it is forwarded when the node overload is reused.
// Axes not listed here fall into the asserting default branch.
// NOTE(review): some 'case' labels appear to be missing from this extract (gaps in the
// original numbering before axis_ancestor's brace and before axis_self) - confirm upstream.
8394 template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& p, xpath_allocator* alloc, T v)
8395 {
8396 const axis_t axis = T::axis;
8397
8398 switch (axis)
8399 {
8400 case axis_ancestor:
8402 {
8403 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
8404 step_push(ns, a, p, alloc);
8405
// the owning node and all of its ancestors
8406 xml_node cur = p;
8407
8408 while (cur)
8409 {
8410 step_push(ns, cur, alloc);
8411
8412 cur = cur.parent();
8413 }
8414
8415 break;
8416 }
8417
8419 case axis_self:
8420 {
8421 if (_test == nodetest_type_node) // reject attributes based on principal node type test
8422 step_push(ns, a, p, alloc);
8423
8424 break;
8425 }
8426
8427 case axis_following:
8428 {
8429 xml_node cur = p;
8430
// pre-order walk that starts at the owning node p: the move happens before the push,
// so p itself is not added but its descendants are
8431 for (;;)
8432 {
8433 if (cur.first_child())
8434 cur = cur.first_child();
8435 else if (cur.next_sibling())
8436 cur = cur.next_sibling();
8437 else
8438 {
8439 while (cur && !cur.next_sibling()) cur = cur.parent();
8440 cur = cur.next_sibling();
8441
8442 if (!cur) break;
8443 }
8444
8445 step_push(ns, cur, alloc);
8446 }
8447
8448 break;
8449 }
8450
8451 case axis_parent:
8452 {
8453 step_push(ns, p, alloc);
8454
8455 break;
8456 }
8457
8458 case axis_preceding:
8459 {
8460 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
8461 step_fill(ns, p, alloc, v);
8462 break;
8463 }
8464
8465 default:
8466 assert(!"Unimplemented axis");
8467 }
8468 }
8469
// Evaluates one location step along the axis T::axis and returns the resulting node set.
// With a left sub-expression, the step is applied to every node of its result and the
// predicates are applied to each node's contribution separately; without one, the step is
// applied to the context node. Duplicates are removed at the end when they can occur.
// NOTE(review): the declaration of the local 'ns' appears to be missing from this extract
// (gap in the original numbering before the first set_type call).
8470 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v)
8471 {
8472 const axis_t axis = T::axis;
// axes for which an attribute context node is passed to the attribute overload of step_fill
8473 bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
8474
// reverse axes produce nodes in reverse document order, all others in document order
8476 ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);
8477
8478 if (_left)
8479 {
8480 xpath_node_set_raw s = _left->eval_node_set(c, stack);
8481
8482 // self axis preserves the original order
8483 if (axis == axis_self) ns.set_type(s.type());
8484
8485 for (const xpath_node* it = s.begin(); it != s.end(); ++it)
8486 {
// remember where this node's contribution starts so predicates see only that part
8487 size_t size = ns.size();
8488
8489 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
8490 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
8491
8492 if (it->node())
8493 step_fill(ns, it->node(), stack.result, v);
8494 else if (attributes)
8495 step_fill(ns, it->attribute(), it->parent(), stack.result, v);
8496
8497 apply_predicates(ns, size, stack);
8498 }
8499 }
8500 else
8501 {
8502 if (c.n.node())
8503 step_fill(ns, c.n.node(), stack.result, v);
8504 else if (attributes)
8505 step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v);
8506
8507 apply_predicates(ns, 0, stack);
8508 }
8509
8510 // child, attribute and self axes always generate unique set of nodes
8511 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
8512 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
8513 ns.remove_duplicates();
8514
8515 return ns;
8516 }
8517
8518 public:
8519 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
8520 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
8521 {
8522 assert(type == ast_string_constant);
8523 _data.string = value;
8524 }
8525
// Constructs a number-constant node.
// Stores `value` in _data.number; axis and test are zeroed and the left,
// right and next links are null. `type` must be ast_number_constant (asserted).
8526 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
8527 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
8528 {
8529 assert(type == ast_number_constant);
8530 _data.number = value;
8531 }
8532
// Constructs a variable-reference node.
// Stores the variable pointer in _data.variable; axis and test are zeroed and
// the left, right and next links are null. `type` must be ast_variable (asserted).
8533 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
8534 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
8535 {
8536 assert(type == ast_variable);
8537 _data.variable = value;
8538 }
8539
// Constructs a generic node (operator, function call, filter, ...) with up to
// two children. `left` and `right` default to null; axis and test are zeroed
// and _next is null. _data is not written by this constructor.
8540 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
8541 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
8542 {
8543 }
8544
// Constructs a location-step node.
// The return type is fixed to xpath_type_node_set; `axis` and `test` are
// stored narrowed to char, `left` is the node the step is applied to, and
// `contents` (the node-test string) is stored in _data.nodetest.
// _right and _next start out null.
8545 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
8546 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
8547 {
8548 _data.nodetest = contents;
8549 }
8550
// NOTE(review): the signature line (listing line 8551) is absent from this
// extract; presumably a setter taking an xpath_ast_node* named `value` - confirm.
// Body: overwrites the _next link with `value`.
8552 {
8553 _next = value;
8554 }
8555
// NOTE(review): the signature line (listing line 8556) is absent from this
// extract; presumably a setter taking an xpath_ast_node* named `value` - confirm.
// Body: overwrites the _right link with `value`.
8557 {
8558 _right = value;
8559 }
8560
// Evaluates this AST node as a boolean in context `c`.
// Dispatches on _type for operators and boolean-valued functions; any node
// type not handled explicitly falls to `default`, which converts the node's
// value according to _rettype (number, string or node set).
// NOTE(review): several listing lines inside this function are absent from
// this extract (8583, 8586, 8589, 8591, 8601, 8625, 8670, 8677); the surviving
// lines are reproduced unchanged and the gaps are marked below.
8561 bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
8562 {
8563 switch (_type)
8564 {
8565 case ast_op_or:
8566 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
8567
8568 case ast_op_and:
8569 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
8570
8571 case ast_op_equal:
8572 return compare_eq(_left, _right, c, stack, equal_to());
8573
8574 case ast_op_not_equal:
8575 return compare_eq(_left, _right, c, stack, not_equal_to());
8576
8577 case ast_op_less:
8578 return compare_rel(_left, _right, c, stack, less());
8579
// "greater" is expressed as "less" with the operands swapped
8580 case ast_op_greater:
8581 return compare_rel(_right, _left, c, stack, less());
8582
// NOTE(review): listing line 8583 missing - presumably the case label for this return
8584 return compare_rel(_left, _right, c, stack, less_equal());
8585
// NOTE(review): listing line 8586 missing - presumably the case label for this
// return (operands swapped relative to the one above)
8587 return compare_rel(_right, _left, c, stack, less_equal());
8588
// NOTE(review): listing lines 8589 and 8591 missing - presumably a case label
// and a local declaration; the block tests whether lr starts with rr
8590 {
8592
8593 xpath_string lr = _left->eval_string(c, stack);
8594 xpath_string rr = _right->eval_string(c, stack);
8595
8596 return starts_with(lr.c_str(), rr.c_str());
8597 }
8598
8599 case ast_func_contains:
8600 {
// NOTE(review): listing line 8601 missing
8602
8603 xpath_string lr = _left->eval_string(c, stack);
8604 xpath_string rr = _right->eval_string(c, stack);
8605
8606 return find_substring(lr.c_str(), rr.c_str()) != 0;
8607 }
8608
8609 case ast_func_boolean:
8610 return _left->eval_boolean(c, stack);
8611
8612 case ast_func_not:
8613 return !_left->eval_boolean(c, stack);
8614
8615 case ast_func_true:
8616 return true;
8617
8618 case ast_func_false:
8619 return false;
8620
8621 case ast_func_lang:
8622 {
// an attribute context never matches
8623 if (c.n.attribute()) return false;
8624
// NOTE(review): listing line 8625 missing
8626
8627 xpath_string lang = _left->eval_string(c, stack);
8628
// walk from the context node towards the root; the first node carrying an
// xml:lang attribute decides the result
8629 for (xml_node n = c.n.node(); n; n = n.parent())
8630 {
8631 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
8632
8633 if (a)
8634 {
8635 const char_t* value = a.value();
8636
8637 // strnicmp / strncasecmp is not portable
8638 for (const char_t* lit = lang.c_str(); *lit; ++lit)
8639 {
8640 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
8641 ++value;
8642 }
8643
// the argument matched as a prefix: accept only if the attribute value ends
// here or continues with '-'
8644 return *value == 0 || *value == '-';
8645 }
8646 }
8647
8648 return false;
8649 }
8650
8651 case ast_variable:
8652 {
8653 assert(_rettype == _data.variable->type());
8654
8655 if (_rettype == xpath_type_boolean)
8656 return _data.variable->get_boolean();
8657
8658 // fallthrough to type conversion
8659 }
8660
8661 default:
8662 {
// generic conversion to boolean, driven by the node's declared return type
8663 switch (_rettype)
8664 {
8665 case xpath_type_number:
8666 return convert_number_to_boolean(eval_number(c, stack));
8667
8668 case xpath_type_string:
8669 {
// NOTE(review): listing line 8670 missing
8671
// a string is true when it is non-empty
8672 return !eval_string(c, stack).empty();
8673 }
8674
8675 case xpath_type_node_set:
8676 {
// NOTE(review): listing line 8677 missing
8678
// a node set is true when it is non-empty
8679 return !eval_node_set(c, stack).empty();
8680 }
8681
8682 default:
8683 assert(!"Wrong expression for return type boolean");
8684 return false;
8685 }
8686 }
8687 }
8688 }
8689
// Evaluates this AST node as a number (double) in context `c`.
// Dispatches on _type for arithmetic operators and number-valued functions;
// any node type not handled explicitly falls to `default`, which converts the
// node's value according to _rettype (boolean, string or node set).
// NOTE(review): several listing lines inside this function are absent from
// this extract (8712, 8723, 8728, 8730, 8735, 8737, 8744, 8746, 8754, 8762,
// 8764, 8806, 8813); the surviving lines are reproduced unchanged and the
// gaps are marked below.
8690 double eval_number(const xpath_context& c, const xpath_stack& stack)
8691 {
8692 switch (_type)
8693 {
8694 case ast_op_add:
8695 return _left->eval_number(c, stack) + _right->eval_number(c, stack);
8696
8697 case ast_op_subtract:
8698 return _left->eval_number(c, stack) - _right->eval_number(c, stack);
8699
8700 case ast_op_multiply:
8701 return _left->eval_number(c, stack) * _right->eval_number(c, stack);
8702
8703 case ast_op_divide:
8704 return _left->eval_number(c, stack) / _right->eval_number(c, stack);
8705
8706 case ast_op_mod:
8707 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
8708
8709 case ast_op_negate:
8710 return -_left->eval_number(c, stack);
8711
// NOTE(review): listing line 8712 missing - presumably the case label for this return
8713 return _data.number;
8714
8715 case ast_func_last:
8716 return static_cast<double>(c.size);
8717
8718 case ast_func_position:
8719 return static_cast<double>(c.position);
8720
8721 case ast_func_count:
8722 {
// NOTE(review): listing line 8723 missing
8724
8725 return static_cast<double>(_left->eval_node_set(c, stack).size());
8726 }
8727
// NOTE(review): listing lines 8728 and 8730 missing - presumably a case label
// and a local declaration; the block returns the length of the context node's
// string value
8729 {
8731
8732 return static_cast<double>(string_value(c.n, stack.result).length());
8733 }
8734
// NOTE(review): listing lines 8735 and 8737 missing - presumably a case label
// and a local declaration; the block returns the length of the argument string
8736 {
8738
8739 return static_cast<double>(_left->eval_string(c, stack).length());
8740 }
8741
8742 case ast_func_number_0:
8743 {
// NOTE(review): listing lines 8744 and 8746 missing - the statement(s) that
// produce this case's result are not visible here
8745
8747 }
8748
8749 case ast_func_number_1:
8750 return _left->eval_number(c, stack);
8751
8752 case ast_func_sum:
8753 {
// NOTE(review): listing line 8754 missing
8755
8756 double r = 0;
8757
8758 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
8759
8760 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
8761 {
// NOTE(review): listing lines 8762 and 8764 missing - the loop body that
// accumulates into r is not visible here
8763
8765 }
8766
8767 return r;
8768 }
8769
8770 case ast_func_floor:
8771 {
8772 double r = _left->eval_number(c, stack);
8773
// r == r is false only for NaN, which is returned unchanged
8774 return r == r ? floor(r) : r;
8775 }
8776
8777 case ast_func_ceiling:
8778 {
8779 double r = _left->eval_number(c, stack);
8780
// r == r is false only for NaN, which is returned unchanged
8781 return r == r ? ceil(r) : r;
8782 }
8783
8784 case ast_func_round:
8785 return round_nearest_nzero(_left->eval_number(c, stack));
8786
8787 case ast_variable:
8788 {
8789 assert(_rettype == _data.variable->type());
8790
8791 if (_rettype == xpath_type_number)
8792 return _data.variable->get_number();
8793
8794 // fallthrough to type conversion
8795 }
8796
8797 default:
8798 {
// generic conversion to number, driven by the node's declared return type
8799 switch (_rettype)
8800 {
8801 case xpath_type_boolean:
8802 return eval_boolean(c, stack) ? 1 : 0;
8803
8804 case xpath_type_string:
8805 {
// NOTE(review): listing line 8806 missing
8807
8808 return convert_string_to_number(eval_string(c, stack).c_str());
8809 }
8810
8811 case xpath_type_node_set:
8812 {
// NOTE(review): listing line 8813 missing
8814
// a node set is converted through its string form
8815 return convert_string_to_number(eval_string(c, stack).c_str());
8816 }
8817
8818 default:
8819 assert(!"Wrong expression for return type number");
8820 return 0;
8821 }
8822
8823 }
8824 }
8825 }
8826
// NOTE(review): the signature line (listing line 8827) is absent from this
// extract; the body uses `c` and `stack` and returns an xpath_string, and is
// invoked elsewhere in this listing as eval_string_concat(c, stack).
// Evaluates a concat() node: the first argument is _left, the remaining ones
// are the chain starting at _right and linked through _next. Each argument is
// evaluated as a string into temporary storage, then all of them are copied
// back-to-back into one buffer allocated from stack.result.
8828 {
8829 assert(_type == ast_func_concat);
8830
8831 xpath_allocator_capture ct(stack.temp);
8832
8833 // count the string number
8834 size_t count = 1;
8835 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
8836
8837 // gather all strings
8838 xpath_string static_buffer[4];
8839 xpath_string* buffer = static_buffer;
8840
8841 // allocate on-heap for large concats
8842 if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
8843 {
8844 buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
8845 assert(buffer);
8846 }
8847
8848 // evaluate all strings to temporary stack
// (result and temp are swapped, so the arguments are produced in stack.temp)
8849 xpath_stack swapped_stack = {stack.temp, stack.result};
8850
8851 buffer[0] = _left->eval_string(c, swapped_stack);
8852
8853 size_t pos = 1;
8854 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
8855 assert(pos == count);
8856
8857 // get total length
8858 size_t length = 0;
8859 for (size_t i = 0; i < count; ++i) length += buffer[i].length();
8860
8861 // create final string
// (+1 leaves room for the terminating zero written below)
8862 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
8863 assert(result);
8864
8865 char_t* ri = result;
8866
8867 for (size_t j = 0; j < count; ++j)
8868 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
8869 *ri++ = *bi;
8870
8871 *ri = 0;
8872
8873 return xpath_string(result, true);
8874 }
8875
// NOTE(review): the signature line (listing line 8876) is absent from this
// extract; the body uses `c` and `stack` and returns an xpath_string, and is
// invoked elsewhere in this listing as eval_string(c, stack).
// Evaluates this AST node as a string. Dispatches on _type for string-valued
// functions; any node type not handled explicitly falls to `default`, which
// converts the node's value according to _rettype.
// Several cases evaluate their arguments with a stack whose result/temp
// allocators are swapped, so intermediate strings are produced in stack.temp
// and only the returned string is built in stack.result.
// NOTE(review): many listing lines inside this function (case labels, locals,
// return statements) are absent from this extract; gaps are marked below.
8877 {
8878 switch (_type)
8879 {
// NOTE(review): listing line 8880 missing - presumably the case label for this return
8881 return xpath_string_const(_data.string);
8882
// NOTE(review): listing line 8883 missing - presumably a case label; the block
// returns the local name of the context node
8884 {
8885 xpath_node na = c.n;
8886
8887 return xpath_string_const(local_name(na));
8888 }
8889
// NOTE(review): listing lines 8890 and 8892 missing - presumably a case label
// and a local declaration; the block returns the local name of the first node
// of the argument node set
8891 {
8893
8894 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
8895 xpath_node na = ns.first();
8896
8897 return xpath_string_const(local_name(na));
8898 }
8899
8900 case ast_func_name_0:
8901 {
8902 xpath_node na = c.n;
8903
// NOTE(review): listing line 8904 missing - the return statement is not visible here
8905 }
8906
8907 case ast_func_name_1:
8908 {
// NOTE(review): listing line 8909 missing
8910
8911 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
8912 xpath_node na = ns.first();
8913
// NOTE(review): listing line 8914 missing - the return statement is not visible here
8915 }
8916
// NOTE(review): listing lines 8917 and 8921 missing - the case label and the
// return statement are not visible here
8918 {
8919 xpath_node na = c.n;
8920
8922 }
8923
// NOTE(review): listing lines 8924, 8926 and 8931 missing - the case label, a
// local declaration and the return statement are not visible here
8925 {
8927
8928 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
8929 xpath_node na = ns.first();
8930
8932 }
8933
8934 case ast_func_string_0:
8935 return string_value(c.n, stack.result);
8936
8937 case ast_func_string_1:
8938 return _left->eval_string(c, stack);
8939
8940 case ast_func_concat:
8941 return eval_string_concat(c, stack);
8942
// NOTE(review): listing line 8943 missing - presumably a case label; the block
// returns the part of s that precedes the first occurrence of p, or an empty
// string when p is not found
8944 {
8945 xpath_allocator_capture cr(stack.temp);
8946
8947 xpath_stack swapped_stack = {stack.temp, stack.result};
8948
8949 xpath_string s = _left->eval_string(c, swapped_stack);
8950 xpath_string p = _right->eval_string(c, swapped_stack);
8951
8952 const char_t* pos = find_substring(s.c_str(), p.c_str());
8953
8954 return pos ? xpath_string(s.c_str(), pos, stack.result) : xpath_string();
8955 }
8956
// NOTE(review): listing line 8957 missing - presumably a case label; the block
// returns the part of s that follows the first occurrence of p, or an empty
// string when p is not found
8958 {
8959 xpath_allocator_capture cr(stack.temp);
8960
8961 xpath_stack swapped_stack = {stack.temp, stack.result};
8962
8963 xpath_string s = _left->eval_string(c, swapped_stack);
8964 xpath_string p = _right->eval_string(c, swapped_stack);
8965
8966 const char_t* pos = find_substring(s.c_str(), p.c_str());
8967 if (!pos) return xpath_string();
8968
8969 const char_t* result = pos + p.length();
8970
// when s reports uses_heap() the tail is copied into stack.result; otherwise
// the pointer into s is returned as a constant string
8971 return s.uses_heap() ? xpath_string(result, stack.result) : xpath_string_const(result);
8972 }
8973
// NOTE(review): listing line 8974 missing - presumably a case label; the block
// returns the tail of s starting at a 1-based, rounded position
8975 {
8976 xpath_allocator_capture cr(stack.temp);
8977
8978 xpath_stack swapped_stack = {stack.temp, stack.result};
8979
8980 xpath_string s = _left->eval_string(c, swapped_stack);
8981 size_t s_length = s.length();
8982
8983 double first = round_nearest(_right->eval_number(c, stack));
8984
8985 if (is_nan(first)) return xpath_string(); // NaN
8986 else if (first >= s_length + 1) return xpath_string();
8987
// positions below 1 are clamped to the start of the string
8988 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
8989 assert(1 <= pos && pos <= s_length + 1);
8990
8991 const char_t* rbegin = s.c_str() + (pos - 1);
8992
8993 return s.uses_heap() ? xpath_string(rbegin, stack.result) : xpath_string_const(rbegin);
8994 }
8995
// NOTE(review): listing line 8996 missing - presumably a case label; the block
// returns the slice of s between a 1-based start and start + length, both rounded
8997 {
8998 xpath_allocator_capture cr(stack.temp);
8999
9000 xpath_stack swapped_stack = {stack.temp, stack.result};
9001
9002 xpath_string s = _left->eval_string(c, swapped_stack);
9003 size_t s_length = s.length();
9004
9005 double first = round_nearest(_right->eval_number(c, stack));
9006 double last = first + round_nearest(_right->_next->eval_number(c, stack));
9007
9008 if (is_nan(first) || is_nan(last)) return xpath_string();
9009 else if (first >= s_length + 1) return xpath_string();
9010 else if (first >= last) return xpath_string();
9011 else if (last < 1) return xpath_string();
9012
// clamp the 1-based [pos, end) range to the string bounds
9013 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
9014 size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
9015
9016 assert(1 <= pos && pos <= end && end <= s_length + 1);
9017 const char_t* rbegin = s.c_str() + (pos - 1);
9018 const char_t* rend = s.c_str() + (end - 1);
9019
// a slice that runs to the end of a string not reporting uses_heap() is
// returned as a constant string; every other slice is copied into stack.result
9020 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string_const(rbegin) : xpath_string(rbegin, rend, stack.result);
9021 }
9022
// NOTE(review): listing line 9023 missing - presumably a case label; the block
// applies normalize_space to the context node's string value
9024 {
9025 xpath_string s = string_value(c.n, stack.result);
9026
9027 normalize_space(s.data(stack.result));
9028
9029 return s;
9030 }
9031
// NOTE(review): listing line 9032 missing - presumably a case label; the block
// applies normalize_space to the argument string
9033 {
9034 xpath_string s = _left->eval_string(c, stack);
9035
9036 normalize_space(s.data(stack.result));
9037
9038 return s;
9039 }
9040
9041 case ast_func_translate:
9042 {
9043 xpath_allocator_capture cr(stack.temp);
9044
9045 xpath_stack swapped_stack = {stack.temp, stack.result};
9046
// s is evaluated with the unswapped stack because it is modified and returned;
// `from` and `to` are only read and are evaluated with the swapped stack
9047 xpath_string s = _left->eval_string(c, stack);
9048 xpath_string from = _right->eval_string(c, swapped_stack);
9049 xpath_string to = _right->_next->eval_string(c, swapped_stack);
9050
9051 translate(s.data(stack.result), from.c_str(), to.c_str());
9052
9053 return s;
9054 }
9055
9056 case ast_variable:
9057 {
9058 assert(_rettype == _data.variable->type());
9059
9060 if (_rettype == xpath_type_string)
9061 return xpath_string_const(_data.variable->get_string());
9062
9063 // fallthrough to type conversion
9064 }
9065
9066 default:
9067 {
// generic conversion to string, driven by the node's declared return type
9068 switch (_rettype)
9069 {
9070 case xpath_type_boolean:
9071 return xpath_string_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
9072
9073 case xpath_type_number:
9074 return convert_number_to_string(eval_number(c, stack), stack.result);
9075
9076 case xpath_type_node_set:
9077 {
9078 xpath_allocator_capture cr(stack.temp);
9079
9080 xpath_stack swapped_stack = {stack.temp, stack.result};
9081
// an empty set yields an empty string; otherwise the string value of
// ns.first() is returned
9082 xpath_node_set_raw ns = eval_node_set(c, swapped_stack);
9083 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
9084 }
9085
9086 default:
9087 assert(!"Wrong expression for return type string");
9088 return xpath_string();
9089 }
9090 }
9091 }
9092 }
9093
// NOTE(review): the signature line (listing line 9094) is absent from this
// extract; the body uses `c` and `stack` and returns an xpath_node_set_raw, and
// is invoked elsewhere in this listing as eval_node_set(c, stack).
// Evaluates this AST node as a node set. Handles union, filter, id(), location
// steps (dispatching on _axis to step_do), the root step and node-set
// variables. Any other node type is a logic error: it asserts and returns an
// empty set.
// NOTE(review): several listing lines inside this function (case labels, local
// declarations and returns) are absent from this extract; gaps are marked below.
9095 {
9096 switch (_type)
9097 {
9098 case ast_op_union:
9099 {
9100 xpath_allocator_capture cr(stack.temp);
9101
9102 xpath_stack swapped_stack = {stack.temp, stack.result};
9103
// the left operand is produced with the swapped stack and the right one with
// the unswapped stack; the left nodes are then appended to rs
9104 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack);
9105 xpath_node_set_raw rs = _right->eval_node_set(c, stack);
9106
9107 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
9108 rs.set_type(xpath_node_set::type_unsorted);
9109
9110 rs.append(ls.begin(), ls.end(), stack.result);
9111 rs.remove_duplicates();
9112
9113 return rs;
9114 }
9115
9116 case ast_filter:
9117 case ast_filter_posinv:
9118 {
9119 xpath_node_set_raw set = _left->eval_node_set(c, stack);
9120
9121 // either expression is a number or it contains position() call; sort by document order
9122 if (_type == ast_filter) set.sort_do();
9123
9124 apply_predicate(set, 0, _right, stack);
9125
9126 return set;
9127 }
9128
// id() always yields an empty node set here
9129 case ast_func_id:
9130 return xpath_node_set_raw();
9131
9132 case ast_step:
9133 {
9134 switch (_axis)
9135 {
9136 case axis_ancestor:
9137 return step_do(c, stack, axis_to_type<axis_ancestor>());
9138
// NOTE(review): listing line 9139 missing - presumably the case label for this return
9140 return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());
9141
9142 case axis_attribute:
9143 return step_do(c, stack, axis_to_type<axis_attribute>());
9144
9145 case axis_child:
9146 return step_do(c, stack, axis_to_type<axis_child>());
9147
9148 case axis_descendant:
9149 return step_do(c, stack, axis_to_type<axis_descendant>());
9150
// NOTE(review): listing lines 9151-9152 missing - one axis case is not visible here
9153
9154 case axis_following:
9155 return step_do(c, stack, axis_to_type<axis_following>());
9156
// NOTE(review): listing lines 9157-9158 missing - one axis case is not visible here
9159
9160 case axis_namespace:
9161 // namespaced axis is not supported
9162 return xpath_node_set_raw();
9163
9164 case axis_parent:
9165 return step_do(c, stack, axis_to_type<axis_parent>());
9166
9167 case axis_preceding:
9168 return step_do(c, stack, axis_to_type<axis_preceding>());
9169
// NOTE(review): listing lines 9170-9171 missing - one axis case is not visible here
9172
9173 case axis_self:
9174 return step_do(c, stack, axis_to_type<axis_self>());
9175
9176 default:
9177 assert(!"Unknown axis");
9178 return xpath_node_set_raw();
9179 }
9180 }
9181
9182 case ast_step_root:
9183 {
9184 assert(!_right); // root step can't have any predicates
9185
// NOTE(review): listing line 9186 missing - presumably the declaration of `ns`
9187
9188 ns.set_type(xpath_node_set::type_sorted);
9189
// for an attribute context the root is reached through the attribute's parent
9190 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
9191 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
9192
9193 return ns;
9194 }
9195
9196 case ast_variable:
9197 {
9198 assert(_rettype == _data.variable->type());
9199
9200 if (_rettype == xpath_type_node_set)
9201 {
9202 const xpath_node_set& s = _data.variable->get_node_set();
9203
// NOTE(review): listing line 9204 missing - presumably the declaration of `ns`
9205
// copy the variable's nodes and ordering type into a fresh raw set
9206 ns.set_type(s.type());
9207 ns.append(s.begin(), s.end(), stack.result);
9208
9209 return ns;
9210 }
9211
9212 // fallthrough to type conversion
9213 }
9214
9215 default:
9216 assert(!"Wrong expression for return type node set");
9217 return xpath_node_set_raw();
9218 }
9219 }
9220
// NOTE(review): the signature line (listing line 9221) is absent from this
// extract; the body returns bool and recurses through is_posinv() on child nodes.
// Returns false for a position() node and true for the leaf/step/filter kinds
// listed below; for every other node type the result is true only if _left
// (when present) and every node in the _right/_next chain are themselves posinv.
9222 {
9223 switch (_type)
9224 {
9225 case ast_func_position:
9226 return false;
9227
// NOTE(review): listing lines 9228-9229 missing - presumably further case
// labels sharing the `return true` below
9230 case ast_variable:
9231 return true;
9232
9233 case ast_step:
9234 case ast_step_root:
9235 return true;
9236
9237 case ast_predicate:
9238 case ast_filter:
9239 case ast_filter_posinv:
9240 return true;
9241
9242 default:
9243 if (_left && !_left->is_posinv()) return false;
9244
9245 for (xpath_ast_node* n = _right; n; n = n->_next)
9246 if (!n->is_posinv()) return false;
9247
9248 return true;
9249 }
9250 }
9251
// Returns the node's declared value type; _rettype is stored as a char and is
// cast back to xpath_value_type here.
9252 xpath_value_type rettype() const
9253 {
9254 return static_cast<xpath_value_type>(_rettype);
9255 }
9256 };
9257
9259 {
9262
// NOTE(review): the struct header (listing line 9258) and the members declared
// on listing lines 9260-9261 are absent from this extract; the methods below
// also use `_alloc` and `_lexer`, which are presumably declared there.
// Start of the query text; throw_error() computes the error offset relative to it.
9263 const char_t* _query;
// Variable set used to resolve variable references; may be null
// (parse_primary_expression reports an error in that case).
9264 xpath_variable_set* _variables;
9265
// Receives the error message and offset when parsing fails.
9266 xpath_parse_result* _result;
9267
// Scratch buffer handed to get_variable_scratch() during variable lookup.
9268 char_t _scratch[32];
9269
// Jump target used by throw_error() when exceptions are disabled.
9270 #ifdef PUGIXML_NO_EXCEPTIONS
9271 jmp_buf _error_handler;
9272 #endif
9273
// Reports a parse error and does not return normally.
// Records `message` and the current lexer position (as an offset from _query)
// in *_result, then either longjmps to _error_handler (PUGIXML_NO_EXCEPTIONS)
// or throws xpath_exception carrying a copy of *_result.
9274 void throw_error(const char* message)
9275 {
9276 _result->error = message;
9277 _result->offset = _lexer.current_pos() - _query;
9278
9279 #ifdef PUGIXML_NO_EXCEPTIONS
9280 longjmp(_error_handler, 1);
9281 #else
9282 throw xpath_exception(*_result);
9283 #endif
9284 }
9285
// NOTE(review): the signature line (listing line 9286) is absent from this
// extract; the function is invoked elsewhere in this listing as throw_error_oom().
// Reports an out-of-memory condition: through throw_error("Out of memory")
// when exceptions are disabled, otherwise by throwing std::bad_alloc.
9287 {
9288 #ifdef PUGIXML_NO_EXCEPTIONS
9289 throw_error("Out of memory");
9290 #else
9291 throw std::bad_alloc();
9292 #endif
9293 }
9294
// NOTE(review): the signature line (listing line 9295) is absent from this
// extract; the function is invoked elsewhere in this listing as alloc_node()
// and its result is used as the placement-new address for xpath_ast_node.
// Allocates raw storage for one xpath_ast_node from _alloc; on allocation
// failure calls throw_error_oom().
9296 {
9297 void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
9298
9299 if (!result) throw_error_oom();
9300
9301 return result;
9302 }
9303
// Copies the lexer string [value.begin, value.end) into a zero-terminated
// buffer allocated from _alloc and returns it.
// Returns 0 when value.begin is null; calls throw_error_oom() when the
// allocation fails.
9304 const char_t* alloc_string(const xpath_lexer_string& value)
9305 {
9306 if (value.begin)
9307 {
9308 size_t length = static_cast<size_t>(value.end - value.begin);
9309
// +1 for the terminating zero written below
9310 char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
9311 if (!c) throw_error_oom();
9312 assert(c); // workaround for clang static analysis
9313
9314 memcpy(c, value.begin, length * sizeof(char_t));
9315 c[length] = 0;
9316
9317 return c;
9318 }
9319 else return 0;
9320 }
9321
// NOTE(review): the signature line (listing line 9322) is absent from this
// extract; the body uses `type0`, `type1`, `argc` and `args`, which are
// presumably its parameters - confirm against the original file.
// Builds a string-valued function node taking zero or one argument: `type0` is
// used when argc == 0, `type1` otherwise. A supplied argument must be a node
// set, otherwise a parse error is raised.
9323 {
9324 assert(argc <= 1);
9325
9326 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
9327
9328 return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
9329 }
9330
// NOTE(review): the signature line (listing line 9331) is absent from this
// extract; the function is invoked elsewhere in this listing as
// parse_function(function, argc, args).
// Maps a function name plus argument count to a newly allocated AST node.
// The first character of the name selects a group of candidates, then the full
// name and argc are compared. args[0] becomes the left child and, for
// functions with two or more arguments, args[1] the right child.
// A name/argc combination that matches nothing raises a parse error.
9332 {
9333 switch (name.begin[0])
9334 {
9335 case 'b':
9336 if (name == PUGIXML_TEXT("boolean") && argc == 1)
9337 return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
9338
9339 break;
9340
9341 case 'c':
9342 if (name == PUGIXML_TEXT("count") && argc == 1)
9343 {
9344 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
9345 return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
9346 }
9347 else if (name == PUGIXML_TEXT("contains") && argc == 2)
9348 return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
9349 else if (name == PUGIXML_TEXT("concat") && argc >= 2)
9350 return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
9351 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
9352 return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
9353
9354 break;
9355
9356 case 'f':
9357 if (name == PUGIXML_TEXT("false") && argc == 0)
9358 return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
9359 else if (name == PUGIXML_TEXT("floor") && argc == 1)
9360 return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
9361
9362 break;
9363
9364 case 'i':
9365 if (name == PUGIXML_TEXT("id") && argc == 1)
9366 return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
9367
9368 break;
9369
9370 case 'l':
9371 if (name == PUGIXML_TEXT("last") && argc == 0)
9372 return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
9373 else if (name == PUGIXML_TEXT("lang") && argc == 1)
9374 return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
9375 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
// NOTE(review): listing line 9376 missing - the statement for the branch above
// is not visible here
9377
9378 break;
9379
9380 case 'n':
9381 if (name == PUGIXML_TEXT("name") && argc <= 1)
// NOTE(review): listing line 9382 missing - the statement for the branch above
// is not visible here
9383 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
// NOTE(review): listing line 9384 missing - the statement for the branch above
// is not visible here
9385 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
9386 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
9387 else if (name == PUGIXML_TEXT("not") && argc == 1)
9388 return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
9389 else if (name == PUGIXML_TEXT("number") && argc <= 1)
9390 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
9391
9392 break;
9393
9394 case 'p':
9395 if (name == PUGIXML_TEXT("position") && argc == 0)
9396 return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
9397
9398 break;
9399
9400 case 'r':
9401 if (name == PUGIXML_TEXT("round") && argc == 1)
9402 return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
9403
9404 break;
9405
9406 case 's':
9407 if (name == PUGIXML_TEXT("string") && argc <= 1)
9408 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
9409 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
9410 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
9411 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
9412 return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
9413 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
9414 return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
9415 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
9416 return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
9417 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
9418 return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
9419 else if (name == PUGIXML_TEXT("sum") && argc == 1)
9420 {
9421 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
9422 return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
9423 }
9424
9425 break;
9426
9427 case 't':
9428 if (name == PUGIXML_TEXT("translate") && argc == 3)
9429 return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
9430 else if (name == PUGIXML_TEXT("true") && argc == 0)
9431 return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
9432
9433 break;
9434
9435 default:
9436 break;
9437 }
9438
9439 throw_error("Unrecognized function or wrong parameter count");
9440
// placed after throw_error(), which longjmps or throws in both build modes
9441 return 0;
9442 }
9443
// Maps an axis name to its axis_t value.
// `specified` is set to true when `name` is a recognised axis name; otherwise
// it is set to false and axis_child is returned as the fallback.
// The first character of the name selects the group of candidates to compare.
// NOTE(review): the return statements for "descendant-or-self",
// "following-sibling" and "preceding-sibling" (listing lines 9470, 9478, 9494)
// are absent from this extract.
9444 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
9445 {
9446 specified = true;
9447
9448 switch (name.begin[0])
9449 {
9450 case 'a':
9451 if (name == PUGIXML_TEXT("ancestor"))
9452 return axis_ancestor;
9453 else if (name == PUGIXML_TEXT("ancestor-or-self"))
9454 return axis_ancestor_or_self;
9455 else if (name == PUGIXML_TEXT("attribute"))
9456 return axis_attribute;
9457
9458 break;
9459
9460 case 'c':
9461 if (name == PUGIXML_TEXT("child"))
9462 return axis_child;
9463
9464 break;
9465
9466 case 'd':
9467 if (name == PUGIXML_TEXT("descendant"))
9468 return axis_descendant;
9469 else if (name == PUGIXML_TEXT("descendant-or-self"))
// NOTE(review): listing line 9470 missing - the return for this branch is not visible here
9471
9472 break;
9473
9474 case 'f':
9475 if (name == PUGIXML_TEXT("following"))
9476 return axis_following;
9477 else if (name == PUGIXML_TEXT("following-sibling"))
// NOTE(review): listing line 9478 missing - the return for this branch is not visible here
9479
9480 break;
9481
9482 case 'n':
9483 if (name == PUGIXML_TEXT("namespace"))
9484 return axis_namespace;
9485
9486 break;
9487
9488 case 'p':
9489 if (name == PUGIXML_TEXT("parent"))
9490 return axis_parent;
9491 else if (name == PUGIXML_TEXT("preceding"))
9492 return axis_preceding;
9493 else if (name == PUGIXML_TEXT("preceding-sibling"))
// NOTE(review): listing line 9494 missing - the return for this branch is not visible here
9495
9496 break;
9497
9498 case 's':
9499 if (name == PUGIXML_TEXT("self"))
9500 return axis_self;
9501
9502 break;
9503
9504 default:
9505 break;
9506 }
9507
// not an axis name
9508 specified = false;
9509 return axis_child;
9510 }
9511
// NOTE(review): the signature line (listing line 9512) is absent from this
// extract; the function is invoked elsewhere in this listing as
// parse_node_test_type(nt_name) and its result is assigned to a nodetest_t.
// Maps a node-type name ("comment", "node", "processing-instruction", "text")
// to the matching nodetest_type_* value; returns nodetest_none for any other name.
9513 {
9514 switch (name.begin[0])
9515 {
9516 case 'c':
9517 if (name == PUGIXML_TEXT("comment"))
9518 return nodetest_type_comment;
9519
9520 break;
9521
9522 case 'n':
9523 if (name == PUGIXML_TEXT("node"))
9524 return nodetest_type_node;
9525
9526 break;
9527
9528 case 'p':
9529 if (name == PUGIXML_TEXT("processing-instruction"))
9530 return nodetest_type_pi;
9531
9532 break;
9533
9534 case 't':
9535 if (name == PUGIXML_TEXT("text"))
9536 return nodetest_type_text;
9537
9538 break;
9539
9540 default:
9541 break;
9542 }
9543
9544 return nodetest_none;
9545 }
9546
9547 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
// NOTE(review): the signature line (listing line 9548) is absent from this
// extract, as are several statements inside the body (9554, 9573, 9575,
// 9597-9598, 9620, 9629); gaps are marked below.
// Parses one primary expression, selected by the current lexeme, and returns
// the AST node built for it. An unexpected lexeme raises a parse error.
9549 {
9550 switch (_lexer.current())
9551 {
9552 case lex_var_ref:
9553 {
// NOTE(review): listing line 9554 missing - presumably the declaration of `name`
9555
9556 if (!_variables)
9557 throw_error("Unknown variable: variable set is not provided");
9558
9559 xpath_variable* var = get_variable_scratch(_scratch, _variables, name.begin, name.end);
9560
9561 if (!var)
9562 throw_error("Unknown variable: variable set does not contain the given name");
9563
9564 _lexer.next();
9565
// the node's return type is taken from the variable's type
9566 return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
9567 }
9568
9569 case lex_open_brace:
9570 {
9571 _lexer.next();
9572
// NOTE(review): listing lines 9573 and 9575 missing - presumably the parse of
// the inner expression into `n` and the condition guarding the error below
9574
9576 throw_error("Unmatched braces");
9577
9578 _lexer.next();
9579
9580 return n;
9581 }
9582
9583 case lex_quoted_string:
9584 {
9585 const char_t* value = alloc_string(_lexer.contents());
9586
9587 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
9588 _lexer.next();
9589
9590 return n;
9591 }
9592
9593 case lex_number:
9594 {
9595 double value = 0;
9596
// NOTE(review): listing lines 9597-9598 missing - the conversion of the lexeme
// text into `value` is not visible here
9599
9600 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
9601 _lexer.next();
9602
9603 return n;
9604 }
9605
9606 case lex_string:
9607 {
// function call: the first two arguments are kept in args[]; any further
// arguments are linked behind the previous argument through set_next()
9608 xpath_ast_node* args[2] = {0};
9609 size_t argc = 0;
9610
9611 xpath_lexer_string function = _lexer.contents();
9612 _lexer.next();
9613
9614 xpath_ast_node* last_arg = 0;
9615
9616 if (_lexer.current() != lex_open_brace)
9617 throw_error("Unrecognized function call");
9618 _lexer.next();
9619
// NOTE(review): listing line 9620 missing - presumably the condition guarding
// the first-argument parse below
9621 args[argc++] = parse_expression();
9622
9623 while (_lexer.current() != lex_close_brace)
9624 {
9625 if (_lexer.current() != lex_comma)
9626 throw_error("No comma between function arguments");
9627 _lexer.next();
9628
// NOTE(review): listing line 9629 missing - presumably the parse of the next
// argument into `n`
9630
9631 if (argc < 2) args[argc] = n;
9632 else last_arg->set_next(n);
9633
9634 argc++;
9635 last_arg = n;
9636 }
9637
9638 _lexer.next();
9639
9640 return parse_function(function, argc, args);
9641 }
9642
9643 default:
9644 throw_error("Unrecognizable primary expression");
9645
9646 return 0;
9647 }
9648 }
9649
9650 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
9651 // Predicate ::= '[' PredicateExpr ']'
9652 // PredicateExpr ::= Expr
// NOTE(review): the signature line (listing line 9653) and several statements
// (9655, 9657, 9661, 9669) are absent from this extract - presumably the parse
// of the primary expression into `n`, the loop header, the parse of the
// predicate into `expr`, and the condition guarding the "Unmatched square
// brace" error.
// Wraps `n` in one filter node per predicate. A predicate whose expression is
// not number-typed and reports is_posinv() becomes ast_filter_posinv;
// otherwise ast_filter is used.
9654 {
9656
9658 {
9659 _lexer.next();
9660
9662
9663 if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
9664
9665 bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv();
9666
9667 n = new (alloc_node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr);
9668
9670 throw_error("Unmatched square brace");
9671
9672 _lexer.next();
9673 }
9674
9675 return n;
9676 }
9677
9678 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
9679 // AxisSpecifier ::= AxisName '::' | '@'?
9680 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
9681 // NameTest ::= '*' | NCName ':' '*' | QName
9682 // AbbreviatedStep ::= '.' | '..'
// NOTE(review): the signature line (listing line 9683) is absent from this
// extract; the function is invoked elsewhere in this listing as parse_step(set)
// and parse_step(n). Several conditions and statements inside the body are
// also missing (9691, 9702, 9708, 9721, 9754, 9766, 9773, 9805, 9809, 9813);
// gaps are marked below.
// Parses one location step applied to `set` (which may be null) and returns an
// ast_step node. Predicates that follow the node test are chained: the first
// through set_right() on the step, later ones through set_next() on the
// previous predicate.
9684 {
9685 if (set && set->rettype() != xpath_type_node_set)
9686 throw_error("Step has to be applied to node set");
9687
9688 bool axis_specified = false;
9689 axis_t axis = axis_child; // implied child axis
9690
// NOTE(review): listing line 9691 missing - presumably the condition for the
// attribute-axis branch below
9692 {
9693 axis = axis_attribute;
9694 axis_specified = true;
9695
9696 _lexer.next();
9697 }
9698 else if (_lexer.current() == lex_dot)
9699 {
9700 _lexer.next();
9701
// NOTE(review): listing line 9702 missing - the statement completing the '.'
// branch is not visible here
9703 }
9704 else if (_lexer.current() == lex_double_dot)
9705 {
9706 _lexer.next();
9707
// NOTE(review): listing line 9708 missing - the statement completing the '..'
// branch is not visible here
9709 }
9710
9711 nodetest_t nt_type = nodetest_none;
9712 xpath_lexer_string nt_name;
9713
9714 if (_lexer.current() == lex_string)
9715 {
9716 // node name test
9717 nt_name = _lexer.contents();
9718 _lexer.next();
9719
9720 // was it an axis name?
// NOTE(review): listing line 9721 missing - the condition for the block below
// is not visible here
9722 {
9723 // parse axis name
9724 if (axis_specified) throw_error("Two axis specifiers in one step");
9725
9726 axis = parse_axis_name(nt_name, axis_specified);
9727
9728 if (!axis_specified) throw_error("Unknown axis");
9729
9730 // read actual node test
9731 _lexer.next();
9732
9733 if (_lexer.current() == lex_multiply)
9734 {
9735 nt_type = nodetest_all;
9736 nt_name = xpath_lexer_string();
9737 _lexer.next();
9738 }
9739 else if (_lexer.current() == lex_string)
9740 {
9741 nt_name = _lexer.contents();
9742 _lexer.next();
9743 }
9744 else throw_error("Unrecognized node test");
9745 }
9746
9747 if (nt_type == nodetest_none)
9748 {
9749 // node type test or processing-instruction
9750 if (_lexer.current() == lex_open_brace)
9751 {
9752 _lexer.next();
9753
// NOTE(review): listing line 9754 missing - the condition for the node-type
// branch below is not visible here
9755 {
9756 _lexer.next();
9757
9758 nt_type = parse_node_test_type(nt_name);
9759
9760 if (nt_type == nodetest_none) throw_error("Unrecognized node type");
9761
9762 nt_name = xpath_lexer_string();
9763 }
9764 else if (nt_name == PUGIXML_TEXT("processing-instruction"))
9765 {
// NOTE(review): listing line 9766 missing - the condition guarding the error
// below is not visible here
9767 throw_error("Only literals are allowed as arguments to processing-instruction()");
9768
9769 nt_type = nodetest_pi;
9770 nt_name = _lexer.contents();
9771 _lexer.next();
9772
// NOTE(review): listing line 9773 missing - the condition guarding the error
// below is not visible here
9774 throw_error("Unmatched brace near processing-instruction()");
9775 _lexer.next();
9776 }
9777 else
9778 throw_error("Unmatched brace near node type test");
9779
9780 }
9781 // QName or NCName:*
9782 else
9783 {
9784 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
9785 {
9786 nt_name.end--; // erase *
9787
9788 nt_type = nodetest_all_in_namespace;
9789 }
9790 else nt_type = nodetest_name;
9791 }
9792 }
9793 }
9794 else if (_lexer.current() == lex_multiply)
9795 {
9796 nt_type = nodetest_all;
9797 _lexer.next();
9798 }
9799 else throw_error("Unrecognized node test");
9800
9801 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
9802
// tail of the predicate chain built so far
9803 xpath_ast_node* last = 0;
9804
// NOTE(review): listing line 9805 missing - presumably the header of the
// predicate loop
9806 {
9807 _lexer.next();
9808
// NOTE(review): listing line 9809 missing - presumably the parse of the
// predicate expression into `expr`
9810
9811 xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);
9812
// NOTE(review): listing line 9813 missing - the condition guarding the error
// below is not visible here
9814 throw_error("Unmatched square brace");
9815 _lexer.next();
9816
9817 if (last) last->set_next(pred);
9818 else n->set_right(pred);
9819
9820 last = pred;
9821 }
9822
9823 return n;
9824 }
9825
9826 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
// NOTE(review): the signature line (listing line 9827), the loop header (9831)
// and the statement executed for '//' (9837) are absent from this extract.
// Parses a first step applied to `set`, then keeps parsing further steps, each
// one applied to the node built so far; returns the last node built.
9828 {
9829 xpath_ast_node* n = parse_step(set);
9830
9832 {
// remember the separator before consuming it
9833 lexeme_t l = _lexer.current();
9834 _lexer.next();
9835
9836 if (l == lex_double_slash)
9838
9839 n = parse_step(n);
9840 }
9841
9842 return n;
9843 }
9844
9845 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
9846 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
// NOTE(review): the signature line (listing line 9847) and several statements
// (9859, 9868, 9870, 9874) are absent from this extract, including the returns
// of the relative-path, '//' and fall-through cases.
// Parses a location path. A leading '/' or '//' creates an ast_step_root node;
// after a single '/', a lexeme that cannot start a relative path means the
// root node itself is returned.
9848 {
9849 if (_lexer.current() == lex_slash)
9850 {
9851 _lexer.next();
9852
9853 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
9854
9855 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
9856 lexeme_t l = _lexer.current();
9857
9858 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
// NOTE(review): listing line 9859 missing - the statement for the branch above
// is not visible here
9860 else
9861 return n;
9862 }
9863 else if (_lexer.current() == lex_double_slash)
9864 {
9865 _lexer.next();
9866
9867 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
// NOTE(review): listing lines 9868 and 9870 missing - the remaining statements
// of the '//' branch are not visible here
9869
9871 }
9872
9873 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
// NOTE(review): listing line 9874 missing - the final return is not visible here
9875 }
9876
9877 // PathExpr ::= LocationPath
9878 // | FilterExpr
9879 // | FilterExpr '/' RelativeLocationPath
9880 // | FilterExpr '//' RelativeLocationPath
9881 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
9882 // UnaryExpr ::= UnionExpr | '-' UnaryExpr
9884 {
9885 // Clarification.
9886 // PathExpr begins with either LocationPath or FilterExpr.
9887 // FilterExpr begins with PrimaryExpr
9888 // PrimaryExpr begins with '$' in case of it being a variable reference,
9889 // '(' in case of it being an expression, string literal, number constant or
9890 // function call.
9891
9895 {
9896 if (_lexer.current() == lex_string)
9897 {
9898 // This is either a function call, or not - if not, we shall proceed with location path
9899 const char_t* state = _lexer.state();
9900
9901 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
9902
9903 if (*state != '(') return parse_location_path();
9904
9905 // This looks like a function call; however this still can be a node-test. Check it.
9907 }
9908
9910
9912 {
9913 lexeme_t l = _lexer.current();
9914 _lexer.next();
9915
9916 if (l == lex_double_slash)
9917 {
9918 if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
9919
9921 }
9922
9923 // select from location path
9925 }
9926
9927 return n;
9928 }
9929 else if (_lexer.current() == lex_minus)
9930 {
9931 _lexer.next();
9932
9933 // precedence 7+ - only parses union expressions
9935
9936 return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
9937 }
9938 else
9939 return parse_location_path();
9940 }
9941
9943 {
9945 xpath_value_type rettype;
9947
9949 {
9950 }
9951
9952 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
9953 {
9954 }
9955
9957 {
9958 switch (lexer.current())
9959 {
9960 case lex_string:
9961 if (lexer.contents() == PUGIXML_TEXT("or"))
9962 return binary_op_t(ast_op_or, xpath_type_boolean, 1);
9963 else if (lexer.contents() == PUGIXML_TEXT("and"))
9964 return binary_op_t(ast_op_and, xpath_type_boolean, 2);
9965 else if (lexer.contents() == PUGIXML_TEXT("div"))
9966 return binary_op_t(ast_op_divide, xpath_type_number, 6);
9967 else if (lexer.contents() == PUGIXML_TEXT("mod"))
9968 return binary_op_t(ast_op_mod, xpath_type_number, 6);
9969 else
9970 return binary_op_t();
9971
9972 case lex_equal:
9973 return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
9974
9975 case lex_not_equal:
9976 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
9977
9978 case lex_less:
9979 return binary_op_t(ast_op_less, xpath_type_boolean, 4);
9980
9981 case lex_greater:
9982 return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
9983
9984 case lex_less_or_equal:
9985 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
9986
9988 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
9989
9990 case lex_plus:
9991 return binary_op_t(ast_op_add, xpath_type_number, 5);
9992
9993 case lex_minus:
9994 return binary_op_t(ast_op_subtract, xpath_type_number, 5);
9995
9996 case lex_multiply:
9997 return binary_op_t(ast_op_multiply, xpath_type_number, 6);
9998
9999 case lex_union:
10000 return binary_op_t(ast_op_union, xpath_type_node_set, 7);
10001
10002 default:
10003 return binary_op_t();
10004 }
10005 }
10006 };
10007
10009 {
10011
10012 while (op.asttype != ast_unknown && op.precedence >= limit)
10013 {
10014 _lexer.next();
10015
10017
10019
10020 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
10021 {
10022 rhs = parse_expression_rec(rhs, nextop.precedence);
10023
10024 nextop = binary_op_t::parse(_lexer);
10025 }
10026
10027 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
10028 throw_error("Union operator has to be applied to node sets");
10029
10030 lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs);
10031
10033 }
10034
10035 return lhs;
10036 }
10037
10038 // Expr ::= OrExpr
10039 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
10040 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
10041 // EqualityExpr ::= RelationalExpr
10042 // | EqualityExpr '=' RelationalExpr
10043 // | EqualityExpr '!=' RelationalExpr
10044 // RelationalExpr ::= AdditiveExpr
10045 // | RelationalExpr '<' AdditiveExpr
10046 // | RelationalExpr '>' AdditiveExpr
10047 // | RelationalExpr '<=' AdditiveExpr
10048 // | RelationalExpr '>=' AdditiveExpr
10049 // AdditiveExpr ::= MultiplicativeExpr
10050 // | AdditiveExpr '+' MultiplicativeExpr
10051 // | AdditiveExpr '-' MultiplicativeExpr
10052 // MultiplicativeExpr ::= UnaryExpr
10053 // | MultiplicativeExpr '*' UnaryExpr
10054 // | MultiplicativeExpr 'div' UnaryExpr
10055 // | MultiplicativeExpr 'mod' UnaryExpr
10060
10061 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
10062 {
10063 }
10064
10066 {
10067 xpath_ast_node* result = parse_expression();
10068
10069 if (_lexer.current() != lex_eof)
10070 {
10071 // there are still unparsed tokens left, error
10072 throw_error("Incorrect query");
10073 }
10074
10075 return result;
10076 }
10077
10078 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
10079 {
10080 xpath_parser parser(query, variables, alloc, result);
10081
10082 #ifdef PUGIXML_NO_EXCEPTIONS
10083 int error = setjmp(parser._error_handler);
10084
10085 return (error == 0) ? parser.parse() : 0;
10086 #else
10087 return parser.parse();
10088 #endif
10089 }
10090 };
10091
10093 {
10095 {
10096 void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
10097
10098 return new (memory) xpath_query_impl();
10099 }
10100
10101 static void destroy(void* ptr)
10102 {
10103 if (!ptr) return;
10104
10105 // free all allocated pages
10106 static_cast<xpath_query_impl*>(ptr)->alloc.release();
10107
10108 // free allocator memory (with the first page)
10110 }
10111
10113 {
10114 block.next = 0;
10115 block.capacity = sizeof(block.data);
10116 }
10117
10121 };
10122
10124 {
10125 if (!impl) return xpath_string();
10126
10127 #ifdef PUGIXML_NO_EXCEPTIONS
10128 if (setjmp(sd.error_handler)) return xpath_string();
10129 #endif
10130
10131 xpath_context c(n, 1, 1);
10132
10133 return impl->root->eval_string(c, sd.stack);
10134 }
10136
10137namespace pugi
10138{
10139#ifndef PUGIXML_NO_EXCEPTIONS
10140 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
10141 {
10142 assert(_result.error);
10143 }
10144
10145 PUGI__FN const char* xpath_exception::what() const throw()
10146 {
10147 return _result.error;
10148 }
10149
10150 PUGI__FN const xpath_parse_result& xpath_exception::result() const
10151 {
10152 return _result;
10153 }
10154#endif
10155
10156 PUGI__FN xpath_node::xpath_node()
10157 {
10158 }
10159
10160 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
10161 {
10162 }
10163
10164 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
10165 {
10166 }
10167
10168 PUGI__FN xml_node xpath_node::node() const
10169 {
10170 return _attribute ? xml_node() : _node;
10171 }
10172
10173 PUGI__FN xml_attribute xpath_node::attribute() const
10174 {
10175 return _attribute;
10176 }
10177
10178 PUGI__FN xml_node xpath_node::parent() const
10179 {
10180 return _attribute ? _node : _node.parent();
10181 }
10182
10183 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
10184 {
10185 }
10186
10187 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
10188 {
10189 return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
10190 }
10191
10192 PUGI__FN bool xpath_node::operator!() const
10193 {
10194 return !(_node || _attribute);
10195 }
10196
10197 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
10198 {
10199 return _node == n._node && _attribute == n._attribute;
10200 }
10201
10202 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
10203 {
10204 return _node != n._node || _attribute != n._attribute;
10205 }
10206
10207#ifdef __BORLANDC__
10208 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
10209 {
10210 return (bool)lhs && rhs;
10211 }
10212
10213 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
10214 {
10215 return (bool)lhs || rhs;
10216 }
10217#endif
10218
10219 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_)
10220 {
10221 assert(begin_ <= end_);
10222
10223 size_t size_ = static_cast<size_t>(end_ - begin_);
10224
10225 if (size_ <= 1)
10226 {
10227 // deallocate old buffer
10228 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
10229
10230 // use internal buffer
10231 if (begin_ != end_) _storage = *begin_;
10232
10233 _begin = &_storage;
10234 _end = &_storage + size_;
10235 }
10236 else
10237 {
10238 // make heap copy
10239 xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
10240
10241 if (!storage)
10242 {
10243 #ifdef PUGIXML_NO_EXCEPTIONS
10244 return;
10245 #else
10246 throw std::bad_alloc();
10247 #endif
10248 }
10249
10250 memcpy(storage, begin_, size_ * sizeof(xpath_node));
10251
10252 // deallocate old buffer
10253 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
10254
10255 // finalize
10256 _begin = storage;
10257 _end = storage + size_;
10258 }
10259 }
10260
10261 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
10262 {
10263 }
10264
10265 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_), _begin(&_storage), _end(&_storage)
10266 {
10267 _assign(begin_, end_);
10268 }
10269
10270 PUGI__FN xpath_node_set::~xpath_node_set()
10271 {
10272 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
10273 }
10274
10275 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage)
10276 {
10277 _assign(ns._begin, ns._end);
10278 }
10279
10280 PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
10281 {
10282 if (this == &ns) return *this;
10283
10284 _type = ns._type;
10285 _assign(ns._begin, ns._end);
10286
10287 return *this;
10288 }
10289
10290 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
10291 {
10292 return _type;
10293 }
10294
10295 PUGI__FN size_t xpath_node_set::size() const
10296 {
10297 return _end - _begin;
10298 }
10299
10300 PUGI__FN bool xpath_node_set::empty() const
10301 {
10302 return _begin == _end;
10303 }
10304
10305 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
10306 {
10307 assert(index < size());
10308 return _begin[index];
10309 }
10310
10311 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
10312 {
10313 return _begin;
10314 }
10315
10316 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
10317 {
10318 return _end;
10319 }
10320
10321 PUGI__FN void xpath_node_set::sort(bool reverse)
10322 {
10323 _type = impl::xpath_sort(_begin, _end, _type, reverse);
10324 }
10325
10326 PUGI__FN xpath_node xpath_node_set::first() const
10327 {
10328 return impl::xpath_first(_begin, _end, _type);
10329 }
10330
10331 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
10332 {
10333 }
10334
10335 PUGI__FN xpath_parse_result::operator bool() const
10336 {
10337 return error == 0;
10338 }
10339
10340 PUGI__FN const char* xpath_parse_result::description() const
10341 {
10342 return error ? error : "No error";
10343 }
10344
10345 PUGI__FN xpath_variable::xpath_variable(): _type(xpath_type_none), _next(0)
10346 {
10347 }
10348
10349 PUGI__FN const char_t* xpath_variable::name() const
10350 {
10351 switch (_type)
10352 {
10353 case xpath_type_node_set:
10354 return static_cast<const impl::xpath_variable_node_set*>(this)->name;
10355
10356 case xpath_type_number:
10357 return static_cast<const impl::xpath_variable_number*>(this)->name;
10358
10359 case xpath_type_string:
10360 return static_cast<const impl::xpath_variable_string*>(this)->name;
10361
10362 case xpath_type_boolean:
10363 return static_cast<const impl::xpath_variable_boolean*>(this)->name;
10364
10365 default:
10366 assert(!"Invalid variable type");
10367 return 0;
10368 }
10369 }
10370
10371 PUGI__FN xpath_value_type xpath_variable::type() const
10372 {
10373 return _type;
10374 }
10375
10376 PUGI__FN bool xpath_variable::get_boolean() const
10377 {
10378 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
10379 }
10380
10381 PUGI__FN double xpath_variable::get_number() const
10382 {
10383 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
10384 }
10385
10386 PUGI__FN const char_t* xpath_variable::get_string() const
10387 {
10388 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
10389 return value ? value : PUGIXML_TEXT("");
10390 }
10391
10392 PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
10393 {
10394 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
10395 }
10396
10397 PUGI__FN bool xpath_variable::set(bool value)
10398 {
10399 if (_type != xpath_type_boolean) return false;
10400
10401 static_cast<impl::xpath_variable_boolean*>(this)->value = value;
10402 return true;
10403 }
10404
10405 PUGI__FN bool xpath_variable::set(double value)
10406 {
10407 if (_type != xpath_type_number) return false;
10408
10409 static_cast<impl::xpath_variable_number*>(this)->value = value;
10410 return true;
10411 }
10412
10413 PUGI__FN bool xpath_variable::set(const char_t* value)
10414 {
10415 if (_type != xpath_type_string) return false;
10416
10417 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
10418
10419 // duplicate string
10420 size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
10421
10422 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
10423 if (!copy) return false;
10424
10425 memcpy(copy, value, size);
10426
10427 // replace old string
10428 if (var->value) impl::xml_memory::deallocate(var->value);
10429 var->value = copy;
10430
10431 return true;
10432 }
10433
10434 PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
10435 {
10436 if (_type != xpath_type_node_set) return false;
10437
10438 static_cast<impl::xpath_variable_node_set*>(this)->value = value;
10439 return true;
10440 }
10441
10442 PUGI__FN xpath_variable_set::xpath_variable_set()
10443 {
10444 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0;
10445 }
10446
10447 PUGI__FN xpath_variable_set::~xpath_variable_set()
10448 {
10449 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
10450 {
10451 xpath_variable* var = _data[i];
10452
10453 while (var)
10454 {
10455 xpath_variable* next = var->_next;
10456
10457 impl::delete_xpath_variable(var->_type, var);
10458
10459 var = next;
10460 }
10461 }
10462 }
10463
10464 PUGI__FN xpath_variable* xpath_variable_set::find(const char_t* name) const
10465 {
10466 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
10467 size_t hash = impl::hash_string(name) % hash_size;
10468
10469 // look for existing variable
10470 for (xpath_variable* var = _data[hash]; var; var = var->_next)
10471 if (impl::strequal(var->name(), name))
10472 return var;
10473
10474 return 0;
10475 }
10476
10477 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
10478 {
10479 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
10480 size_t hash = impl::hash_string(name) % hash_size;
10481
10482 // look for existing variable
10483 for (xpath_variable* var = _data[hash]; var; var = var->_next)
10484 if (impl::strequal(var->name(), name))
10485 return var->type() == type ? var : 0;
10486
10487 // add new variable
10488 xpath_variable* result = impl::new_xpath_variable(type, name);
10489
10490 if (result)
10491 {
10492 result->_type = type;
10493 result->_next = _data[hash];
10494
10495 _data[hash] = result;
10496 }
10497
10498 return result;
10499 }
10500
10501 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
10502 {
10503 xpath_variable* var = add(name, xpath_type_boolean);
10504 return var ? var->set(value) : false;
10505 }
10506
10507 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
10508 {
10509 xpath_variable* var = add(name, xpath_type_number);
10510 return var ? var->set(value) : false;
10511 }
10512
10513 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
10514 {
10515 xpath_variable* var = add(name, xpath_type_string);
10516 return var ? var->set(value) : false;
10517 }
10518
10519 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
10520 {
10521 xpath_variable* var = add(name, xpath_type_node_set);
10522 return var ? var->set(value) : false;
10523 }
10524
10525 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
10526 {
10527 return find(name);
10528 }
10529
10530 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
10531 {
10532 return find(name);
10533 }
10534
10535 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
10536 {
10537 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
10538
10539 if (!qimpl)
10540 {
10541 #ifdef PUGIXML_NO_EXCEPTIONS
10542 _result.error = "Out of memory";
10543 #else
10544 throw std::bad_alloc();
10545 #endif
10546 }
10547 else
10548 {
10549 impl::buffer_holder impl_holder(qimpl, impl::xpath_query_impl::destroy);
10550
10551 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
10552
10553 if (qimpl->root)
10554 {
10555 _impl = static_cast<impl::xpath_query_impl*>(impl_holder.release());
10556 _result.error = 0;
10557 }
10558 }
10559 }
10560
10561 PUGI__FN xpath_query::~xpath_query()
10562 {
10563 impl::xpath_query_impl::destroy(_impl);
10564 }
10565
10566 PUGI__FN xpath_value_type xpath_query::return_type() const
10567 {
10568 if (!_impl) return xpath_type_none;
10569
10570 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
10571 }
10572
10573 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
10574 {
10575 if (!_impl) return false;
10576
10577 impl::xpath_context c(n, 1, 1);
10578 impl::xpath_stack_data sd;
10579
10580 #ifdef PUGIXML_NO_EXCEPTIONS
10581 if (setjmp(sd.error_handler)) return false;
10582 #endif
10583
10584 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
10585 }
10586
10587 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
10588 {
10589 if (!_impl) return impl::gen_nan();
10590
10591 impl::xpath_context c(n, 1, 1);
10592 impl::xpath_stack_data sd;
10593
10594 #ifdef PUGIXML_NO_EXCEPTIONS
10595 if (setjmp(sd.error_handler)) return impl::gen_nan();
10596 #endif
10597
10598 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
10599 }
10600
10601#ifndef PUGIXML_NO_STL
10602 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
10603 {
10604 impl::xpath_stack_data sd;
10605
10606 return impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd).c_str();
10607 }
10608#endif
10609
10610 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
10611 {
10612 impl::xpath_stack_data sd;
10613
10614 impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
10615
10616 size_t full_size = r.length() + 1;
10617
10618 if (capacity > 0)
10619 {
10620 size_t size = (full_size < capacity) ? full_size : capacity;
10621 assert(size > 0);
10622
10623 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
10624 buffer[size - 1] = 0;
10625 }
10626
10627 return full_size;
10628 }
10629
10630 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
10631 {
10632 if (!_impl) return xpath_node_set();
10633
10634 impl::xpath_ast_node* root = static_cast<impl::xpath_query_impl*>(_impl)->root;
10635
10636 if (root->rettype() != xpath_type_node_set)
10637 {
10638 #ifdef PUGIXML_NO_EXCEPTIONS
10639 return xpath_node_set();
10640 #else
10641 xpath_parse_result res;
10642 res.error = "Expression does not evaluate to node set";
10643
10644 throw xpath_exception(res);
10645 #endif
10646 }
10647
10648 impl::xpath_context c(n, 1, 1);
10649 impl::xpath_stack_data sd;
10650
10651 #ifdef PUGIXML_NO_EXCEPTIONS
10652 if (setjmp(sd.error_handler)) return xpath_node_set();
10653 #endif
10654
10655 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack);
10656
10657 return xpath_node_set(r.begin(), r.end(), r.type());
10658 }
10659
10660 PUGI__FN const xpath_parse_result& xpath_query::result() const
10661 {
10662 return _result;
10663 }
10664
10665 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
10666 {
10667 }
10668
10669 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
10670 {
10671 return _impl ? unspecified_bool_xpath_query : 0;
10672 }
10673
10674 PUGI__FN bool xpath_query::operator!() const
10675 {
10676 return !_impl;
10677 }
10678
10679 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
10680 {
10681 xpath_query q(query, variables);
10682 return select_single_node(q);
10683 }
10684
10685 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
10686 {
10687 xpath_node_set s = query.evaluate_node_set(*this);
10688 return s.empty() ? xpath_node() : s.first();
10689 }
10690
10691 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
10692 {
10693 xpath_query q(query, variables);
10694 return select_nodes(q);
10695 }
10696
10697 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
10698 {
10699 return query.evaluate_node_set(*this);
10700 }
10701}
10702
10703#endif
10704
10705#ifdef __BORLANDC__
10706# pragma option pop
10707#endif
10708
10709// Intel C++ does not properly keep warning state for function templates,
10710// so popping warning state at the end of translation unit leads to warnings in the middle.
10711#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
10712# pragma warning(pop)
10713#endif
10714
10715// Undefine all local macros (makes sure we're not leaking macros in header-only mode)
10716#undef PUGI__NO_INLINE
10717#undef PUGI__UNLIKELY
10718#undef PUGI__STATIC_ASSERT
10719#undef PUGI__DMC_VOLATILE
10720#undef PUGI__MSVC_CRT_VERSION
10721#undef PUGI__NS_BEGIN
10722#undef PUGI__NS_END
10723#undef PUGI__FN
10724#undef PUGI__FN_NO_INLINE
10725#undef PUGI__IS_CHARTYPE_IMPL
10726#undef PUGI__IS_CHARTYPE
10727#undef PUGI__IS_CHARTYPEX
10728#undef PUGI__ENDSWITH
10729#undef PUGI__SKIPWS
10730#undef PUGI__OPTSET
10731#undef PUGI__PUSHNODE
10732#undef PUGI__POPNODE
10733#undef PUGI__SCANFOR
10734#undef PUGI__SCANWHILE
10735#undef PUGI__SCANWHILE_UNROLL
10736#undef PUGI__ENDSEG
10737#undef PUGI__THROW_ERROR
10738#undef PUGI__CHECK_ERROR
10739
10740#endif
10741
void write(char_t d0)
Definition pugixml.cpp:3108
union xml_buffered_writer::@6 scratch
xml_writer & writer
Definition pugixml.cpp:3196
xml_buffered_writer(xml_writer &writer_, xml_encoding user_encoding)
Definition pugixml.cpp:3028
void write(char_t d0, char_t d1)
Definition pugixml.cpp:3116
xml_buffered_writer(const xml_buffered_writer &)
char_t data_char[bufcapacity]
Definition pugixml.cpp:3193
xml_buffered_writer & operator=(const xml_buffered_writer &)
char_t buffer[bufcapacity]
Definition pugixml.cpp:3186
void flush(const char_t *data, size_t size)
Definition pugixml.cpp:3044
uint16_t data_u16[2 *bufcapacity]
Definition pugixml.cpp:3191
void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
Definition pugixml.cpp:3146
xml_encoding encoding
Definition pugixml.cpp:3198
uint8_t data_u8[4 *bufcapacity]
Definition pugixml.cpp:3190
void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
Definition pugixml.cpp:3158
void write(const char_t *data)
Definition pugixml.cpp:3103
void write(char_t d0, char_t d1, char_t d2, char_t d3)
Definition pugixml.cpp:3135
void write(char_t d0, char_t d1, char_t d2)
Definition pugixml.cpp:3125
void write(const char_t *data, size_t length)
Definition pugixml.cpp:3062
uint32_t data_u32[bufcapacity]
Definition pugixml.cpp:3192
xpath_allocator(xpath_memory_block *root, size_t root_size=0)
Definition pugixml.cpp:6210
void * reallocate(void *ptr, size_t old_size, size_t new_size)
Definition pugixml.cpp:6267
void * allocate(size_t size)
Definition pugixml.cpp:6250
void * allocate_nothrow(size_t size)
Definition pugixml.cpp:6217
xpath_memory_block * _root
Definition pugixml.cpp:6202
void revert(const xpath_allocator &state)
Definition pugixml.cpp:6312
xpath_variable * variable
Definition pugixml.cpp:7941
xpath_ast_node * _next
Definition pugixml.cpp:7932
const char_t * string
Definition pugixml.cpp:7937
void set_next(xpath_ast_node *value)
Definition pugixml.cpp:8551
xpath_value_type rettype() const
Definition pugixml.cpp:9252
xpath_node_set_raw eval_node_set(const xpath_context &c, const xpath_stack &stack)
Definition pugixml.cpp:9094
const char_t * nodetest
Definition pugixml.cpp:7943
xpath_string eval_string(const xpath_context &c, const xpath_stack &stack)
Definition pugixml.cpp:8876
void step_push(xpath_node_set_raw &ns, const xml_node &n, xpath_allocator *alloc)
Definition pugixml.cpp:8175
void step_fill(xpath_node_set_raw &ns, const xml_node &n, xpath_allocator *alloc, T)
Definition pugixml.cpp:8224
xpath_ast_node(ast_type_t type, xpath_ast_node *left, axis_t axis, nodetest_t test, const char_t *contents)
Definition pugixml.cpp:8545
xpath_node_set_raw step_do(const xpath_context &c, const xpath_stack &stack, T v)
Definition pugixml.cpp:8470
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable *value)
Definition pugixml.cpp:8533
double eval_number(const xpath_context &c, const xpath_stack &stack)
Definition pugixml.cpp:8690
static bool compare_eq(xpath_ast_node *lhs, xpath_ast_node *rhs, const xpath_context &c, const xpath_stack &stack, const Comp &comp)
Definition pugixml.cpp:7949
void step_push(xpath_node_set_raw &ns, const xml_attribute &a, const xml_node &parent, xpath_allocator *alloc)
Definition pugixml.cpp:8144
void step_fill(xpath_node_set_raw &ns, const xml_attribute &a, const xml_node &p, xpath_allocator *alloc, T v)
Definition pugixml.cpp:8394
xpath_string eval_string_concat(const xpath_context &c, const xpath_stack &stack)
Definition pugixml.cpp:8827
xpath_ast_node * _right
Definition pugixml.cpp:7931
bool eval_boolean(const xpath_context &c, const xpath_stack &stack)
Definition pugixml.cpp:8561
static bool compare_rel(xpath_ast_node *lhs, xpath_ast_node *rhs, const xpath_context &c, const xpath_stack &stack, const Comp &comp)
Definition pugixml.cpp:8037
xpath_ast_node & operator=(const xpath_ast_node &)
xpath_ast_node * _left
Definition pugixml.cpp:7930
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value)
Definition pugixml.cpp:8526
xpath_ast_node(const xpath_ast_node &)
void apply_predicates(xpath_node_set_raw &ns, size_t first, const xpath_stack &stack)
Definition pugixml.cpp:8134
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t *value)
Definition pugixml.cpp:8519
union xpath_ast_node::@7 _data
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node *left=0, xpath_ast_node *right=0)
Definition pugixml.cpp:8540
void apply_predicate(xpath_node_set_raw &ns, size_t first, xpath_ast_node *expr, const xpath_stack &stack)
Definition pugixml.cpp:8108
void set_right(xpath_ast_node *value)
Definition pugixml.cpp:8556
const char_t * _cur_lexeme_pos
Definition pugixml.cpp:7515
const char_t * current_pos() const
Definition pugixml.cpp:7805
lexeme_t _cur_lexeme
Definition pugixml.cpp:7518
void next()
Definition pugixml.cpp:7531
lexeme_t current() const
Definition pugixml.cpp:7800
xpath_lexer_string _cur_lexeme_contents
Definition pugixml.cpp:7516
const xpath_lexer_string & contents() const
Definition pugixml.cpp:7810
xpath_lexer(const char_t *query)
Definition pugixml.cpp:7521
const char_t * state() const
Definition pugixml.cpp:7526
const char_t * _cur
Definition pugixml.cpp:7514
xpath_node_set::type_t _type
Definition pugixml.cpp:7341
void append(const xpath_node *begin_, const xpath_node *end_, xpath_allocator *alloc)
Definition pugixml.cpp:7399
size_t size() const
Definition pugixml.cpp:7367
xpath_node_set::type_t type() const
Definition pugixml.cpp:7441
xpath_node * end() const
Definition pugixml.cpp:7357
void push_back(const xpath_node &node, xpath_allocator *alloc)
Definition pugixml.cpp:7377
xpath_node * _eos
Definition pugixml.cpp:7345
xpath_node first() const
Definition pugixml.cpp:7372
xpath_node * _begin
Definition pugixml.cpp:7343
bool empty() const
Definition pugixml.cpp:7362
xpath_node * begin() const
Definition pugixml.cpp:7352
void truncate(xpath_node *pos)
Definition pugixml.cpp:7426
void set_type(xpath_node_set::type_t value)
Definition pugixml.cpp:7446
xpath_node * _end
Definition pugixml.cpp:7344
bool empty() const
Definition pugixml.cpp:6506
bool uses_heap() const
Definition pugixml.cpp:6521
static char_t * duplicate_string(const char_t *string, xpath_allocator *alloc)
Definition pugixml.cpp:6418
size_t length() const
Definition pugixml.cpp:6489
bool operator!=(const xpath_string &o) const
Definition pugixml.cpp:6516
void append(const xpath_string &o, xpath_allocator *alloc)
Definition pugixml.cpp:6450
xpath_string(const char_t *str, xpath_allocator *alloc)
Definition pugixml.cpp:6428
char_t * data(xpath_allocator *alloc)
Definition pugixml.cpp:6494
bool operator==(const xpath_string &o) const
Definition pugixml.cpp:6511
xpath_string(const char_t *begin, const char_t *end, xpath_allocator *alloc)
Definition pugixml.cpp:6440
const char_t * _buffer
Definition pugixml.cpp:6404
xpath_string(const char_t *str, bool use_heap)
Definition pugixml.cpp:6436
static char_t * duplicate_string(const char_t *string, size_t length, xpath_allocator *alloc)
Definition pugixml.cpp:6407
const char_t * c_str() const
Definition pugixml.cpp:6484
PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t *str)
Definition pugixml.cpp:5871
static PUGI__FN void unspecified_bool_xpath_query(xpath_query ***)
static PUGI__FN void unspecified_bool_xml_node(xml_node ***)
Definition pugixml.cpp:4356
static PUGI__FN void unspecified_bool_xml_attribute(xml_attribute ***)
Definition pugixml.cpp:4113
PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
Definition pugixml.cpp:5896
PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
Definition pugixml.cpp:5907
PUGI__FN std::basic_string< wchar_t > PUGIXML_FUNCTION as_wide(const char *str)
Definition pugixml.cpp:5883
PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
Definition pugixml.cpp:5902
static PUGI__FN void unspecified_bool_xpath_node(xpath_node ***)
static PUGI__FN void unspecified_bool_xml_text(xml_text ***)
Definition pugixml.cpp:5229
void destroy_node(xml_node_struct *n, xml_allocator &alloc)
Definition pugixml.cpp:584
#define PUGI__CHECK_ERROR(err, m)
Definition pugixml.cpp:1909
PUGI__FN void default_deallocate(void *ptr)
Definition pugixml.cpp:155
PUGI__FN size_t strlength_wide(const wchar_t *s)
Definition pugixml.cpp:210
#define PUGI__OPTSET(OPT)
Definition pugixml.cpp:1901
PUGI__FN double round_nearest_nzero(double value)
Definition pugixml.cpp:7010
PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t *data, size_t size)
Definition pugixml.cpp:1515
PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct *doc, xml_node_struct *root, void *contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t **out_buffer)
Definition pugixml.cpp:4011
PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl *impl, const xpath_node &n, xpath_stack_data &sd)
ast_type_t
Definition pugixml.cpp:7819
@ ast_op_and
Definition pugixml.cpp:7822
@ ast_number_constant
Definition pugixml.cpp:7840
@ ast_filter
Definition pugixml.cpp:7837
@ ast_func_substring_3
Definition pugixml.cpp:7860
@ ast_func_sum
Definition pugixml.cpp:7873
@ ast_func_name_1
Definition pugixml.cpp:7851
@ ast_func_floor
Definition pugixml.cpp:7874
@ ast_op_divide
Definition pugixml.cpp:7832
@ ast_func_concat
Definition pugixml.cpp:7854
@ ast_op_equal
Definition pugixml.cpp:7823
@ ast_unknown
Definition pugixml.cpp:7820
@ ast_func_name_0
Definition pugixml.cpp:7850
@ ast_predicate
Definition pugixml.cpp:7836
@ ast_filter_posinv
Definition pugixml.cpp:7838
@ ast_func_not
Definition pugixml.cpp:7867
@ ast_variable
Definition pugixml.cpp:7841
@ ast_func_string_1
Definition pugixml.cpp:7853
@ ast_func_string_0
Definition pugixml.cpp:7852
@ ast_func_number_0
Definition pugixml.cpp:7871
@ ast_op_union
Definition pugixml.cpp:7835
@ ast_func_substring_before
Definition pugixml.cpp:7857
@ ast_func_string_length_0
Definition pugixml.cpp:7861
@ ast_func_local_name_1
Definition pugixml.cpp:7847
@ ast_func_namespace_uri_0
Definition pugixml.cpp:7848
@ ast_func_lang
Definition pugixml.cpp:7870
@ ast_func_true
Definition pugixml.cpp:7868
@ ast_func_normalize_space_1
Definition pugixml.cpp:7864
@ ast_op_not_equal
Definition pugixml.cpp:7824
@ ast_func_contains
Definition pugixml.cpp:7856
@ ast_op_greater
Definition pugixml.cpp:7826
@ ast_op_negate
Definition pugixml.cpp:7834
@ ast_func_substring_2
Definition pugixml.cpp:7859
@ ast_func_position
Definition pugixml.cpp:7843
@ ast_string_constant
Definition pugixml.cpp:7839
@ ast_func_ceiling
Definition pugixml.cpp:7875
@ ast_op_subtract
Definition pugixml.cpp:7830
@ ast_func_last
Definition pugixml.cpp:7842
@ ast_func_normalize_space_0
Definition pugixml.cpp:7863
@ ast_func_boolean
Definition pugixml.cpp:7866
@ ast_op_less_or_equal
Definition pugixml.cpp:7827
@ ast_step
Definition pugixml.cpp:7877
@ ast_op_multiply
Definition pugixml.cpp:7831
@ ast_func_count
Definition pugixml.cpp:7844
@ ast_func_substring_after
Definition pugixml.cpp:7858
@ ast_func_namespace_uri_1
Definition pugixml.cpp:7849
@ ast_step_root
Definition pugixml.cpp:7878
@ ast_func_translate
Definition pugixml.cpp:7865
@ ast_func_round
Definition pugixml.cpp:7876
@ ast_func_number_1
Definition pugixml.cpp:7872
@ ast_func_string_length_1
Definition pugixml.cpp:7862
@ ast_func_starts_with
Definition pugixml.cpp:7855
@ ast_op_add
Definition pugixml.cpp:7829
@ ast_op_or
Definition pugixml.cpp:7821
@ ast_op_greater_or_equal
Definition pugixml.cpp:7828
@ ast_func_false
Definition pugixml.cpp:7869
@ ast_func_local_name_0
Definition pugixml.cpp:7846
@ ast_op_mod
Definition pugixml.cpp:7833
@ ast_func_id
Definition pugixml.cpp:7845
@ ast_op_less
Definition pugixml.cpp:7825
PUGI__FN xml_encoding get_write_native_encoding()
Definition pugixml.cpp:2861
PUGI__FN const char_t * qualified_name(const xpath_node &node)
Definition pugixml.cpp:7017
PUGI__FN bool convert_number_to_boolean(double value)
Definition pugixml.cpp:6812
char_t *(* strconv_attribute_t)(char_t *, char_t)
Definition pugixml.cpp:2038
#define PUGI__SCANWHILE_UNROLL(X)
Definition pugixml.cpp:1906
PUGI__FN void as_utf8_end(char *buffer, size_t size, const wchar_t *str, size_t length)
Definition pugixml.cpp:1600
wchar_selector< sizeof(wchar_t)>::writer wchar_writer
Definition pugixml.cpp:950
PUGI__FN xml_encoding get_wchar_encoding()
Definition pugixml.cpp:1205
lexeme_t
Definition pugixml.cpp:7465
@ lex_multiply
Definition pugixml.cpp:7475
@ lex_double_slash
Definition pugixml.cpp:7483
@ lex_quoted_string
Definition pugixml.cpp:7480
@ lex_not_equal
Definition pugixml.cpp:7468
@ lex_axis_attribute
Definition pugixml.cpp:7488
@ lex_less
Definition pugixml.cpp:7469
@ lex_equal
Definition pugixml.cpp:7467
@ lex_greater_or_equal
Definition pugixml.cpp:7472
@ lex_none
Definition pugixml.cpp:7466
@ lex_union
Definition pugixml.cpp:7476
@ lex_comma
Definition pugixml.cpp:7487
@ lex_close_brace
Definition pugixml.cpp:7479
@ lex_slash
Definition pugixml.cpp:7482
@ lex_var_ref
Definition pugixml.cpp:7477
@ lex_dot
Definition pugixml.cpp:7489
@ lex_minus
Definition pugixml.cpp:7474
@ lex_string
Definition pugixml.cpp:7486
@ lex_eof
Definition pugixml.cpp:7492
@ lex_plus
Definition pugixml.cpp:7473
@ lex_number
Definition pugixml.cpp:7481
@ lex_greater
Definition pugixml.cpp:7470
@ lex_double_dot
Definition pugixml.cpp:7490
@ lex_less_or_equal
Definition pugixml.cpp:7471
@ lex_close_square_brace
Definition pugixml.cpp:7485
@ lex_open_square_brace
Definition pugixml.cpp:7484
@ lex_open_brace
Definition pugixml.cpp:7478
@ lex_double_colon
Definition pugixml.cpp:7491
PUGI__FN FILE * open_file_wide(const wchar_t *path, const wchar_t *mode)
Definition pugixml.cpp:3977
PUGI__FN void text_output_cdata(xml_buffered_writer &writer, const char_t *s)
Definition pugixml.cpp:3250
bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t *target)
Definition pugixml.cpp:1654
PUGI__FN bool get_mutable_buffer(char_t *&out_buffer, size_t &out_length, const void *contents, size_t size, bool is_mutable)
Definition pugixml.cpp:1264
static const uintptr_t xml_memory_page_type_mask
Definition pugixml.cpp:273
PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator *alloc)
Definition pugixml.cpp:6871
PUGI__FN bool is_little_endian()
Definition pugixml.cpp:1198
PUGI__FN unsigned int hash_string(const char_t *str)
Definition pugixml.cpp:7188
PUGI__FN size_t zero_terminate_buffer(void *buffer, size_t size, xml_encoding encoding)
Definition pugixml.cpp:3740
PUGI__FN char_t * strconv_comment(char_t *s, char_t endch)
Definition pugixml.cpp:1911
#define PUGI__IS_CHARTYPE(c, ct)
Definition pugixml.cpp:1195
PUGI__FN double gen_nan()
Definition pugixml.cpp:6754
PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
Definition pugixml.cpp:2189
PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
Definition pugixml.cpp:2020
PUGI__FN bool convert_buffer(char_t *&out_buffer, size_t &out_length, xml_encoding encoding, const void *contents, size_t size, bool is_mutable)
Definition pugixml.cpp:1561
PUGI__FN void convert_utf_endian_swap(T *result, const T *data, size_t length)
Definition pugixml.cpp:1111
PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream< T > &stream, void **out_buffer, size_t *out_size)
Definition pugixml.cpp:3891
PUGI__FN xml_parse_result load_stream_impl(xml_document &doc, std::basic_istream< T > &stream, unsigned int options, xml_encoding encoding)
Definition pugixml.cpp:3927
PUGI__FN bool set_value_convert(char_t *&dest, uintptr_t &header, uintptr_t header_mask, int value)
Definition pugixml.cpp:3653
I min_element(I begin, I end, const Pred &pred)
Definition pugixml.cpp:5998
#define PUGI__ENDSEG()
Definition pugixml.cpp:1907
static const uintptr_t xml_memory_page_pointer_mask
Definition pugixml.cpp:270
PUGI__FN char_t * strconv_escape(char_t *s, gap &g)
Definition pugixml.cpp:1758
void sort(I begin, I end, const Pred &pred)
Definition pugixml.cpp:6153
void destroy_attribute(xml_attribute_struct *a, xml_allocator &alloc)
Definition pugixml.cpp:574
PUGI__FN bool convert_buffer_utf32(char_t *&out_buffer, size_t &out_length, const void *contents, size_t size, opt_swap)
Definition pugixml.cpp:1490
wchar_selector< sizeof(wchar_t)>::counter wchar_counter
Definition pugixml.cpp:949
void median(I first, I middle, I last, const Pred &pred)
Definition pugixml.cpp:6134
PUGI__NS_END PUGI__NS_BEGIN PUGI__FN bool starts_with(const char_t *string, const char_t *pattern)
Definition pugixml.cpp:6534
#define PUGI__IS_CHARTYPEX(c, ct)
Definition pugixml.cpp:1196
I unique(I begin, I end)
Definition pugixml.cpp:6014
bool has_declaration(const xml_node &node)
Definition pugixml.cpp:3417
void insertion_sort(I begin, I end, const Pred &pred, T *)
Definition pugixml.cpp:6042
static const unsigned char chartypex_table[256]
Definition pugixml.cpp:1167
PUGI__FN bool check_string_to_number_format(const char_t *string)
Definition pugixml.cpp:6938
void prepend_node(xml_node_struct *child, xml_node_struct *node)
Definition pugixml.cpp:635
PUGI__FN_NO_INLINE xml_attribute_struct * append_new_attribute(xml_node_struct *node, xml_allocator &alloc)
Definition pugixml.cpp:712
PUGI__FN T * new_xpath_variable(const char_t *name)
Definition pugixml.cpp:7207
PUGI__FN const char_t * find_char(const char_t *s, char_t c)
Definition pugixml.cpp:6545
PUGI__FN size_t get_valid_length(const char_t *data, size_t length)
Definition pugixml.cpp:2961
void insert_node_before(xml_node_struct *child, xml_node_struct *node)
Definition pugixml.cpp:670
PUGI__FN std::string as_utf8_impl(const wchar_t *str, size_t length)
Definition pugixml.cpp:1614
xml_allocator & get_allocator(const xml_node_struct *node)
Definition pugixml.cpp:548
xml_memory_management_function_storage< int > xml_memory
Definition pugixml.cpp:170
PUGI__FN xml_parse_status get_file_size(FILE *file, size_t &out_result)
Definition pugixml.cpp:3701
PUGI__FN bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)
Definition pugixml.cpp:6633
char_t *(* strconv_pcdata_t)(char_t *)
Definition pugixml.cpp:1967
void remove_node(xml_node_struct *node)
Definition pugixml.cpp:687
static const unsigned char chartype_table[256]
Definition pugixml.cpp:1137
#define PUGI__ENDSWITH(c, e)
Definition pugixml.cpp:1899
PUGI__NS_END PUGI__NS_BEGIN uint16_t endian_swap(uint16_t value)
Definition pugixml.cpp:752
void copy_backwards(I begin, I end, I target)
Definition pugixml.cpp:6037
PUGI__FN unsigned int node_height(xml_node n)
Definition pugixml.cpp:6620
bool is_text_node(xml_node_struct *node)
Definition pugixml.cpp:3516
PUGI__FN xpath_string string_value(const xpath_node &na, xpath_allocator *alloc)
Definition pugixml.cpp:6570
PUGI__FN xml_parse_result load_file_impl(xml_document &doc, FILE *file, unsigned int options, xml_encoding encoding)
Definition pugixml.cpp:3764
void reverse(I begin, I end)
Definition pugixml.cpp:6009
#define PUGI__FN_NO_INLINE
Definition pugixml.cpp:129
#define PUGI__STATIC_ASSERT(cond)
Definition pugixml.cpp:95
#define PUGI__NS_BEGIN
Definition pugixml.cpp:125
PUGI__FN void node_output(xml_buffered_writer &writer, const xml_node &node, const char_t *indent, unsigned int flags, unsigned int depth)
Definition pugixml.cpp:3288
PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream< T > &stream, void **out_buffer, size_t *out_size)
Definition pugixml.cpp:3837
#define PUGI__FN
Definition pugixml.cpp:128
PUGI__FN bool get_value_bool(const char_t *value, bool def)
Definition pugixml.cpp:3585
PUGI__FN_NO_INLINE xml_node_struct * append_new_node(xml_node_struct *node, xml_allocator &alloc, xml_node_type type=node_element)
Definition pugixml.cpp:702
#define PUGI__SKIPWS()
Definition pugixml.cpp:1900
#define PUGI__DMC_VOLATILE
Definition pugixml.cpp:101
PUGI__NS_END PUGI__NS_BEGIN xml_attribute_struct * allocate_attribute(xml_allocator &alloc)
Definition pugixml.cpp:558
PUGI__FN double get_value_double(const char_t *value, double def)
Definition pugixml.cpp:3563
#define PUGI__THROW_ERROR(err, m)
Definition pugixml.cpp:1908
PUGI__NS_END PUGI__NS_BEGIN PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node *begin, xpath_node *end, xpath_node_set::type_t type, bool rev)
Definition pugixml.cpp:7302
PUGI__FN bool allow_move(const xml_node &parent, const xml_node &child)
Definition pugixml.cpp:3439
xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset=0)
Definition pugixml.cpp:2215
PUGI__FN const char_t * namespace_uri(const xml_node &node)
Definition pugixml.cpp:7053
PUGI__FN size_t convert_buffer_output(char_t *, uint8_t *r_u8, uint16_t *r_u16, uint32_t *r_u32, const char_t *data, size_t length, xml_encoding encoding)
Definition pugixml.cpp:2977
#define PUGI__SCANFOR(X)
Definition pugixml.cpp:1904
PUGI__FN const char_t * convert_number_to_string_special(double value)
Definition pugixml.cpp:6780
bool allow_insert_child(xml_node_type parent, xml_node_type child)
Definition pugixml.cpp:3430
static const uintptr_t xml_memory_page_name_allocated_mask
Definition pugixml.cpp:271
PUGI__NS_END static PUGI__NS_BEGIN const size_t xml_memory_page_size
Definition pugixml.cpp:261
PUGI__FN bool convert_string_to_number_scratch(char_t(&buffer)[32], const char_t *begin, const char_t *end, double *out_result)
Definition pugixml.cpp:6981
#define PUGI__NS_END
Definition pugixml.cpp:126
PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
Definition pugixml.cpp:2870
PUGI__FN void convert_number_to_mantissa_exponent(double value, char *buffer, size_t buffer_size, char **out_mantissa, int *out_exponent)
Definition pugixml.cpp:6840
PUGI__FN void delete_xpath_variable(T *var)
Definition pugixml.cpp:7244
PUGI__FN bool convert_buffer_latin1(char_t *&out_buffer, size_t &out_length, const void *contents, size_t size, bool is_mutable)
Definition pugixml.cpp:1524
void swap(T &lhs, T &rhs)
Definition pugixml.cpp:5991
PUGI__NS_END PUGI__NS_BEGIN PUGI__FN size_t strlength(const char_t *s)
Definition pugixml.cpp:176
PUGI__FN char * convert_path_heap(const wchar_t *str)
Definition pugixml.cpp:3959
PUGI__FN void recursive_copy_skip(xml_node &dest, const xml_node &source, const xml_node &skip)
Definition pugixml.cpp:3463
PUGI__FN double round_nearest(double value)
Definition pugixml.cpp:7005
void insert_node_after(xml_node_struct *child, xml_node_struct *node)
Definition pugixml.cpp:653
PUGI__FN void node_output_attributes(xml_buffered_writer &writer, const xml_node &node, unsigned int flags)
Definition pugixml.cpp:3272
nodetest_t
Definition pugixml.cpp:7899
@ nodetest_all
Definition pugixml.cpp:7907
@ nodetest_name
Definition pugixml.cpp:7901
@ nodetest_none
Definition pugixml.cpp:7900
@ nodetest_type_text
Definition pugixml.cpp:7905
@ nodetest_all_in_namespace
Definition pugixml.cpp:7908
@ nodetest_pi
Definition pugixml.cpp:7906
@ nodetest_type_comment
Definition pugixml.cpp:7903
@ nodetest_type_node
Definition pugixml.cpp:7902
@ nodetest_type_pi
Definition pugixml.cpp:7904
static const uintptr_t xml_memory_page_alignment
Definition pugixml.cpp:269
PUGI__FN bool set_value_buffer(char_t *&dest, uintptr_t &header, uintptr_t header_mask, char(&buf)[128])
Definition pugixml.cpp:3641
#define PUGI__POPNODE()
Definition pugixml.cpp:1903
PUGI__FN bool strcpy_insitu(char_t *&dest, uintptr_t &header, uintptr_t header_mask, const char_t *source)
Definition pugixml.cpp:1668
void partition(I begin, I middle, I end, const Pred &pred, I *out_eqbeg, I *out_eqend)
Definition pugixml.cpp:6074
PUGI__FN void normalize_space(char_t *buffer)
Definition pugixml.cpp:7097
PUGI__FN const char_t * find_substring(const char_t *s, const char_t *p)
Definition pugixml.cpp:6554
PUGI__FN bool strequalrange(const char_t *lhs, const char_t *rhs, size_t count)
Definition pugixml.cpp:200
PUGI__FN bool save_file_impl(const xml_document &doc, FILE *file, const char_t *indent, unsigned int flags, xml_encoding encoding)
Definition pugixml.cpp:3997
PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
Definition pugixml.cpp:1215
PUGI__FN double convert_string_to_number(const char_t *string)
Definition pugixml.cpp:6968
PUGI__FN bool is_nan(double value)
Definition pugixml.cpp:6767
static const uintptr_t xml_memory_page_value_allocated_mask
Definition pugixml.cpp:272
PUGI__FN xpath_node xpath_first(const xpath_node *begin, const xpath_node *end, xpath_node_set::type_t type)
Definition pugixml.cpp:7318
#define PUGI__PUSHNODE(TYPE)
Definition pugixml.cpp:1902
static const xpath_node_set dummy_node_set
Definition pugixml.cpp:7186
PUGI__FN size_t as_utf8_begin(const wchar_t *str, size_t length)
Definition pugixml.cpp:1594
PUGI__FN xpath_variable * get_variable_scratch(char_t(&buffer)[32], xpath_variable_set *set, const char_t *begin, const char_t *end)
Definition pugixml.cpp:7275
PUGI__FN void text_output(xml_buffered_writer &writer, const char_t *s, chartypex_t type, unsigned int flags)
Definition pugixml.cpp:3242
PUGI__FN void translate(char_t *buffer, const char_t *from, const char_t *to)
Definition pugixml.cpp:7123
xml_node_struct * allocate_node(xml_allocator &alloc, xml_node_type type)
Definition pugixml.cpp:566
#define PUGI__SCANWHILE(X)
Definition pugixml.cpp:1905
PUGI__FN void truncate_zeros(char *begin, char *end)
Definition pugixml.cpp:6817
PUGI__FN unsigned int get_value_uint(const char_t *value, unsigned int def)
Definition pugixml.cpp:3550
axis_t
Definition pugixml.cpp:7882
@ axis_preceding
Definition pugixml.cpp:7893
@ axis_ancestor_or_self
Definition pugixml.cpp:7884
@ axis_attribute
Definition pugixml.cpp:7885
@ axis_following_sibling
Definition pugixml.cpp:7890
@ axis_child
Definition pugixml.cpp:7886
@ axis_descendant
Definition pugixml.cpp:7887
@ axis_descendant_or_self
Definition pugixml.cpp:7888
@ axis_self
Definition pugixml.cpp:7895
@ axis_following
Definition pugixml.cpp:7889
@ axis_ancestor
Definition pugixml.cpp:7883
@ axis_preceding_sibling
Definition pugixml.cpp:7894
@ axis_namespace
Definition pugixml.cpp:7891
@ axis_parent
Definition pugixml.cpp:7892
chartype_t
Definition pugixml.cpp:1126
@ ct_parse_comment
Definition pugixml.cpp:1132
@ ct_start_symbol
Definition pugixml.cpp:1134
@ ct_parse_attr
Definition pugixml.cpp:1128
@ ct_parse_attr_ws
Definition pugixml.cpp:1129
@ ct_parse_cdata
Definition pugixml.cpp:1131
@ ct_parse_pcdata
Definition pugixml.cpp:1127
@ ct_space
Definition pugixml.cpp:1130
@ ct_symbol
Definition pugixml.cpp:1133
PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void *contents, size_t size)
Definition pugixml.cpp:1239
PUGI__FN float get_value_float(const char_t *value, float def)
Definition pugixml.cpp:3574
PUGI__FN const char_t * local_name(const xpath_node &node)
Definition pugixml.cpp:7022
void median3(I first, I middle, I last, const Pred &pred)
Definition pugixml.cpp:6127
void append_node(xml_node_struct *child, xml_node_struct *node)
Definition pugixml.cpp:612
PUGI__FN bool strequal(const char_t *src, const char_t *dst)
Definition pugixml.cpp:188
PUGI__FN bool convert_buffer_utf16(char_t *&out_buffer, size_t &out_length, const void *contents, size_t size, opt_swap)
Definition pugixml.cpp:1465
PUGI__FN std::basic_string< wchar_t > as_wide_impl(const char *str, size_t size)
Definition pugixml.cpp:1629
chartypex_t
Definition pugixml.cpp:1159
@ ctx_digit
Definition pugixml.cpp:1163
@ ctx_special_attr
Definition pugixml.cpp:1161
@ ctx_symbol
Definition pugixml.cpp:1164
@ ctx_start_symbol
Definition pugixml.cpp:1162
@ ctx_special_pcdata
Definition pugixml.cpp:1160
PUGI__FN const void * document_order(const xpath_node &xnode)
Definition pugixml.cpp:6667
PUGI__FN char_t * strconv_cdata(char_t *s, char_t endch)
Definition pugixml.cpp:1939
PUGI__FN xpath_string xpath_string_const(const char_t *str)
Definition pugixml.cpp:6527
PUGI__NS_BEGIN PUGI__FN void * default_allocate(size_t size)
Definition pugixml.cpp:150
PUGI__FN bool node_is_ancestor(xml_node parent, xml_node node)
Definition pugixml.cpp:6660
PUGI__FN int get_value_int(const char_t *value, int def)
Definition pugixml.cpp:3537
PUGI__FN char_t tolower_ascii(char_t ch)
Definition pugixml.cpp:6565
PUGI__FN int get_integer_base(const char_t *value)
Definition pugixml.cpp:3524
PUGI__FN void text_output_escaped(xml_buffered_writer &writer, const char_t *s, chartypex_t type)
Definition pugixml.cpp:3201
unsigned short uint16_t
Definition stdint_msvc.h:80
_W64 unsigned int uintptr_t
unsigned int uint32_t
Definition stdint_msvc.h:81
unsigned char uint8_t
Definition stdint_msvc.h:79
static const axis_t axis
Definition pugixml.cpp:7913
void(* deleter)(void *)
Definition pugixml.cpp:239
void * release()
Definition pugixml.cpp:250
buffer_holder(void *data_, void(*deleter_)(void *))
Definition pugixml.cpp:241
bool operator()(const xpath_node &lhs, const xpath_node &rhs) const
Definition pugixml.cpp:6692
bool operator()(const xpath_node &lhs, const xpath_node &rhs) const
Definition pugixml.cpp:6747
bool operator()(const T &lhs, const T &rhs) const
Definition pugixml.cpp:5961
char_t * end
Definition pugixml.cpp:1718
gap()
Definition pugixml.cpp:1721
char_t * flush(char_t *s)
Definition pugixml.cpp:1744
void push(char_t *&s, size_t count)
Definition pugixml.cpp:1727
size_t size
Definition pugixml.cpp:1719
static value_type high(value_type result, uint32_t ch)
Definition pugixml.cpp:923
static value_type low(value_type result, uint32_t ch)
Definition pugixml.cpp:916
uint8_t * value_type
Definition pugixml.cpp:914
bool operator()(const T &lhs, const T &rhs) const
Definition pugixml.cpp:5985
bool operator()(const T &lhs, const T &rhs) const
Definition pugixml.cpp:5977
namespace_uri_predicate(const char_t *name)
Definition pugixml.cpp:7035
const char_t * prefix
Definition pugixml.cpp:7032
bool operator()(const xml_attribute &a) const
Definition pugixml.cpp:7043
bool operator()(const T &lhs, const T &rhs) const
Definition pugixml.cpp:5969
A 'name=value' XML attribute structure.
Definition pugixml.cpp:490
xml_attribute_struct * prev_attribute_c
Previous attribute (cyclic list)
Definition pugixml.cpp:501
xml_attribute_struct(impl::xml_memory_page *page)
Default ctor.
Definition pugixml.cpp:492
xml_attribute_struct * next_attribute
Next attribute.
Definition pugixml.cpp:502
char_t * name
Pointer to attribute name.
Definition pugixml.cpp:498
char_t * value
Pointer to attribute value.
Definition pugixml.cpp:499
An XML document tree node.
Definition pugixml.cpp:507
char_t * value
Pointer to any associated string data.
Definition pugixml.cpp:519
xml_attribute_struct * first_attribute
First attribute.
Definition pugixml.cpp:526
xml_node_struct * prev_sibling_c
Left brother (cyclic list)
Definition pugixml.cpp:523
xml_node_struct * next_sibling
Right brother.
Definition pugixml.cpp:524
char_t * name
Pointer to element name.
Definition pugixml.cpp:518
xml_node_struct * parent
Pointer to parent.
Definition pugixml.cpp:516
xml_node_struct * first_child
First child.
Definition pugixml.cpp:521
xml_node_struct(impl::xml_memory_page *page, xml_node_type type)
Default ctor.
Definition pugixml.cpp:510
static char_t * parse_eol(char_t *s, char_t end_quote)
Definition pugixml.cpp:2130
static char_t * parse_simple(char_t *s, char_t end_quote)
Definition pugixml.cpp:2162
static char_t * parse_wnorm(char_t *s, char_t end_quote)
Definition pugixml.cpp:2042
static char_t * parse_wconv(char_t *s, char_t end_quote)
Definition pugixml.cpp:2094
static char_t * parse(char_t *s)
Definition pugixml.cpp:1971
size_t value_type
Definition pugixml.cpp:830
static value_type low(value_type result, uint32_t)
Definition pugixml.cpp:832
static value_type high(value_type result, uint32_t)
Definition pugixml.cpp:837
static value_type high(value_type result, uint32_t ch)
Definition pugixml.cpp:854
uint16_t * value_type
Definition pugixml.cpp:845
static value_type low(value_type result, uint32_t ch)
Definition pugixml.cpp:847
static value_type any(value_type result, uint32_t ch)
Definition pugixml.cpp:865
static value_type low(value_type result, uint32_t)
Definition pugixml.cpp:875
size_t value_type
Definition pugixml.cpp:873
static value_type high(value_type result, uint32_t)
Definition pugixml.cpp:880
static value_type low(value_type result, uint32_t ch)
Definition pugixml.cpp:890
uint32_t * value_type
Definition pugixml.cpp:888
static value_type high(value_type result, uint32_t ch)
Definition pugixml.cpp:897
static value_type any(value_type result, uint32_t ch)
Definition pugixml.cpp:904
static value_type low(value_type result, uint32_t ch)
Definition pugixml.cpp:766
static value_type high(value_type result, uint32_t)
Definition pugixml.cpp:776
size_t value_type
Definition pugixml.cpp:764
static value_type any(value_type result, uint32_t ch)
Definition pugixml.cpp:822
static value_type high(value_type result, uint32_t ch)
Definition pugixml.cpp:812
static value_type low(value_type result, uint32_t ch)
Definition pugixml.cpp:787
uint8_t * value_type
Definition pugixml.cpp:785
static Traits::value_type decode_wchar_block_impl(const uint32_t *data, size_t size, typename Traits::value_type result)
Definition pugixml.cpp:1100
static Traits::value_type decode_latin1_block(const uint8_t *data, size_t size, typename Traits::value_type result)
Definition pugixml.cpp:1085
static Traits::value_type decode_wchar_block_impl(const uint16_t *data, size_t size, typename Traits::value_type result)
Definition pugixml.cpp:1095
static Traits::value_type decode_wchar_block(const wchar_t *data, size_t size, typename Traits::value_type result)
Definition pugixml.cpp:1105
static Traits::value_type decode_utf8_block(const uint8_t *data, size_t size, typename Traits::value_type result)
Definition pugixml.cpp:954
static Traits::value_type decode_utf32_block(const uint32_t *data, size_t size, typename Traits::value_type result)
Definition pugixml.cpp:1060
static Traits::value_type decode_utf16_block(const uint16_t *data, size_t size, typename Traits::value_type result)
Definition pugixml.cpp:1016
utf16_counter counter
Definition pugixml.cpp:938
utf16_writer writer
Definition pugixml.cpp:939
utf32_writer writer
Definition pugixml.cpp:946
utf32_counter counter
Definition pugixml.cpp:945
void * allocate_memory(size_t size, xml_memory_page *&out_page)
Definition pugixml.cpp:348
char_t * allocate_string(size_t length)
Definition pugixml.cpp:396
static void deallocate_page(xml_memory_page *page)
Definition pugixml.cpp:341
void * allocate_memory_oob(size_t size, xml_memory_page *&out_page)
Definition pugixml.cpp:446
xml_memory_page * _root
Definition pugixml.cpp:442
size_t _busy_size
Definition pugixml.cpp:443
void deallocate_memory(void *ptr, size_t size, xml_memory_page *page)
Definition pugixml.cpp:361
xml_memory_page * allocate_page(size_t data_size)
Definition pugixml.cpp:320
xml_allocator(xml_memory_page *root)
Definition pugixml.cpp:316
void deallocate_string(char_t *string)
Definition pugixml.cpp:424
const char_t * buffer
Definition pugixml.cpp:543
xml_document_struct(xml_memory_page *page)
Definition pugixml.cpp:539
xml_extra_buffer * extra_buffers
Definition pugixml.cpp:545
xml_extra_buffer * next
Definition pugixml.cpp:534
char_t * buffer
Definition pugixml.cpp:533
static deallocation_function deallocate
Definition pugixml.cpp:164
static allocation_function allocate
Definition pugixml.cpp:163
xml_memory_page * prev
Definition pugixml.cpp:299
static xml_memory_page * construct(void *memory)
Definition pugixml.cpp:279
xml_memory_page * next
Definition pugixml.cpp:300
size_t freed_size
Definition pugixml.cpp:303
xml_allocator * allocator
Definition pugixml.cpp:295
static char_t * parse_skip_bom(char_t *s)
Definition pugixml.cpp:2787
xml_parse_status error_status
Definition pugixml.cpp:2228
char_t * parse_doctype_primitive(char_t *s)
Definition pugixml.cpp:2241
xml_allocator alloc
Definition pugixml.cpp:2226
char_t * error_offset
Definition pugixml.cpp:2227
char_t * parse_tree(char_t *s, xml_node_struct *root, unsigned int optmsk, char_t endch)
Definition pugixml.cpp:2548
static xml_parse_result parse(char_t *buffer, size_t length, xml_document_struct *xmldoc, xml_node_struct *root, unsigned int optmsk)
Definition pugixml.cpp:2806
static bool has_element_node_siblings(xml_node_struct *node)
Definition pugixml.cpp:2793
char_t * parse_question(char_t *s, xml_node_struct *&ref_cursor, unsigned int optmsk, char_t endch)
Definition pugixml.cpp:2455
char_t * parse_doctype_ignore(char_t *s)
Definition pugixml.cpp:2274
xml_parser(const xml_allocator &alloc_)
Definition pugixml.cpp:2230
char_t * parse_doctype_group(char_t *s, char_t endch, bool toplevel)
Definition pugixml.cpp:2300
char_t * parse_exclamation(char_t *s, xml_node_struct *cursor, unsigned int optmsk, char_t endch)
Definition pugixml.cpp:2344
static xml_stream_chunk * create()
Definition pugixml.cpp:3807
T data[xml_memory_page_size/sizeof(T)]
Definition pugixml.cpp:3834
static void destroy(void *ptr)
Definition pugixml.cpp:3814
xml_stream_chunk * next
Definition pugixml.cpp:3831
xpath_allocator _state
Definition pugixml.cpp:6359
xpath_allocator * _target
Definition pugixml.cpp:6358
xpath_allocator_capture(xpath_allocator *alloc)
Definition pugixml.cpp:6349
xpath_context(const xpath_node &n_, size_t position_, size_t size_)
Definition pugixml.cpp:7459
xpath_node n
Definition pugixml.cpp:7456
size_t position
Definition pugixml.cpp:7457
const char_t * begin
Definition pugixml.cpp:7497
const char_t * end
Definition pugixml.cpp:7498
bool operator==(const char_t *other) const
Definition pugixml.cpp:7504
xpath_memory_block * next
Definition pugixml.cpp:6188
xpath_value_type rettype
Definition pugixml.cpp:9945
binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_)
Definition pugixml.cpp:9952
static binary_op_t parse(xpath_lexer &lexer)
Definition pugixml.cpp:9956
void throw_error(const char *message)
Definition pugixml.cpp:9274
xpath_ast_node * parse_step(xpath_ast_node *set)
Definition pugixml.cpp:9683
xpath_ast_node * parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node *args[2])
Definition pugixml.cpp:9322
xpath_variable_set * _variables
Definition pugixml.cpp:9264
xpath_parser(const char_t *query, xpath_variable_set *variables, xpath_allocator *alloc, xpath_parse_result *result)
xpath_ast_node * parse_primary_expression()
Definition pugixml.cpp:9548
xpath_lexer _lexer
Definition pugixml.cpp:9261
xpath_ast_node * parse_filter_expression()
Definition pugixml.cpp:9653
nodetest_t parse_node_test_type(const xpath_lexer_string &name)
Definition pugixml.cpp:9512
xpath_ast_node * parse_expression()
const char_t * alloc_string(const xpath_lexer_string &value)
Definition pugixml.cpp:9304
void * alloc_node()
Definition pugixml.cpp:9295
xpath_parse_result * _result
Definition pugixml.cpp:9266
xpath_ast_node * parse_function(const xpath_lexer_string &name, size_t argc, xpath_ast_node *args[2])
Definition pugixml.cpp:9331
xpath_ast_node * parse()
char_t _scratch[32]
Definition pugixml.cpp:9268
static xpath_ast_node * parse(const char_t *query, xpath_variable_set *variables, xpath_allocator *alloc, xpath_parse_result *result)
const char_t * _query
Definition pugixml.cpp:9263
xpath_ast_node * parse_path_or_unary_expression()
Definition pugixml.cpp:9883
xpath_allocator * _alloc
Definition pugixml.cpp:9260
axis_t parse_axis_name(const xpath_lexer_string &name, bool &specified)
Definition pugixml.cpp:9444
xpath_ast_node * parse_expression_rec(xpath_ast_node *lhs, int limit)
void throw_error_oom()
Definition pugixml.cpp:9286
xpath_ast_node * parse_relative_location_path(xpath_ast_node *set)
Definition pugixml.cpp:9827
xpath_ast_node * parse_location_path()
Definition pugixml.cpp:9847
xpath_memory_block block
static void destroy(void *ptr)
xpath_ast_node * root
static xpath_query_impl * create()
xpath_allocator alloc
xpath_allocator temp
Definition pugixml.cpp:6372
xpath_memory_block blocks[2]
Definition pugixml.cpp:6370
xpath_allocator result
Definition pugixml.cpp:6371
xpath_stack stack
Definition pugixml.cpp:6373
xpath_allocator * temp
Definition pugixml.cpp:6365
xpath_allocator * result
Definition pugixml.cpp:6364
xpath_node_set value
Definition pugixml.cpp:7182

Generated on Mon Mar 4 2024 21:10:02 for QuickFIX by doxygen 1.9.8 written by Dimitri van Heesch, © 1997-2001