Public Member Functions | Static Public Member Functions | Public Attributes | List of all members
xml_parser Struct Reference
Collaboration diagram for xml_parser:
Collaboration graph
[legend]

Public Member Functions

 xml_parser (const xml_allocator &alloc_)
 
char_t * parse_doctype_primitive (char_t *s)
 
char_t * parse_doctype_ignore (char_t *s)
 
char_t * parse_doctype_group (char_t *s, char_t endch, bool toplevel)
 
char_t * parse_exclamation (char_t *s, xml_node_struct *cursor, unsigned int optmsk, char_t endch)
 
char_t * parse_question (char_t *s, xml_node_struct *&ref_cursor, unsigned int optmsk, char_t endch)
 
char_t * parse_tree (char_t *s, xml_node_struct *root, unsigned int optmsk, char_t endch)
 

Static Public Member Functions

static char_t * parse_skip_bom (char_t *s)
 
static bool has_element_node_siblings (xml_node_struct *node)
 
static xml_parse_result parse (char_t *buffer, size_t length, xml_document_struct *xmldoc, xml_node_struct *root, unsigned int optmsk)
 

Public Attributes

xml_allocator alloc
 
char_t * error_offset
 
xml_parse_status error_status
 

Detailed Description

Definition at line 2224 of file pugixml.cpp.

Constructor & Destructor Documentation

◆ xml_parser()

xml_parser::xml_parser ( const xml_allocator &  alloc_)
inline

Definition at line 2230 of file pugixml.cpp.

2230 : alloc(alloc_), error_offset(0), error_status(status_ok)
2231 {
2232 }
xml_parse_status error_status
Definition pugixml.cpp:2228
xml_allocator alloc
Definition pugixml.cpp:2226
char_t * error_offset
Definition pugixml.cpp:2227

Member Function Documentation

◆ has_element_node_siblings()

static bool xml_parser::has_element_node_siblings ( xml_node_struct *  node)
inline static

Definition at line 2793 of file pugixml.cpp.

2794 {
2795 while (node)
2796 {
2797 xml_node_type type = static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1);
2798 if (type == node_element) return true;
2799
2800 node = node->next_sibling;
2801 }
2802
2803 return false;
2804 }

Referenced by parse().

◆ parse()

static xml_parse_result xml_parser::parse ( char_t *  buffer,
size_t  length,
xml_document_struct *  xmldoc,
xml_node_struct *  root,
unsigned int  optmsk 
)
inline static

Definition at line 2806 of file pugixml.cpp.

2807 {
2808 // allocator object is a part of document object
2809 xml_allocator& alloc = *static_cast<xml_allocator*>(xmldoc);
2810
2811 // early-out for empty documents
2812 if (length == 0)
2813 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
2814
2815 // get last child of the root before parsing
2816 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c : 0;
2817
2818 // create parser on stack
2819 xml_parser parser(alloc);
2820
2821 // save last character and make buffer zero-terminated (speeds up parsing)
2822 char_t endch = buffer[length - 1];
2823 buffer[length - 1] = 0;
2824
2825 // skip BOM to make sure it does not end up as part of parse output
2826 char_t* buffer_data = parse_skip_bom(buffer);
2827
2828 // perform actual parsing
2829 parser.parse_tree(buffer_data, root, optmsk, endch);
2830
2831 // update allocator state
2832 alloc = parser.alloc;
2833
2834 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
2835 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
2836
2837 if (result)
2838 {
2839 // since we removed last character, we have to handle the only possible false positive (stray <)
2840 if (endch == '<')
2841 return make_parse_result(status_unrecognized_tag, length - 1);
2842
2843 // check if there are any element nodes parsed
2844 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling : root->first_child;
2845
2846 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
2847 return make_parse_result(status_no_document_element, length - 1);
2848 }
2849 else
2850 {
2851 // roll back offset if it occurs on a null terminator in the source buffer
2852 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
2853 result.offset--;
2854 }
2855
2856 return result;
2857 }
#define PUGI__OPTSET(OPT)
Definition pugixml.cpp:1901
xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset=0)
Definition pugixml.cpp:2215
static char_t * parse_skip_bom(char_t *s)
Definition pugixml.cpp:2787
static bool has_element_node_siblings(xml_node_struct *node)
Definition pugixml.cpp:2793

References alloc, error_offset, error_status, has_element_node_siblings(), make_parse_result(), parse_skip_bom(), parse_tree(), and PUGI__OPTSET.

◆ parse_doctype_group()

char_t * xml_parser::parse_doctype_group ( char_t *  s,
char_t  endch,
bool  toplevel 
)
inline

Definition at line 2300 of file pugixml.cpp.

2301 {
2302 assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
2303 s++;
2304
2305 while (*s)
2306 {
2307 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
2308 {
2309 if (s[2] == '[')
2310 {
2311 // ignore
2312 s = parse_doctype_ignore(s);
2313 if (!s) return s;
2314 }
2315 else
2316 {
2317 // some control group
2318 s = parse_doctype_group(s, endch, false);
2319 if (!s) return s;
2320
2321 // skip >
2322 assert(*s == '>');
2323 s++;
2324 }
2325 }
2326 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
2327 {
2328 // unknown tag (forbidden), or some primitive group
2329 s = parse_doctype_primitive(s);
2330 if (!s) return s;
2331 }
2332 else if (*s == '>')
2333 {
2334 return s;
2335 }
2336 else s++;
2337 }
2338
2339 if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
2340
2341 return s;
2342 }
#define PUGI__THROW_ERROR(err, m)
Definition pugixml.cpp:1908
char_t * parse_doctype_primitive(char_t *s)
Definition pugixml.cpp:2241
char_t * parse_doctype_ignore(char_t *s)
Definition pugixml.cpp:2274
char_t * parse_doctype_group(char_t *s, char_t endch, bool toplevel)
Definition pugixml.cpp:2300

References parse_doctype_group(), parse_doctype_ignore(), parse_doctype_primitive(), and PUGI__THROW_ERROR.

Referenced by parse_doctype_group(), and parse_exclamation().

◆ parse_doctype_ignore()

char_t * xml_parser::parse_doctype_ignore ( char_t *  s)
inline

Definition at line 2274 of file pugixml.cpp.

2275 {
2276 assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2277 s++;
2278
2279 while (*s)
2280 {
2281 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2282 {
2283 // nested ignore section
2284 s = parse_doctype_ignore(s);
2285 if (!s) return s;
2286 }
2287 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2288 {
2289 // ignore section end
2290 s += 3;
2291
2292 return s;
2293 }
2294 else s++;
2295 }
2296
2297 PUGI__THROW_ERROR(status_bad_doctype, s);
2298 }

References parse_doctype_ignore(), and PUGI__THROW_ERROR.

Referenced by parse_doctype_group(), and parse_doctype_ignore().

◆ parse_doctype_primitive()

char_t * xml_parser::parse_doctype_primitive ( char_t *  s)
inline

Definition at line 2241 of file pugixml.cpp.

2242 {
2243 if (*s == '"' || *s == '\'')
2244 {
2245 // quoted string
2246 char_t ch = *s++;
2247 PUGI__SCANFOR(*s == ch);
2248 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2249
2250 s++;
2251 }
2252 else if (s[0] == '<' && s[1] == '?')
2253 {
2254 // <? ... ?>
2255 s += 2;
2256 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2257 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2258
2259 s += 2;
2260 }
2261 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2262 {
2263 s += 4;
2264 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2265 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2266
2267 s += 4;
2268 }
2269 else PUGI__THROW_ERROR(status_bad_doctype, s);
2270
2271 return s;
2272 }
#define PUGI__SCANFOR(X)
Definition pugixml.cpp:1904

References PUGI__SCANFOR, and PUGI__THROW_ERROR.

Referenced by parse_doctype_group().

◆ parse_exclamation()

char_t * xml_parser::parse_exclamation ( char_t *  s,
xml_node_struct *  cursor,
unsigned int  optmsk,
char_t  endch 
)
inline

Definition at line 2344 of file pugixml.cpp.

2345 {
2346 // parse node contents, starting with exclamation mark
2347 ++s;
2348
2349 if (*s == '-') // '<!-...'
2350 {
2351 ++s;
2352
2353 if (*s == '-') // '<!--...'
2354 {
2355 ++s;
2356
2357 if (PUGI__OPTSET(parse_comments))
2358 {
2359 PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
2360 cursor->value = s; // Save the offset.
2361 }
2362
2363 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
2364 {
2365 s = strconv_comment(s, endch);
2366
2367 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
2368 }
2369 else
2370 {
2371 // Scan for terminating '-->'.
2372 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
2373 PUGI__CHECK_ERROR(status_bad_comment, s);
2374
2375 if (PUGI__OPTSET(parse_comments))
2376 *s = 0; // Zero-terminate this segment at the first terminating '-'.
2377
2378 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
2379 }
2380 }
2381 else PUGI__THROW_ERROR(status_bad_comment, s);
2382 }
2383 else if (*s == '[')
2384 {
2385 // '<![CDATA[...'
2386 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
2387 {
2388 ++s;
2389
2390 if (PUGI__OPTSET(parse_cdata))
2391 {
2392 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
2393 cursor->value = s; // Save the offset.
2394
2395 if (PUGI__OPTSET(parse_eol))
2396 {
2397 s = strconv_cdata(s, endch);
2398
2399 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
2400 }
2401 else
2402 {
2403 // Scan for terminating ']]>'.
2404 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
2405 PUGI__CHECK_ERROR(status_bad_cdata, s);
2406
2407 *s++ = 0; // Zero-terminate this segment.
2408 }
2409 }
2410 else // Flagged for discard, but we still have to scan for the terminator.
2411 {
2412 // Scan for terminating ']]>'.
2413 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
2414 PUGI__CHECK_ERROR(status_bad_cdata, s);
2415
2416 ++s;
2417 }
2418
2419 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
2420 }
2421 else PUGI__THROW_ERROR(status_bad_cdata, s);
2422 }
2423 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
2424 {
2425 s -= 2;
2426
2427 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
2428
2429 char_t* mark = s + 9;
2430
2431 s = parse_doctype_group(s, endch, true);
2432 if (!s) return s;
2433
2434 assert((*s == 0 && endch == '>') || *s == '>');
2435 if (*s) *s++ = 0;
2436
2437 if (PUGI__OPTSET(parse_doctype))
2438 {
2439 while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
2440
2441 PUGI__PUSHNODE(node_doctype);
2442
2443 cursor->value = mark;
2444
2445 PUGI__POPNODE();
2446 }
2447 }
2448 else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
2449 else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
2450 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
2451
2452 return s;
2453 }
#define PUGI__CHECK_ERROR(err, m)
Definition pugixml.cpp:1909
PUGI__FN char_t * strconv_comment(char_t *s, char_t endch)
Definition pugixml.cpp:1911
#define PUGI__IS_CHARTYPE(c, ct)
Definition pugixml.cpp:1195
#define PUGI__ENDSWITH(c, e)
Definition pugixml.cpp:1899
#define PUGI__POPNODE()
Definition pugixml.cpp:1903
#define PUGI__PUSHNODE(TYPE)
Definition pugixml.cpp:1902
@ ct_space
Definition pugixml.cpp:1130
PUGI__FN char_t * strconv_cdata(char_t *s, char_t endch)
Definition pugixml.cpp:1939

References ct_space, parse_doctype_group(), PUGI__CHECK_ERROR, PUGI__ENDSWITH, PUGI__IS_CHARTYPE, PUGI__OPTSET, PUGI__POPNODE, PUGI__PUSHNODE, PUGI__SCANFOR, PUGI__THROW_ERROR, strconv_cdata(), and strconv_comment().

Referenced by parse_tree().

◆ parse_question()

char_t * xml_parser::parse_question ( char_t *  s,
xml_node_struct *&  ref_cursor,
unsigned int  optmsk,
char_t  endch 
)
inline

Definition at line 2455 of file pugixml.cpp.

2456 {
2457 // load into registers
2458 xml_node_struct* cursor = ref_cursor;
2459 char_t ch = 0;
2460
2461 // parse node contents, starting with question mark
2462 ++s;
2463
2464 // read PI target
2465 char_t* target = s;
2466
2467 if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
2468
2469 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
2470 PUGI__CHECK_ERROR(status_bad_pi, s);
2471
2472 // determine node type; stricmp / strcasecmp is not portable
2473 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
2474
2475 if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
2476 {
2477 if (declaration)
2478 {
2479 // disallow non top-level declarations
2480 if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
2481
2482 PUGI__PUSHNODE(node_declaration);
2483 }
2484 else
2485 {
2486 PUGI__PUSHNODE(node_pi);
2487 }
2488
2489 cursor->name = target;
2490
2491 PUGI__ENDSEG();
2492
2493 // parse value/attributes
2494 if (ch == '?')
2495 {
2496 // empty node
2497 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
2498 s += (*s == '>');
2499
2500 PUGI__POPNODE();
2501 }
2502 else if (PUGI__IS_CHARTYPE(ch, ct_space))
2503 {
2504 PUGI__SKIPWS();
2505
2506 // scan for tag end
2507 char_t* value = s;
2508
2509 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
2510 PUGI__CHECK_ERROR(status_bad_pi, s);
2511
2512 if (declaration)
2513 {
2514 // replace ending ? with / so that 'element' terminates properly
2515 *s = '/';
2516
2517 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
2518 s = value;
2519 }
2520 else
2521 {
2522 // store value and step over >
2523 cursor->value = value;
2524 PUGI__POPNODE();
2525
2526 PUGI__ENDSEG();
2527
2528 s += (*s == '>');
2529 }
2530 }
2531 else PUGI__THROW_ERROR(status_bad_pi, s);
2532 }
2533 else
2534 {
2535 // scan for tag end
2536 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
2537 PUGI__CHECK_ERROR(status_bad_pi, s);
2538
2539 s += (s[1] == '>' ? 2 : 1);
2540 }
2541
2542 // store from registers
2543 ref_cursor = cursor;
2544
2545 return s;
2546 }
#define PUGI__ENDSEG()
Definition pugixml.cpp:1907
#define PUGI__SKIPWS()
Definition pugixml.cpp:1900
#define PUGI__SCANWHILE(X)
Definition pugixml.cpp:1905
@ ct_start_symbol
Definition pugixml.cpp:1134
@ ct_symbol
Definition pugixml.cpp:1133

References ct_space, ct_start_symbol, ct_symbol, PUGI__CHECK_ERROR, PUGI__ENDSEG, PUGI__ENDSWITH, PUGI__IS_CHARTYPE, PUGI__OPTSET, PUGI__POPNODE, PUGI__PUSHNODE, PUGI__SCANFOR, PUGI__SCANWHILE, PUGI__SKIPWS, and PUGI__THROW_ERROR.

Referenced by parse_tree().

◆ parse_skip_bom()

static char_t * xml_parser::parse_skip_bom ( char_t *  s)
inline static

Definition at line 2787 of file pugixml.cpp.

2788 {
2789 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
2790 }

Referenced by parse().

◆ parse_tree()

char_t * xml_parser::parse_tree ( char_t *  s,
xml_node_struct *  root,
unsigned int  optmsk,
char_t  endch 
)
inline

Definition at line 2548 of file pugixml.cpp.

2549 {
2550 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
2551 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
2552
2553 char_t ch = 0;
2554 xml_node_struct* cursor = root;
2555 char_t* mark = s;
2556
2557 while (*s != 0)
2558 {
2559 if (*s == '<')
2560 {
2561 ++s;
2562
2563 LOC_TAG:
2564 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
2565 {
2566 PUGI__PUSHNODE(node_element); // Append a new node to the tree.
2567
2568 cursor->name = s;
2569
2570 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
2571 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
2572
2573 if (ch == '>')
2574 {
2575 // end of tag
2576 }
2577 else if (PUGI__IS_CHARTYPE(ch, ct_space))
2578 {
2579 LOC_ATTRIBUTES:
2580 while (true)
2581 {
2582 PUGI__SKIPWS(); // Eat any whitespace.
2583
2584 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
2585 {
2586 xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute.
2587 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
2588
2589 a->name = s; // Save the offset.
2590
2591 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
2592 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2593
2594 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
2595 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2596
2597 if (PUGI__IS_CHARTYPE(ch, ct_space))
2598 {
2599 PUGI__SKIPWS(); // Eat any whitespace.
2600 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2601
2602 ch = *s;
2603 ++s;
2604 }
2605
2606 if (ch == '=') // '<... #=...'
2607 {
2608 PUGI__SKIPWS(); // Eat any whitespace.
2609
2610 if (*s == '"' || *s == '\'') // '<... #="...'
2611 {
2612 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
2613 ++s; // Step over the quote.
2614 a->value = s; // Save the offset.
2615
2616 s = strconv_attribute(s, ch);
2617
2618 if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
2619
2620 // After this line the loop continues from the start;
2621 // Whitespaces, / and > are ok, symbols and EOF are wrong,
2622 // everything else will be detected
2623 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
2624 }
2625 else PUGI__THROW_ERROR(status_bad_attribute, s);
2626 }
2627 else PUGI__THROW_ERROR(status_bad_attribute, s);
2628 }
2629 else if (*s == '/')
2630 {
2631 ++s;
2632
2633 if (*s == '>')
2634 {
2635 PUGI__POPNODE();
2636 s++;
2637 break;
2638 }
2639 else if (*s == 0 && endch == '>')
2640 {
2641 PUGI__POPNODE();
2642 break;
2643 }
2644 else PUGI__THROW_ERROR(status_bad_start_element, s);
2645 }
2646 else if (*s == '>')
2647 {
2648 ++s;
2649
2650 break;
2651 }
2652 else if (*s == 0 && endch == '>')
2653 {
2654 break;
2655 }
2656 else PUGI__THROW_ERROR(status_bad_start_element, s);
2657 }
2658
2659 // !!!
2660 }
2661 else if (ch == '/') // '<#.../'
2662 {
2663 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
2664
2665 PUGI__POPNODE(); // Pop.
2666
2667 s += (*s == '>');
2668 }
2669 else if (ch == 0)
2670 {
2671 // we stepped over null terminator, backtrack & handle closing tag
2672 --s;
2673
2674 if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
2675 }
2676 else PUGI__THROW_ERROR(status_bad_start_element, s);
2677 }
2678 else if (*s == '/')
2679 {
2680 ++s;
2681
2682 char_t* name = cursor->name;
2683 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
2684
2685 while (PUGI__IS_CHARTYPE(*s, ct_symbol))
2686 {
2687 if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
2688 }
2689
2690 if (*name)
2691 {
2692 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
2693 else PUGI__THROW_ERROR(status_end_element_mismatch, s);
2694 }
2695
2696 PUGI__POPNODE(); // Pop.
2697
2698 PUGI__SKIPWS();
2699
2700 if (*s == 0)
2701 {
2702 if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
2703 }
2704 else
2705 {
2706 if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
2707 ++s;
2708 }
2709 }
2710 else if (*s == '?') // '<?...'
2711 {
2712 s = parse_question(s, cursor, optmsk, endch);
2713 if (!s) return s;
2714
2715 assert(cursor);
2716 if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES;
2717 }
2718 else if (*s == '!') // '<!...'
2719 {
2720 s = parse_exclamation(s, cursor, optmsk, endch);
2721 if (!s) return s;
2722 }
2723 else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
2724 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
2725 }
2726 else
2727 {
2728 mark = s; // Save this offset while searching for a terminator.
2729
2730 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
2731
2732 if (*s == '<' || !*s)
2733 {
2734 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
2735 assert(mark != s);
2736
2737 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
2738 {
2739 continue;
2740 }
2741 else if (PUGI__OPTSET(parse_ws_pcdata_single))
2742 {
2743 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
2744 }
2745 }
2746
2747 if (!PUGI__OPTSET(parse_trim_pcdata))
2748 s = mark;
2749
2750 if (cursor->parent || PUGI__OPTSET(parse_fragment))
2751 {
2752 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
2753 cursor->value = s; // Save the offset.
2754
2755 s = strconv_pcdata(s);
2756
2757 PUGI__POPNODE(); // Pop since this is a standalone.
2758
2759 if (!*s) break;
2760 }
2761 else
2762 {
2763 PUGI__SCANFOR(*s == '<'); // '...<'
2764 if (!*s) break;
2765
2766 ++s;
2767 }
2768
2769 // We're after '<'
2770 goto LOC_TAG;
2771 }
2772 }
2773
2774 // check that last tag is closed
2775 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
2776
2777 return s;
2778 }
char_t *(* strconv_attribute_t)(char_t *, char_t)
Definition pugixml.cpp:2038
#define PUGI__SCANWHILE_UNROLL(X)
Definition pugixml.cpp:1906
static const uintptr_t xml_memory_page_type_mask
Definition pugixml.cpp:273
PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
Definition pugixml.cpp:2189
PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
Definition pugixml.cpp:2020
PUGI__FN_NO_INLINE xml_attribute_struct * append_new_attribute(xml_node_struct *node, xml_allocator &alloc)
Definition pugixml.cpp:712
char_t *(* strconv_pcdata_t)(char_t *)
Definition pugixml.cpp:1967
char_t * parse_question(char_t *s, xml_node_struct *&ref_cursor, unsigned int optmsk, char_t endch)
Definition pugixml.cpp:2455
char_t * parse_exclamation(char_t *s, xml_node_struct *cursor, unsigned int optmsk, char_t endch)
Definition pugixml.cpp:2344

References alloc, append_new_attribute(), ct_space, ct_start_symbol, ct_symbol, get_strconv_attribute(), get_strconv_pcdata(), parse_exclamation(), parse_question(), PUGI__CHECK_ERROR, PUGI__ENDSEG, PUGI__ENDSWITH, PUGI__IS_CHARTYPE, PUGI__OPTSET, PUGI__POPNODE, PUGI__PUSHNODE, PUGI__SCANFOR, PUGI__SCANWHILE_UNROLL, PUGI__SKIPWS, PUGI__THROW_ERROR, and xml_memory_page_type_mask.

Referenced by parse().

Member Data Documentation

◆ alloc

xml_allocator xml_parser::alloc

Definition at line 2226 of file pugixml.cpp.

Referenced by parse(), and parse_tree().

◆ error_offset

char_t* xml_parser::error_offset

Definition at line 2227 of file pugixml.cpp.

Referenced by parse().

◆ error_status

xml_parse_status xml_parser::error_status

Definition at line 2228 of file pugixml.cpp.

Referenced by parse().


The documentation for this struct was generated from the following file:

pugixml.cpp

Generated on Mon Mar 4 2024 21:10:02 for QuickFIX by doxygen 1.9.8 written by Dimitri van Heesch, © 1997-2001