8#ifndef INCLUDED_ORCUS_SAX_PARSER_HPP
9#define INCLUDED_ORCUS_SAX_PARSER_HPP
11#include "sax_parser_base.hpp"
99 (void)val; (void)transient;
131template<
typename HandlerT,
typename ConfigT = sax_parser_default_config>
135 typedef HandlerT handler_type;
136 typedef ConfigT config_type;
138 sax_parser(std::string_view content, handler_type& handler);
139 ~sax_parser() =
default;
152 void element_open(std::ptrdiff_t begin_pos);
153 void element_close(std::ptrdiff_t begin_pos);
155 void declaration(
const char* name_check);
162 handler_type& m_handler;
165template<
typename HandlerT,
typename ConfigT>
166sax_parser<HandlerT,ConfigT>::sax_parser(std::string_view content, handler_type& handler) :
172template<
typename HandlerT,
typename ConfigT>
173void sax_parser<HandlerT,ConfigT>::parse()
178 skip_space_and_control();
181 assert(m_buffer_pos == 0);
184template<
typename HandlerT,
typename ConfigT>
185void sax_parser<HandlerT,ConfigT>::header()
193 skip_space_and_control();
195 if (!has_char() || cur_char() !=
'<')
198 if (config_type::baseline_version >= 11)
202 if (next_char_checked() !=
'?')
209template<
typename HandlerT,
typename ConfigT>
210void sax_parser<HandlerT,ConfigT>::body()
214 if (cur_char() ==
'<')
217 if (!m_root_elem_open)
221 else if (m_nest_level)
229template<
typename HandlerT,
typename ConfigT>
230void sax_parser<HandlerT,ConfigT>::element()
232 assert(cur_char() ==
'<');
233 std::ptrdiff_t pos = offset();
234 char c = next_char_checked();
244 declaration(
nullptr);
251template<
typename HandlerT,
typename ConfigT>
252void sax_parser<HandlerT,ConfigT>::element_open(std::ptrdiff_t begin_pos)
255 element_name(elem, begin_pos);
259 skip_space_and_control();
260 char c = cur_char_checked();
264 if (next_and_char() !=
'>')
267 elem.end_pos = offset();
268 m_handler.start_element(elem);
270 m_handler.end_element(elem);
272 m_root_elem_open =
false;
273#if ORCUS_DEBUG_SAX_PARSER
274 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"' (self-closing)" << endl;
282 elem.end_pos = offset();
284 m_handler.start_element(elem);
286#if ORCUS_DEBUG_SAX_PARSER
287 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
296template<
typename HandlerT,
typename ConfigT>
297void sax_parser<HandlerT,ConfigT>::element_close(std::ptrdiff_t begin_pos)
299 assert(cur_char() ==
'/');
303 element_name(elem, begin_pos);
305 if (cur_char() !=
'>')
308 elem.end_pos = offset();
310 m_handler.end_element(elem);
311#if ORCUS_DEBUG_SAX_PARSER
312 cout <<
"element_close: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
315 m_root_elem_open =
false;
318template<
typename HandlerT,
typename ConfigT>
319void sax_parser<HandlerT,ConfigT>::special_tag()
321 assert(cur_char() ==
'!');
323 size_t len = available_size();
327 switch (next_and_char())
332 if (next_and_char() !=
'-')
346 expects_next(
"CDATA[", 6);
354 expects_next(
"OCTYPE", 6);
355 skip_space_and_control();
365template<
typename HandlerT,
typename ConfigT>
366void sax_parser<HandlerT,ConfigT>::declaration(
const char* name_check)
368 assert(cur_char() ==
'?');
372 std::string_view decl_name;
374#if ORCUS_DEBUG_SAX_PARSER
375 cout <<
"sax_parser::declaration: start name='" << decl_name <<
"'" << endl;
378 if (name_check && decl_name != name_check)
380 std::ostringstream os;
381 os <<
"declaration name of '" << name_check <<
"' was expected, but '" << decl_name <<
"' was found instead.";
385 m_handler.start_declaration(decl_name);
386 skip_space_and_control();
389 while (cur_char_checked() !=
'?')
392 skip_space_and_control();
394 if (next_char_checked() !=
'>')
397 m_handler.end_declaration(decl_name);
400#if ORCUS_DEBUG_SAX_PARSER
401 cout <<
"sax_parser::declaration: end name='" << decl_name <<
"'" << endl;
405template<
typename HandlerT,
typename ConfigT>
406void sax_parser<HandlerT,ConfigT>::cdata()
408 size_t len = available_size();
412 const char* p0 = mp_char;
413 size_t i = 0, match = 0;
414 for (
char c = cur_char(); i < len; ++i, c = next_and_char())
428 else if (c ==
'>' && match == 2)
431 size_t cdata_len = i - 2;
432 m_handler.characters(std::string_view(p0, cdata_len),
false);
442template<
typename HandlerT,
typename ConfigT>
443void sax_parser<HandlerT,ConfigT>::doctype()
447 name(param.root_element);
448 skip_space_and_control();
451 size_t len = available_size();
455 param.keyword = sax::doctype_declaration::keyword_type::dtd_private;
459 if (next_and_char() !=
'U' || next_and_char() !=
'B' || next_and_char() !=
'L' || next_and_char() !=
'I' || next_and_char() !=
'C')
462 param.keyword = sax::doctype_declaration::keyword_type::dtd_public;
466 if (next_and_char() !=
'Y' || next_and_char() !=
'S' || next_and_char() !=
'T' || next_and_char() !=
'E' || next_and_char() !=
'M')
471 skip_space_and_control();
474 value(param.fpi,
false);
476 has_char_throw(
"DOCTYPE section too short.");
477 skip_space_and_control();
478 has_char_throw(
"DOCTYPE section too short.");
480 if (cur_char() ==
'>')
483#if ORCUS_DEBUG_SAX_PARSER
484 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"'" << endl;
486 m_handler.doctype(param);
492 value(param.uri,
false);
494 has_char_throw(
"DOCTYPE section too short.");
495 skip_space_and_control();
496 has_char_throw(
"DOCTYPE section too short.");
498 if (cur_char() !=
'>')
499 throw malformed_xml_error(
"malformed DOCTYPE section - closing '>' expected but not found.", offset());
501#if ORCUS_DEBUG_SAX_PARSER
502 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"' uri='" << param.uri <<
"'" << endl;
504 m_handler.doctype(param);
508template<
typename HandlerT,
typename ConfigT>
509void sax_parser<HandlerT,ConfigT>::characters()
511 const char* p0 = mp_char;
512 for (; has_char(); next())
514 if (cur_char() ==
'<')
517 if (cur_char() ==
'&')
522 buf.append(p0, mp_char-p0);
523 characters_with_encoded_char(buf);
525 m_handler.characters(std::string_view{},
false);
527 m_handler.characters(buf.str(),
true);
534 std::string_view val(p0, mp_char-p0);
535 m_handler.characters(val,
false);
539template<
typename HandlerT,
typename ConfigT>
540void sax_parser<HandlerT,ConfigT>::attribute()
543 attribute_name(attr.ns, attr.name);
545#if ORCUS_DEBUG_SAX_PARSER
546 cout <<
"sax_parser::attribute: ns='" << attr.ns <<
"', name='" << attr.name <<
"'" << endl;
549 skip_space_and_control();
551 char c = cur_char_checked();
554 std::ostringstream os;
555 os <<
"Attribute must begin with 'name=..'. (ns='" << attr.ns <<
"', name='" << attr.name <<
"')";
560 skip_space_and_control();
562 attr.transient = value(attr.value,
true);
567#if ORCUS_DEBUG_SAX_PARSER
568 cout <<
"sax_parser::attribute: value='" << attr.value <<
"'" << endl;
571 m_handler.attribute(attr);
Definition cell_buffer.hpp:22
Definition parser_base.hpp:23
Definition sax_parser_base.hpp:108
Definition sax_parser.hpp:29
void end_declaration(std::string_view decl)
Definition sax_parser.hpp:58
void doctype(const orcus::sax::doctype_declaration &dtd)
Definition sax_parser.hpp:36
void attribute(const orcus::sax::parser_attribute &attr)
Definition sax_parser.hpp:110
void characters(std::string_view val, bool transient)
Definition sax_parser.hpp:97
void start_declaration(std::string_view decl)
Definition sax_parser.hpp:48
void end_element(const orcus::sax::parser_element &elem)
Definition sax_parser.hpp:78
void start_element(const orcus::sax::parser_element &elem)
Definition sax_parser.hpp:68
Definition sax_parser_base.hpp:37
Definition sax_parser_base.hpp:96
Definition sax_parser_base.hpp:77
Definition sax_parser.hpp:19
static constexpr uint8_t baseline_version
Definition sax_parser.hpp:25