Orcus
yaml_parser.hpp
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
7
8#ifndef INCLUDED_ORCUS_YAML_PARSER_HPP
9#define INCLUDED_ORCUS_YAML_PARSER_HPP
10
11#include "orcus/yaml_parser_base.hpp"
12#include "orcus/parser_global.hpp"
13
14namespace orcus {
15
/**
 * Blank handler for the YAML parser.  Every callback is a no-op, so this
 * class documents the complete set of member functions that a handler
 * passed to yaml_parser must provide.  A custom handler may derive from it
 * and redefine only the callbacks it is interested in.
 */
class yaml_handler
{
public:
    /**
     * Called once when the parser starts parsing the content.
     */
    void begin_parse() {}

    /**
     * Called once when the parser finishes parsing the content.
     */
    void end_parse() {}

    /**
     * Called when a new document is encountered.
     */
    void begin_document() {}

    /**
     * Called when the current document ends.
     */
    void end_document() {}

    /**
     * Called when a sequence begins.
     */
    void begin_sequence() {}

    /**
     * Called when the current sequence ends.
     */
    void end_sequence() {}

    /**
     * Called when a map begins.
     */
    void begin_map() {}

    /**
     * Called right before the key of a map entry is reported.
     */
    void begin_map_key() {}

    /**
     * Called right after the key of a map entry has been reported.
     */
    void end_map_key() {}

    /**
     * Called when the current map ends.
     */
    void end_map() {}

    /**
     * Called when a string value is encountered.
     *
     * @param p pointer to the first character of the string value.
     * @param n length of the string value.
     */
    void string([[maybe_unused]] const char* p, [[maybe_unused]] size_t n) {}

    /**
     * Called when a numeric value is encountered.
     *
     * @param val numeric value.
     */
    void number([[maybe_unused]] double val) {}

    /**
     * Called when a boolean 'true' keyword is encountered.
     */
    void boolean_true() {}

    /**
     * Called when a boolean 'false' keyword is encountered.
     */
    void boolean_false() {}

    /**
     * Called when a 'null' keyword is encountered.  The parser also emits
     * this for a map key or a sequence element that has no value.
     */
    void null() {}
};
105
106template<typename _Handler>
108{
109public:
110 typedef _Handler handler_type;
111
112 yaml_parser(const char* p, size_t n, handler_type& hdl);
113
114 void parse();
115
116private:
117 size_t end_scope();
118 void check_or_begin_document();
119 void check_or_begin_map();
120 void check_or_begin_sequence();
121 void parse_value(const char* p, size_t len);
122 void push_value(const char* p, size_t len);
123 void parse_line(const char* p, size_t len);
124 void parse_map_key(const char* p, size_t len);
125
126 void handler_begin_parse();
127 void handler_end_parse();
128 void handler_begin_document();
129 void handler_end_document();
130 void handler_begin_sequence();
131 void handler_end_sequence();
132 void handler_begin_map();
133 void handler_end_map();
134 void handler_begin_map_key();
135 void handler_end_map_key();
136 void handler_string(const char* p, size_t n);
137 void handler_number(double val);
138 void handler_boolean_true();
139 void handler_boolean_false();
140 void handler_null();
141
142private:
143 handler_type& m_handler;
144};
145
146template<typename _Handler>
148{
149 push_parse_token(yaml::detail::parse_token_t::begin_parse);
150 m_handler.begin_parse();
151}
152
153template<typename _Handler>
154void yaml_parser<_Handler>::handler_end_parse()
155{
156 push_parse_token(yaml::detail::parse_token_t::end_parse);
157 m_handler.end_parse();
158}
159
160template<typename _Handler>
161void yaml_parser<_Handler>::handler_begin_document()
162{
163 push_parse_token(yaml::detail::parse_token_t::begin_document);
164 m_handler.begin_document();
165}
166
167template<typename _Handler>
168void yaml_parser<_Handler>::handler_end_document()
169{
170 push_parse_token(yaml::detail::parse_token_t::end_document);
171 m_handler.end_document();
172}
173
174template<typename _Handler>
175void yaml_parser<_Handler>::handler_begin_sequence()
176{
177 push_parse_token(yaml::detail::parse_token_t::begin_sequence);
178 m_handler.begin_sequence();
179}
180
181template<typename _Handler>
182void yaml_parser<_Handler>::handler_end_sequence()
183{
184 push_parse_token(yaml::detail::parse_token_t::end_sequence);
185 m_handler.end_sequence();
186}
187
188template<typename _Handler>
189void yaml_parser<_Handler>::handler_begin_map()
190{
191 push_parse_token(yaml::detail::parse_token_t::begin_map);
192 m_handler.begin_map();
193}
194
195template<typename _Handler>
196void yaml_parser<_Handler>::handler_end_map()
197{
198 push_parse_token(yaml::detail::parse_token_t::end_map);
199 m_handler.end_map();
200}
201
202template<typename _Handler>
203void yaml_parser<_Handler>::handler_begin_map_key()
204{
205 push_parse_token(yaml::detail::parse_token_t::begin_map_key);
206 m_handler.begin_map_key();
207}
208
209template<typename _Handler>
210void yaml_parser<_Handler>::handler_end_map_key()
211{
212 push_parse_token(yaml::detail::parse_token_t::end_map_key);
213 m_handler.end_map_key();
214}
215
216template<typename _Handler>
217void yaml_parser<_Handler>::handler_string(const char* p, size_t n)
218{
219 push_parse_token(yaml::detail::parse_token_t::string);
220 m_handler.string(p, n);
221}
222
223template<typename _Handler>
224void yaml_parser<_Handler>::handler_number(double val)
225{
226 push_parse_token(yaml::detail::parse_token_t::number);
227 m_handler.number(val);
228}
229
230template<typename _Handler>
231void yaml_parser<_Handler>::handler_boolean_true()
232{
233 push_parse_token(yaml::detail::parse_token_t::boolean_true);
234 m_handler.boolean_true();
235}
236
237template<typename _Handler>
238void yaml_parser<_Handler>::handler_boolean_false()
239{
240 push_parse_token(yaml::detail::parse_token_t::boolean_false);
241 m_handler.boolean_false();
242}
243
244template<typename _Handler>
245void yaml_parser<_Handler>::handler_null()
246{
247 push_parse_token(yaml::detail::parse_token_t::null);
248 m_handler.null();
249}
250
251template<typename _Handler>
252yaml_parser<_Handler>::yaml_parser(const char* p, size_t n, handler_type& hdl) :
253 yaml::parser_base(p, n), m_handler(hdl) {}
254
255template<typename _Handler>
256void yaml_parser<_Handler>::parse()
257{
258 handler_begin_parse();
259
260 while (has_char())
261 {
262 reset_on_new_line();
263
264 size_t indent = parse_indent();
265 if (indent == parse_indent_end_of_stream)
266 break;
267
268 if (indent == parse_indent_blank_line)
269 continue;
270
271 size_t cur_scope = get_scope();
272
273 if (cur_scope <= indent)
274 {
275 if (in_literal_block())
276 {
277 handle_line_in_literal(indent);
278 continue;
279 }
280
281 if (has_line_buffer())
282 {
283 // This line is part of multi-line string. Push the line to the
284 // buffer as-is.
285 handle_line_in_multi_line_string();
286 continue;
287 }
288 }
289
290 if (cur_scope == scope_empty)
291 {
292 if (indent > 0)
293 throw yaml::parse_error(
294 "first node of the document should not be indented.", offset());
295
296 push_scope(indent);
297 }
298 else if (indent > cur_scope)
299 {
300 push_scope(indent);
301 }
302 else if (indent < cur_scope)
303 {
304 // Current indent is less than the current scope level.
305 do
306 {
307 cur_scope = end_scope();
308 if (cur_scope < indent)
309 throw yaml::parse_error("parse: invalid indent level.", offset());
310 }
311 while (indent < cur_scope);
312 }
313
314 // Parse the rest of the line.
315 std::string_view line = parse_to_end_of_line();
316 line = trim(line);
317
318 assert(!line.empty());
319 parse_line(line.data(), line.size());
320 }
321
322 // End all remaining scopes.
323 size_t cur_scope = get_scope();
324 while (cur_scope != scope_empty)
325 cur_scope = end_scope();
326
327 if (get_doc_hash())
328 handler_end_document();
329
330 handler_end_parse();
331}
332
333template<typename _Handler>
334size_t yaml_parser<_Handler>::end_scope()
335{
336 switch (get_scope_type())
337 {
338 case yaml::detail::scope_t::map:
339 {
340 if (get_last_parse_token() == yaml::detail::parse_token_t::end_map_key)
341 handler_null();
342
343 handler_end_map();
344 break;
345 }
346 case yaml::detail::scope_t::sequence:
347 {
348 if (get_last_parse_token() == yaml::detail::parse_token_t::begin_sequence_element)
349 handler_null();
350
351 handler_end_sequence();
352 break;
353 }
354 case yaml::detail::scope_t::multi_line_string:
355 {
356 std::string_view merged = merge_line_buffer();
357 handler_string(merged.data(), merged.size());
358 break;
359 }
360 default:
361 {
362 if (has_line_buffer())
363 {
364 assert(get_line_buffer_count() == 1);
365 std::string_view line = pop_line_front();
366 parse_value(line.data(), line.size());
367 }
368 }
369 }
370 return pop_scope();
371}
372
373template<typename _Handler>
374void yaml_parser<_Handler>::check_or_begin_document()
375{
376 if (!get_doc_hash())
377 {
378 set_doc_hash(mp_char);
379 handler_begin_document();
380 }
381}
382
383template<typename _Handler>
384void yaml_parser<_Handler>::check_or_begin_map()
385{
386 switch (get_scope_type())
387 {
388 case yaml::detail::scope_t::unset:
389 {
390 check_or_begin_document();
391 set_scope_type(yaml::detail::scope_t::map);
392 handler_begin_map();
393 break;
394 }
395 case yaml::detail::scope_t::map:
396 {
397 if (get_last_parse_token() == yaml::detail::parse_token_t::end_map_key)
398 handler_null();
399 break;
400 }
401 default:
402 ;
403 }
404}
405
406template<typename _Handler>
407void yaml_parser<_Handler>::check_or_begin_sequence()
408{
409 switch (get_scope_type())
410 {
411 case yaml::detail::scope_t::unset:
412 {
413 check_or_begin_document();
414 set_scope_type(yaml::detail::scope_t::sequence);
415 handler_begin_sequence();
416 break;
417 }
418 case yaml::detail::scope_t::sequence:
419 {
420 if (get_last_parse_token() == yaml::detail::parse_token_t::begin_sequence_element)
421 handler_null();
422 break;
423 }
424 default:
425 ;
426 }
427
428 push_parse_token(yaml::detail::parse_token_t::begin_sequence_element);
429}
430
431template<typename _Handler>
432void yaml_parser<_Handler>::parse_value(const char* p, size_t len)
433{
434 check_or_begin_document();
435
436 const char* p0 = p;
437 const char* p_end = p + len;
438 double val = parse_numeric(p, len);
439 if (p == p_end)
440 {
441 handler_number(val);
442 return;
443 }
444
445 yaml::detail::keyword_t kw = parse_keyword(p0, len);
446
447 if (kw != yaml::detail::keyword_t::unknown)
448 {
449 switch (kw)
450 {
451 case yaml::detail::keyword_t::null:
452 handler_null();
453 break;
454 case yaml::detail::keyword_t::boolean_true:
455 handler_boolean_true();
456 break;
457 case yaml::detail::keyword_t::boolean_false:
458 handler_boolean_false();
459 break;
460 default:
461 ;
462 }
463
464 return;
465 }
466
467 // Failed to parse it as a number or a keyword. It must be a string.
468 handler_string(p0, len);
469}
470
471template<typename _Handler>
472void yaml_parser<_Handler>::push_value(const char* p, size_t len)
473{
474 check_or_begin_document();
475
476 if (has_line_buffer() && get_scope_type() == yaml::detail::scope_t::unset)
477 set_scope_type(yaml::detail::scope_t::multi_line_string);
478
479 push_line_back(p, len);
480}
481
482template<typename _Handler>
483void yaml_parser<_Handler>::parse_line(const char* p, size_t len)
484{
485 const char* p_end = p + len;
486 const char* p0 = p; // Save the original head position.
487
488 if (*p == '-')
489 {
490 ++p;
491 if (p == p_end)
492 {
493 // List item start.
494 check_or_begin_sequence();
495 return;
496 }
497
498 switch (*p)
499 {
500 case '-':
501 {
502 // start of a document
503 ++p;
504 if (p == p_end)
505 throw yaml::parse_error("parse_line: line ended with '--'.", offset_last_char_of_line());
506
507 if (*p != '-')
508 yaml::parse_error::throw_with(
509 "parse_line: '-' expected but '", *p, "' found.",
510 offset_last_char_of_line() - std::ptrdiff_t(p_end-p));
511
512 ++p; // Skip the '-'.
513 set_doc_hash(p);
514 handler_begin_document();
515 clear_scopes();
516
517 if (p != p_end)
518 {
519 skip_blanks(p, p_end-p);
520
521 // Whatever comes after '---' is equivalent of first node.
522 assert(p != p_end);
523 push_scope(0);
524 parse_line(p, p_end-p);
525 }
526 return;
527 }
528 case ' ':
529 {
530 check_or_begin_sequence();
531
532 // list item start with inline first item content.
533 ++p;
534 if (p == p_end)
535 throw yaml::parse_error(
536 "parse_line: list item expected, but the line ended prematurely.",
537 offset_last_char_of_line() - std::ptrdiff_t(p_end-p));
538
539 skip_blanks(p, p_end-p);
540
541 size_t scope_width = get_scope() + (p-p0);
542 push_scope(scope_width);
543 parse_line(p, p_end-p);
544 return;
545 }
546 default:
547 // It is none of the above.
548 p = p0;
549 }
550
551 }
552
553 if (get_scope_type() == yaml::detail::scope_t::sequence)
554 yaml::parse_error::throw_with(
555 "'-' was expected for a sequence element, but '", *p, "' was found.",
556 offset_last_char_of_line()-len+1);
557
558 // If the line doesn't start with a "- ", it must be a dictionary key.
559 parse_map_key(p, len);
560}
561
562template<typename _Handler>
563void yaml_parser<_Handler>::parse_map_key(const char* p, size_t len)
564{
565 const char* p_end = p + len;
566 const char* p0 = p; // Save the original head position.
567
568 switch (*p)
569 {
570 case '"':
571 {
572 std::string_view quoted_str = parse_double_quoted_string_value(p, len);
573
574 if (p == p_end)
575 {
576 handler_string(quoted_str.data(), quoted_str.size());
577 return;
578 }
579
580 skip_blanks(p, p_end-p);
581
582 if (*p != ':')
583 throw yaml::parse_error(
584 "parse_map_key: ':' is expected after the quoted string key.",
585 offset() - std::ptrdiff_t(p_end-p+1));
586
587 check_or_begin_map();
588 handler_begin_map_key();
589 handler_string(quoted_str.data(), quoted_str.size());
590 handler_end_map_key();
591
592 ++p; // skip the ':'.
593 if (p == p_end)
594 return;
595
596 // Skip all white spaces.
597 skip_blanks(p, p_end-p);
598 }
599 break;
600 case '\'':
601 {
602 std::string_view quoted_str = parse_single_quoted_string_value(p, len);
603
604 if (p == p_end)
605 {
606 handler_string(quoted_str.data(), quoted_str.size());
607 return;
608 }
609
610 skip_blanks(p, p_end-p);
611
612 if (*p != ':')
613 throw yaml::parse_error(
614 "parse_map_key: ':' is expected after the quoted string key.",
615 offset() - std::ptrdiff_t(p_end-p+1));
616
617 check_or_begin_map();
618 handler_begin_map_key();
619 handler_string(quoted_str.data(), quoted_str.size());
620 handler_end_map_key();
621
622 ++p; // skip the ':'.
623 if (p == p_end)
624 return;
625
626 skip_blanks(p, p_end-p);
627 }
628 break;
629 default:
630 {
631 key_value kv = parse_key_value(p, p_end-p);
632
633 if (kv.key.empty())
634 {
635 // No map key found.
636 if (*p == '|')
637 {
638 start_literal_block();
639 return;
640 }
641
642 push_value(p, len);
643 return;
644 }
645
646 check_or_begin_map();
647 handler_begin_map_key();
648 parse_value(kv.key.data(), kv.key.size());
649 handler_end_map_key();
650
651 if (kv.value.empty())
652 return;
653
654 p = kv.value.data();
655 }
656 }
657
658 if (*p == '|')
659 {
660 start_literal_block();
661 return;
662 }
663
664 // inline map item.
665 if (*p == '-')
666 throw yaml::parse_error(
667 "parse_map_key: sequence entry is not allowed as an inline map item.",
668 offset() - std::ptrdiff_t(p_end-p+1));
669
670 size_t scope_width = get_scope() + (p-p0);
671 push_scope(scope_width);
672 parse_line(p, p_end-p);
673}
674
675}
676
677#endif
678
679/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: yaml_parser_base.hpp:75
Definition: yaml_parser.hpp:17
void end_map()
Definition: yaml_parser.hpp:67
void begin_parse()
Definition: yaml_parser.hpp:22
void end_sequence()
Definition: yaml_parser.hpp:47
void end_parse()
Definition: yaml_parser.hpp:27
void null()
Definition: yaml_parser.hpp:103
void string(const char *p, size_t n)
Definition: yaml_parser.hpp:75
void boolean_true()
Definition: yaml_parser.hpp:93
void begin_map_key()
Definition: yaml_parser.hpp:57
void boolean_false()
Definition: yaml_parser.hpp:98
void begin_map()
Definition: yaml_parser.hpp:52
void number(double val)
Definition: yaml_parser.hpp:85
void end_document()
Definition: yaml_parser.hpp:37
void begin_document()
Definition: yaml_parser.hpp:32
void begin_sequence()
Definition: yaml_parser.hpp:42
void end_map_key()
Definition: yaml_parser.hpp:62
Definition: yaml_parser.hpp:108