311 typedef std::shared_ptr<std::istream> stream_pointer;
319 iFinished{ false }, iError{ false }, iCharIndex{}, iLineIndex{}, iColumnIndex{}, iPreviousChar{}, iInputBufferIndex{}
328 iParent.get_token(*
this, atom);
332 catch (std::exception& e)
334 throw_with_info<std::exception>(e.what());
338 throw_with_info<std::exception>(
"unknown exception");
342 explicit operator bool()
const
// Formats an error report (aReason plus the current line and column counters)
// and throws it as an Exception.
// Exception must be constructible from a const char*.
// NOTE(review): visible callers instantiate this with std::exception, whose
// standard interface has no const char* constructor (that is a vendor
// extension) - confirm the intended toolchains.
347 template <
typename Exception>
348 void throw_with_info(
const std::string& aReason)
// Function-local static: a single stream object is shared by all calls of a
// given instantiation instead of being constructed per call.
// NOTE(review): this listing shows no reset of the stream before it is written
// to; confirm it is cleared between calls, otherwise earlier reports accumulate.
350 static std::ostringstream oss;
352 oss <<
"Lexer error: " << aReason << std::endl;
353 oss <<
"Line: " << iLineIndex << std::endl;
354 oss <<
"Column: " << iColumnIndex << std::endl;
// oss.str() is a temporary, so the pointer passed here is only valid while the
// exception object is being constructed; Exception has to copy the text.
355 throw Exception{ oss.str().c_str() };
// Lexer this context asks for tokens (via get_token).
358 const lexer& iParent;
// Stream the characters are read from.
359 stream_pointer iInput;
// Chunk most recently read from iInput; cleared once fully consumed.
362 string_type iInputBuffer;
// Position of the next unread character within iInputBuffer.
363 std::size_t iInputBufferIndex;
// Column counter: reset to 0 at the start of a line and incremented for every
// character read.
366 uint32_t iColumnIndex;
// Atoms read or produced so far that have not yet been handed to the caller.
368 std::deque<atom_type> iQueue;
371 typedef std::vector<rule_type> rule_list;
372 enum class search_type
377 enum class match_result
387 typedef std::pair<node*, value_type> next_type;
388 typedef std::optional<next_type> optional_next_type;
// Constructs a node bound to the lexer that owns it; the token and character
// maps are value-initialised (empty).
395 node(
lexer& aParent) :
396 iParent{ aParent }, iTokenMap {}, iCharMap{}
402 if (holds_alternative<char_type>(aAtom))
403 map(static_variant_cast<char_type>(aAtom.value()), aNextNode);
404 else if (holds_alternative<token_type>(aAtom))
405 map(static_variant_cast<token_type>(aAtom.value()), aNextNode);
// Records aTerminalAtom as the terminal value reached through aAtom.
// Dispatches first on what aTerminalAtom holds (token or function) and then on
// what aAtom holds (character, token or function), forwarding to the matching
// typed overload.
// Throws bad_terminal_atom; presumably when aTerminalAtom holds neither a token
// nor a function (the branch keyword before the throw is not visible in this
// listing).
407 void map(
const atom_type& aAtom,
const atom_type& aTerminalAtom)
// Terminal value is a token.
409 if (holds_alternative<token_type>(aTerminalAtom))
411 if (holds_alternative<char_type>(aAtom))
412 map(static_variant_cast<char_type>(aAtom.value()), static_variant_cast<token_type>(aTerminalAtom.value()));
413 else if (holds_alternative<token_type>(aAtom))
414 map(static_variant_cast<token_type>(aAtom.value()), static_variant_cast<token_type>(aTerminalAtom.value()));
415 else if (holds_alternative<function_type>(aAtom))
416 map(static_variant_cast<const function_type&>(aAtom.value()), static_variant_cast<token_type>(aTerminalAtom.value()));
// Terminal value is a function.
418 else if (holds_alternative<function_type>(aTerminalAtom))
420 if (holds_alternative<char_type>(aAtom))
421 map(static_variant_cast<char_type>(aAtom.value()), static_variant_cast<const function_type&>(aTerminalAtom.value()));
422 else if (holds_alternative<token_type>(aAtom))
423 map(static_variant_cast<token_type>(aAtom.value()), static_variant_cast<const function_type&>(aTerminalAtom.value()));
424 else if (holds_alternative<function_type>(aAtom))
425 map(static_variant_cast<const function_type&>(aAtom.value()), static_variant_cast<const function_type&>(aTerminalAtom.value()));
428 throw bad_terminal_atom();
// Registers aNextNode as the successor node for aChar, but only when no
// successor is registered yet. operator[] creates the entry if it is absent.
// NOTE(review): what happens when a successor already exists is not visible in
// this listing.
430 void map(char_type aChar, node& aNextNode)
432 if (iCharMap[aChar].first ==
nullptr)
433 iCharMap[aChar].first = &aNextNode;
// Registers aNextNode as the successor node for aToken, but only when no
// successor is registered yet. operator[] creates the entry if it is absent.
// NOTE(review): what happens when a successor already exists is not visible in
// this listing.
437 void map(token_type aToken, node& aNextNode)
439 if (iTokenMap[aToken].first ==
nullptr)
440 iTokenMap[aToken].first = &aNextNode;
// Registers aNextNode as the successor node for aFunction, but only when no
// successor is registered yet. operator[] creates the entry if it is absent.
// NOTE(review): what happens when a successor already exists is not visible in
// this listing.
444 void map(
const function_type& aFunction, node& aNextNode)
446 if (iFunctionMap[aFunction].first ==
nullptr)
447 iFunctionMap[aFunction].first = &aNextNode;
// Stores aTerminalToken as the terminal value for aChar, replacing any previous
// terminal value; the map entry is created if it does not exist.
451 void map(char_type aChar, token_type aTerminalToken)
453 iCharMap[aChar].second = aTerminalToken;
// Stores aTerminalToken as the terminal value for aToken, replacing any
// previous terminal value; the map entry is created if it does not exist.
455 void map(token_type aToken, token_type aTerminalToken)
457 iTokenMap[aToken].second = aTerminalToken;
// Stores aTerminalToken as the terminal value for aFunction, replacing any
// previous terminal value; the map entry is created if it does not exist.
459 void map(
const function_type& aFunction, token_type aTerminalToken)
461 iFunctionMap[aFunction].second = aTerminalToken;
// Stores aTerminalFunction as the terminal value for aChar, replacing any
// previous terminal value; the map entry is created if it does not exist.
463 void map(char_type aChar,
const function_type& aTerminalFunction)
465 iCharMap[aChar].second = aTerminalFunction;
// Stores aTerminalFunction as the terminal value for aToken, replacing any
// previous terminal value; the map entry is created if it does not exist.
467 void map(token_type aToken,
const function_type& aTerminalFunction)
469 iTokenMap[aToken].second = aTerminalFunction;
// Stores aTerminalFunction as the terminal value for aFunction, replacing any
// previous terminal value; the map entry is created if it does not exist.
471 void map(
const function_type& aFunction,
const function_type& aTerminalFunction)
473 iFunctionMap[aFunction].second = aTerminalFunction;
// Returns the (successor node, terminal value) entry for aAtom by dispatching
// on whether it holds a character, a token or a function.
// Throws unsupported_atom_type; presumably for any other alternative (the
// branch keyword before the throw is not visible in this listing).
// The typed overloads use operator[], so looking up an unknown key inserts a
// default entry.
475 const next_type& lookup(
const atom_type& aAtom)
const
477 if (holds_alternative<char_type>(aAtom))
478 return lookup(static_variant_cast<char_type>(aAtom.value()));
479 else if (holds_alternative<token_type>(aAtom))
480 return lookup(static_variant_cast<token_type>(aAtom.value()));
481 else if (holds_alternative<function_type>(aAtom))
482 return lookup(static_variant_cast<const function_type&>(aAtom.value()));
484 throw unsupported_atom_type();
// Returns the entry for aChar. operator[] inserts a default entry when the key
// is absent; that is permitted in this const member because iCharMap is
// declared mutable.
486 const next_type& lookup(char_type aChar)
const
488 return iCharMap[aChar];
// Returns the entry for aToken. operator[] inserts a default entry when the key
// is absent; that is permitted in this const member because iTokenMap is
// declared mutable.
490 const next_type& lookup(token_type aToken)
const
492 return iTokenMap[aToken];
// Returns the entry for aFunction. operator[] inserts a default entry when the
// key is absent; that is permitted in this const member because iFunctionMap is
// declared mutable.
494 const next_type& lookup(
const function_type& aFunction)
const
496 return iFunctionMap[aFunction];
// Matches the atom sequence [aFirst, aLast) against the tree starting at this
// node and returns a (match_result, terminal value) pair.
// The first atom is dereferenced immediately, so the range must not be empty.
// NOTE(review): the conditions selecting between the result branches below are
// not visible in this listing; comments describe only the visible lines.
498 template <
typename Iter>
499 std::pair<match_result, value_type> match(Iter aFirst, Iter aLast, search_type aSearchType)
const
// Consume one atom and look it up in this node.
501 atom_type atom = *aFirst++;
502 auto atomMatch = match_atom(atom, aSearchType);
// Complete: the terminal value stored for the atom is returned.
508 return std::make_pair(match_result::Complete, atomMatch->second);
// Partial: there is a successor node, so a longer sequence could still match.
509 else if (atomMatch->first !=
nullptr)
510 return std::make_pair(match_result::Partial, value_type{});
512 return std::make_pair(match_result::None, value_type{});
// Continue in the successor node with the remaining atoms; character atoms are
// only followed when the search type is String.
514 else if (atomMatch->first !=
nullptr && (!holds_alternative<char_type>(atom) || aSearchType == search_type::String))
515 return atomMatch->first->match(aFirst, aLast, aSearchType);
517 return std::make_pair(match_result::None, value_type{});
520 return std::make_pair(match_result::None, value_type{});
// Looks up a single atom in this node's maps and returns its
// (successor node, terminal value) entry, or an empty optional when there is
// no entry. Unlike lookup(), this uses find() and never inserts.
// Throws invalid_atom; presumably when the atom holds neither a character nor
// a token (the branch keyword before the throw is not visible in this listing).
523 optional_next_type match_atom(
const atom_type& aAtom, search_type aSearchType)
const
525 if (holds_alternative<char_type>(aAtom))
// Characters are only matched at the first node of the parent's node list, or
// anywhere during a String search.
527 if (
this == &*iParent.iNodes.begin() || aSearchType == search_type::String)
529 auto existing = iCharMap.find(static_variant_cast<char_type>(aAtom.value()));
530 if (existing != iCharMap.end())
531 return existing->second;
533 return optional_next_type{};
535 else if (holds_alternative<token_type>(aAtom))
// An exact token entry takes precedence over function entries.
537 auto token = static_variant_cast<token_type>(aAtom.value());
538 auto existingToken = iTokenMap.find(token);
539 if (existingToken != iTokenMap.end())
540 return existingToken->second;
// Otherwise scan the function entries: an entry matches when its token equals
// the atom's token and it has no Not function, or when its token differs and
// it has a Not function. The first match in the map's iteration order wins.
541 for (
auto iterFunction = iFunctionMap.begin(); iterFunction != iFunctionMap.end(); ++iterFunction)
543 auto functionToken = iterFunction->first.first;
// `auto` copies the function list on every iteration.
544 auto functions = iterFunction->first.second;
545 bool not_ = (std::find(functions.begin(), functions.end(), lexer_atom_function::Not) != functions.end());
546 if ((functionToken == token && !not_) || (functionToken !=
token && not_))
547 return iterFunction->second;
549 return optional_next_type{};
551 throw invalid_atom();
// Per-node transition tables: each key maps to a (successor node, terminal
// value) pair. They are mutable so that the const lookup() overloads can use
// operator[], which inserts missing keys.
555 mutable std::unordered_map<char_type, next_type> iCharMap;
556 mutable std::unordered_map<token_type, next_type> iTokenMap;
557 mutable std::unordered_map<function_type, next_type, boost::hash<function_type>> iFunctionMap;
// NOTE(review): keyed by scope_type but hashed with boost::hash<function_type>;
// this looks copied from the line above - confirm boost::hash<scope_type> was
// not intended.
558 mutable std::unordered_map<scope_type, next_type, boost::hash<function_type>> iScopeMap;
560 typedef std::list<node> node_list;
561 typedef std::shared_ptr<std::istream> stream_pointer;
// Exception type derived from std::logic_error. what() returns the supplied
// reason, which defaults to the fixed string below.
struct bad_lex_tree : std::logic_error
{
    bad_lex_tree(const std::string& aReason = "neolib::lexer_atom::bad_lex_tree") :
        std::logic_error{ aReason }
    {
    }
};
// Exception type derived from std::runtime_error. what() returns the supplied
// reason, which defaults to the fixed string below.
struct invalid_token : std::runtime_error
{
    invalid_token(const std::string& aReason = "neolib::lexer_atom::invalid_token") :
        std::runtime_error{ aReason }
    {
    }
};
// Constructs a lexer from the rules in [aFirstRule, aLastRule), visiting them
// in order.
// NOTE(review): the initialiser list and the loop body are not visible in this
// listing; presumably each rule is passed to build().
568 template <
typename Iter>
569 lexer(Iter aFirstRule, Iter aLastRule) :
572 for (
auto r = aFirstRule; r != aLastRule; ++r)
// Constructor overload that also stores aDefaultScope in iDefaultScope before
// visiting the rules in [aFirstRule, aLastRule) in order.
// NOTE(review): the signature line and the loop body are not visible in this
// listing; presumably each rule is passed to build().
575 template <
typename Iter>
577 iDefaultScope{ aDefaultScope }
579 for (
auto r = aFirstRule; r != aLastRule; ++r)
586 newContext.iInput = std::make_shared<std::ifstream>(aPath, std::ios_base::in | std::ios_base::binary);
592 newContext.iInput = stream_pointer(stream_pointer{}, &aStream);
598 newContext.iInput = std::make_shared<std::istringstream>(aText);
// Delivers the next atom for aContext through aAtom, combining queued atoms
// into tokens by matching them against the rule tree rooted at iNodes.front().
// NOTE(review): many statements of this function (including its return
// statements and several branch conditions) are absent from this listing; the
// comments below describe only the visible lines.
602 const lexer& get_token(context& aContext, atom_type& aAtom)
const
// Input is finished and nothing is queued: flag the error on the context.
604 if (aContext.iFinished && aContext.iQueue.empty())
606 aContext.iError =
true;
// Queue is empty (presumably more input is fetched here; the body is not
// visible).
609 if (aContext.iQueue.empty())
// Walk backwards from the end of the queue, matching the suffix
// [iter - 1, end) as a token sequence. When `backup` (declared on a line not
// visible here) is set, the walk restarts from the end of the queue.
613 for (
auto iter = aContext.iQueue.end(); iter != aContext.iQueue.begin(); iter = (backup ? aContext.iQueue.end() : iter - 1))
616 auto match = iNodes.front().match(iter - 1, aContext.iQueue.end(), search_type::Token);
// Partial match: end_of_file_reached is thrown under a condition that is not
// visible in this listing.
617 if (match.first == match_result::Partial)
620 throw end_of_file_reached();
// Complete match: build the atom that replaces the matched suffix.
624 if (match.first == match_result::Complete)
626 atom_type atom = atom_type{};
627 bool endToken =
false;
// Terminal value is a plain token.
628 if (std::holds_alternative<token_type>(match.second))
629 atom = atom_type{ static_variant_cast<token_type>(match.second) };
// Terminal value is a function: its first member is the resulting token and its
// second member is a list of lexer_atom_function values processed in order.
630 else if (std::holds_alternative<function_type>(match.second))
632 auto& functions = static_variant_cast<const function_type&>(match.second);
633 atom = atom_type{ functions.first };
634 auto iter2 = iter - 1;
635 for (
auto f = functions.second.begin(); f != functions.second.end(); ++f)
// Eat: remove the queue element at iter2. The offset from iter is captured
// before the erase; presumably it is used to rebuild iter afterwards, since
// erasing from a deque invalidates iterators.
639 case lexer_atom_function::Eat:
640 if (iter2 != aContext.iQueue.end())
642 std::ptrdiff_t diff = iter - iter2;
643 iter2 = aContext.iQueue.erase(iter2);
647 case lexer_atom_function::Keep:
648 if (iter2 != aContext.iQueue.end())
651 case lexer_atom_function::End:
// Concatenate the text of every atom in the matched suffix into the new atom.
657 for (
auto iter2 = iter - 1; iter2 != aContext.iQueue.end(); ++iter2)
658 atom.token_value().append(iter2->token_value());
// Replace the whole queue with the new atom (the guarding condition is not
// visible).
661 aContext.iQueue.clear();
662 aContext.iQueue.push_back(atom);
// Unless the matched suffix is just the last element and already equals atom,
// replace the suffix with atom.
667 if (iter != aContext.iQueue.end() || aContext.iQueue.back() != atom)
669 aContext.iQueue.erase(iter - 1, aContext.iQueue.end());
670 aContext.iQueue.insert(aContext.iQueue.end(), atom);
// The match started at the front of the queue.
675 if (iter - 1 == aContext.iQueue.begin())
683 else if (match.first == match_result::None)
685 if (iter - 1 == aContext.iQueue.begin())
// Re-match the text of the front atom, character by character, as a String
// search.
692 auto match = iNodes.front().match(aContext.iQueue[0].token_value().begin(), aContext.iQueue[0].token_value().end(), search_type::String);
693 if (match.first == match_result::Complete)
695 bool changed =
false;
// Retag the front atom when the matched token differs from its current token.
696 if (std::holds_alternative<token_type>(match.second))
698 auto token = static_variant_cast<token_type>(match.second);
699 if (aContext.iQueue[0].token() != token)
701 aContext.iQueue[0].set_token(token);
705 else if (std::holds_alternative<function_type>(match.second))
707 auto token = static_variant_cast<const function_type&>(match.second).first;
708 if (aContext.iQueue[0].token() != token)
710 aContext.iQueue[0].set_token(token);
// Start over with the updated queue (the guarding condition is not visible).
715 return get_token(aContext, aAtom);
// Hand the front atom to the caller and remove it from the queue.
717 aAtom = aContext.iQueue[0];
718 aContext.iQueue.pop_front();
// Adds aRule to the rule tree, starting at expression index 0 from the first
// node in iNodes.
// NOTE(review): the line before the push_back is not visible in this listing;
// presumably the root node is only created when iNodes is empty.
721 void build(
const rule_type& aRule)
724 iNodes.push_back(node{ *
this });
725 build(aRule, 0u, iNodes.front());
// Convenience overload: fetches the expression element at aExpressionIndex from
// aRule and forwards to the atom-taking overload. aExpressionIndex must be a
// valid index into aRule.expression.
727 node& build(
const rule_type& aRule, std::size_t aExpressionIndex, node& aNode)
729 return build(aRule, aRule.expression[aExpressionIndex], aExpressionIndex, aNode);
// Adds one expression element (aAtom, at position aExpressionIndex of aRule) to
// the tree below aNode and continues with the rest of the rule.
// aHalt: when true, aAtom is never treated as the rule's terminal element, even
// if it is the last element of the expression.
// NOTE(review): some statements (including the return of the terminal branch)
// are not visible in this listing; comments describe only the visible lines.
731 node& build(
const rule_type& aRule,
const atom_type& aAtom, std::size_t aExpressionIndex, node& aNode,
bool aHalt =
false)
// Range atom: build every character in [r.first, r.second]. The upper bound is
// handled outside the loop so the loop condition can be `<`; a `<=` condition
// could never become false when r.second is the largest char_type value.
733 if (holds_alternative<range_type>(aAtom))
735 auto& r = static_variant_cast<const range_type&>(aAtom.value());
736 for (char_type ch = r.first; ch < r.second; ++ch)
737 build(aRule, ch, aExpressionIndex, aNode);
738 return build(aRule, r.second, aExpressionIndex, aNode);
// String atom: build the first character (halting when more characters remain)
// and then recurse on `s` from the node that was reached. `auto` makes `s` a
// copy of the string; presumably it is shortened on lines not visible here.
740 else if (holds_alternative<string_type>(aAtom))
742 auto s = static_variant_cast<const std::string&>(aAtom.value());
743 auto& n = build(aRule, s[0], aExpressionIndex, aNode, s.size() > 1 ?
true : false);
746 return build(aRule, s, aExpressionIndex, n);
// Last element of the expression and not halted: record the rule's symbol as
// the terminal value for this atom.
750 else if (aExpressionIndex == aRule.expression.size() - 1 && !aHalt)
752 aNode.map(aAtom, aRule.symbol);
// Otherwise follow the existing successor for this atom if there is one.
// lookup() inserts a default entry when the atom is unknown.
757 auto&
next = aNode.lookup(aAtom);
758 if (
next.first !=
nullptr)
761 return build(aRule, aExpressionIndex + 1, *
next.first);
// No successor yet: create a new node, link it from aNode and continue there.
767 iNodes.push_back(node{ *
this });
768 auto& newNode = iNodes.back();
769 aNode.map(aAtom, newNode);
771 return build(aRule, aExpressionIndex + 1, newNode);
// Reads the next character from the context's input and appends it to the
// context's queue as an atom, refilling the input buffer in BUF_SIZE chunks.
// NOTE(review): the return statements and some branch conditions are not
// visible in this listing; comments describe only the visible lines.
777 bool next(context& aContext)
const
779 const std::size_t BUF_SIZE = 32;
// Buffer exhausted: read up to BUF_SIZE bytes and shrink the buffer to the
// number of bytes actually read.
780 if (aContext.iInputBuffer.empty())
782 aContext.iInputBuffer.resize(BUF_SIZE);
783 aContext.iInput->read(&aContext.iInputBuffer[0], BUF_SIZE);
784 std::streamsize amount = aContext.iInput->gcount();
785 aContext.iInputBuffer.resize(
static_cast<std::size_t
>(amount));
// Presumably reached when nothing could be read (condition not visible): it is
// an error if no character was ever consumed, and the context is finished.
788 if (aContext.iCharIndex == 0)
789 aContext.iError =
true;
790 aContext.iFinished =
true;
// Very first character: strip a leading UTF-8 byte order mark and reject
// UTF-16 byte order marks. `find(...) == 0` tests for a prefix.
// NOTE(review): if the input consists only of a UTF-8 BOM the buffer is empty
// after stripping and the indexed read below would be out of range - confirm
// this case is guarded.
794 if (aContext.iCharIndex == 0)
796 const string_type BOM_UTF8 =
"\xEF\xBB\xBF";
797 const string_type BOM_UTF16LE =
"\xFF\xFE";
798 const string_type BOM_UTF16BE =
"\xFE\xFF";
799 if (aContext.iInputBuffer.find(BOM_UTF8) == 0)
800 aContext.iInputBuffer = aContext.iInputBuffer.substr(BOM_UTF8.size());
801 else if (aContext.iInputBuffer.find(BOM_UTF16LE) == 0)
802 throw style_sheet_not_utf8();
803 else if (aContext.iInputBuffer.find(BOM_UTF16BE) == 0)
804 throw style_sheet_not_utf8();
// A new line starts at the first character and after every '\n'.
806 if (aContext.iPreviousChar ==
'\n' || aContext.iCharIndex == 0)
808 ++aContext.iLineIndex;
809 aContext.iColumnIndex = 0;
811 ++aContext.iColumnIndex;
812 ++aContext.iCharIndex;
// Queue the character as an atom together with its one-character text.
813 char_type ch = aContext.iInputBuffer[aContext.iInputBufferIndex];
814 aContext.iPreviousChar = ch;
815 aContext.iQueue.emplace_back(ch, string_type(1, ch));
// Buffer fully consumed: clear it so the next call refills it.
816 if (++aContext.iInputBufferIndex == aContext.iInputBuffer.size())
818 aContext.iInputBuffer.clear();
819 aContext.iInputBufferIndex = 0;
824 scope_type iDefaultScope;