neoGFX
Cross-platform C++ app/game engine
Loading...
Searching...
No Matches
lexer.hpp
Go to the documentation of this file.
1// lexer.hpp
2/*
3 * Copyright (c) 2007 Leigh Johnston.
4 *
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * * Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * * Neither the name of Leigh Johnston nor the names of any
19 * other contributors to this software may be used to endorse or
20 * promote products derived from this software without specific prior
21 * written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
24 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
25 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
27 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34*/
35
36#pragma once
37
#include <neolib/neolib.hpp>
#include <string>
#include <vector>
#include <deque>
#include <unordered_map>
#include <fstream>
#include <sstream>
#include <optional>
#include <stdexcept>
#include <type_traits>
#include <boost/functional/hash.hpp>
48
49namespace neolib
50{
// Stateless tag type. lexer_rule::token_any() returns one and lexer_atom can
// hold it as one of its value alternatives.
struct lexer_atom_match_any {};

// The tag carries no data, so any two instances compare equal.
inline bool operator==(const lexer_atom_match_any&, const lexer_atom_match_any&)
{
    return true;
}

// Hash for the stateless tag; always zero.
inline std::size_t hash_value(const lexer_atom_match_any&)
{
    return 0u;
}
62
// Modifiers that can be attached to a token in a lexer_atom function list.
enum class lexer_atom_function
{
    Eat,    // get_token erases the queued atom at the current position
    Keep,   // get_token steps past the queued atom at the current position
    Not,    // match_atom inverts the token comparison
    End     // get_token replaces the whole queue with the produced token
};
70
// A token identifier paired with the text it was produced from.
// Stored as a std::pair: first is the token, second is the text.
template <typename Token, typename CharT = char>
class lexer_token : public std::pair<Token, std::basic_string<CharT>>
{
    typedef std::pair<Token, std::basic_string<CharT>> base_type;
public:
    typedef Token token_type;
    typedef std::basic_string<CharT> value_type;
public:
    // Value-initialised token with empty text.
    lexer_token() :
        base_type{}
    {
    }
    lexer_token(token_type aToken, const value_type& aValue) :
        base_type{aToken, aValue}
    {
    }
public:
    // The token identifier.
    token_type token() const
    {
        return base_type::first;
    }
    // The text associated with the token.
    const value_type& value() const
    {
        return base_type::second;
    }
};
97
// Empty placeholder type, used as the default Scope template argument of the atom template below.
98 struct no_scopes {};
99
100 template <typename Token, typename Scope = no_scopes, typename CharT = char>
102 {
103 public:
104 typedef Token token_type;
105 typedef Scope scope_type;
106 typedef CharT char_type;
107 typedef std::pair<char_type, char_type> range_type;
108 typedef std::basic_string<char_type> string_type;
109 typedef std::vector<lexer_atom_function> function_list;
110 typedef std::pair<token_type, function_list> function_type;
111 typedef std::pair<scope_type, bool> scope_change_type;
112 typedef std::variant<std::monostate, char_type, range_type, string_type, lexer_atom_match_any, token_type, function_type, scope_type, scope_change_type> value_type;
114 public:
115 struct not_token : std::logic_error { not_token(const std::string& aBadToken) : std::logic_error("Invalid token: '" + aBadToken + "'") {} };
116 struct not_scope : std::logic_error { not_scope(const std::string& aBadScope) : std::logic_error("Invalid scope: '" + aBadScope + "'") {} };
117 public:
119 iValue{}
120 {
121 }
122 template <typename T>
123 lexer_atom(const T& aValue, const token_value_type& aTokenValue = token_value_type{}) :
124 iValue{ aValue }, iTokenValue{ aTokenValue }
125 {
126 }
127 template <typename T>
128 lexer_atom(lexer_atom_function aFunction, const T& aValue, const token_value_type& aTokenValue = token_value_type{}) :
129 iValue{ function_type{ aValue, { aFunction } } }, iTokenValue{ aTokenValue }
130 {
131 }
132 public:
133 bool operator==(const lexer_atom& aOther) const
134 {
135 return iValue == aOther.iValue && iTokenValue == aOther.iTokenValue;
136 }
137 bool operator!=(const lexer_atom& aOther) const
138 {
139 return !(*this == aOther);
140 }
141 public:
142 template <typename T>
143 bool is() const
144 {
145 return std::holds_alternative<T>(iValue);
146 }
147 const value_type& value() const
148 {
149 return iValue;
150 }
151 bool is_token() const
152 {
153 return std::holds_alternative<token_type>(iValue) || std::holds_alternative<function_type>(iValue);
154 }
156 {
157 if (std::holds_alternative<token_type>(iValue))
158 return static_variant_cast<token_type>(iValue);
159 else if (std::holds_alternative<function_type>(iValue))
160 return static_variant_cast<const function_type&>(iValue).first;
161 else if (std::holds_alternative<char_type>(iValue))
162 throw not_token(std::string(1, static_variant_cast<char_type>(iValue)));
163 else if (std::holds_alternative<string_type>(iValue))
164 throw not_token(static_variant_cast<const string_type&>(iValue));
165 else
166 throw not_token("???");
167 }
169 {
170 iValue = aToken;
171 }
172 bool is_scope() const
173 {
174 return std::holds_alternative<scope_type>(iValue);
175 }
177 {
178 if (std::holds_alternative<scope_type>(iValue))
179 return static_variant_cast<scope_type>(iValue);
180 else
181 throw not_scope("???");
182 }
184 {
185 iValue = aScope;
186 }
187 bool has_functions() const
188 {
189 return std::holds_alternative<function_type>(iValue);
190 }
192 {
193 return static_variant_cast<const function_type&>(iValue).second;
194 }
196 {
197 return static_variant_cast<function_type&>(iValue).second;
198 }
200 {
201 return iTokenValue;
202 }
204 {
205 return iTokenValue;
206 }
207 private:
208 value_type iValue;
209 token_value_type iTokenValue;
210 };
211
212 template <typename T, typename Token, typename Scope, typename CharT>
213 inline bool holds_alternative(const lexer_atom<Token, Scope, CharT>& aAtom)
214 {
215 return aAtom.is<T>();
216 }
217
218 template <typename Atom>
220 {
221 public:
222 typedef Atom atom_type;
223 typedef typename atom_type::char_type char_type;
224 typedef typename atom_type::token_type token_type;
225 typedef typename atom_type::scope_type scope_type;
226 typedef typename atom_type::range_type range_type;
227 public:
229 std::vector<atom_type> expression;
230 public:
231 static constexpr std::pair<scope_type, bool> enter_scope(scope_type aScope)
232 {
233 return std::make_pair(aScope, true);
234 }
235 static constexpr std::pair<scope_type, bool> leave_scope(scope_type aScope)
236 {
237 return std::make_pair(aScope, false);
238 }
239 static constexpr atom_type token_end(token_type aToken)
240 {
241 return atom_type{ lexer_atom_function::End, aToken };
242 }
244 {
245 if (aAtom.has_functions())
246 {
247 aAtom.functions().push_back(lexer_atom_function::End);
248 return aAtom;
249 }
250 return typename atom_type::function_type{ aAtom.token(),{ lexer_atom_function::End } };
251 }
252 static constexpr atom_type token_eat(token_type aToken)
253 {
254 return atom_type{ lexer_atom_function::Eat, aToken };
255 }
257 {
258 if (aAtom.has_functions())
259 {
260 aAtom.functions().push_back(lexer_atom_function::Eat);
261 return aAtom;
262 }
263 return typename atom_type::function_type{ aAtom.token(),{ lexer_atom_function::Eat } };
264 }
265 static constexpr atom_type token_keep(token_type aToken)
266 {
267 return atom_type{ lexer_atom_function::Keep, aToken };
268 }
270 {
271 if (aAtom.has_functions())
272 {
273 aAtom.functions().push_back(lexer_atom_function::Keep);
274 return aAtom;
275 }
276 return typename atom_type::function_type{ aAtom.token(),{ lexer_atom_function::Keep } };
277 }
278 static constexpr atom_type token_make(token_type aToken, char_type aChar)
279 {
280 return atom_type{aToken, typename atom_type::token_value_type{ 1, aChar } };
281 }
282 static constexpr atom_type token_not(token_type aToken)
283 {
284 return atom_type{ lexer_atom_function::Not, aToken };
285 }
286 static constexpr range_type token_range(char_type aFrom, char_type aTo)
287 {
288 return range_type{ aFrom, aTo };
289 }
291 {
292 return lexer_atom_match_any{};
293 }
294 };
295
296 template <typename Atom>
297 class lexer
298 {
299 public:
300 typedef Atom atom_type;
301 typedef typename atom_type::token_type token_type;
302 typedef typename atom_type::scope_type scope_type;
303 typedef typename atom_type::scope_change_type scope_change_type;
304 typedef typename atom_type::char_type char_type;
306 typedef typename atom_type::range_type range_type;
307 typedef typename atom_type::string_type string_type;
308 typedef typename atom_type::function_type function_type;
310 private:
311 typedef std::shared_ptr<std::istream> stream_pointer;
312 public:
314 {
315 friend class lexer;
316 public:
317 context(const lexer& aParent) :
318 iParent{ aParent },
319 iFinished{ false }, iError{ false }, iCharIndex{}, iLineIndex{}, iColumnIndex{}, iPreviousChar{}, iInputBufferIndex{}
320 {
321 }
322 public:
324 {
325 try
326 {
327 atom_type atom;
328 iParent.get_token(*this, atom);
329 if (*this)
330 aToken = lexer_token_type{ atom.token(), atom.token_value() };
331 }
332 catch (std::exception& e)
333 {
334 throw_with_info<std::exception>(e.what());
335 }
336 catch (...)
337 {
338 throw_with_info<std::exception>("unknown exception");
339 }
340 return *this;
341 }
342 explicit operator bool() const
343 {
344 return !iError;
345 }
346 private:
347 template <typename Exception>
348 void throw_with_info(const std::string& aReason)
349 {
350 static std::ostringstream oss;
351 oss.str("");
352 oss << "Lexer error: " << aReason << std::endl;
353 oss << "Line: " << iLineIndex << std::endl;
354 oss << "Column: " << iColumnIndex << std::endl;
355 throw Exception{ oss.str().c_str() };
356 }
357 private:
358 const lexer& iParent;
359 stream_pointer iInput;
360 bool iFinished;
361 bool iError;
362 string_type iInputBuffer;
363 std::size_t iInputBufferIndex;
364 uint32_t iCharIndex;
365 uint32_t iLineIndex;
366 uint32_t iColumnIndex;
367 char iPreviousChar;
368 std::deque<atom_type> iQueue;
369 };
370 private:
371 typedef std::vector<rule_type> rule_list;
// Selects the kind of input node::match is applied to.
372 enum class search_type
373 {
// Used by get_token when matching the atoms queued in the context.
374 Token,
// Used by get_token when matching the characters of a token's text.
375 String
376 };
// Outcome reported by node::match.
377 enum class match_result
378 {
// No entry matched, or the match could not be continued.
379 None,
// All input was consumed and a follow-on node exists, but no terminal value is stored.
380 Partial,
// All input was consumed and a terminal value is stored for it.
381 Complete
382 };
383 class node
384 {
385 public:
// NOTE(review): value_type is used here but its declaration is not visible in this
// listing (listing line 386 is absent); presumably an alias of atom_type::value_type
// — confirm against the original header.
// A map entry: the follow-on node (may be null) and the terminal value stored for a key.
387 typedef std::pair<node*, value_type> next_type;
// Result of match_atom; empty when the node has no entry for the atom.
388 typedef std::optional<next_type> optional_next_type;
389 public:
// Thrown by map(atom, terminal atom) when the terminal atom is neither a token nor a function.
390 struct bad_terminal_atom : std::logic_error { bad_terminal_atom() : std::logic_error("neolib::lexer::node::bad_terminal_atom") {} };
// Thrown by lookup(atom) when the atom is not a character, token or function.
391 struct unsupported_atom_type : std::logic_error { unsupported_atom_type() : std::logic_error("neolib::lexer::node::unsupported_atom_type") {} };
// Thrown by the map(key, node&) overloads when the key already has a follow-on node.
392 struct node_exists : std::logic_error { node_exists() : std::logic_error("neolib::lexer::node::node_exists") {} };
// Thrown by match_atom when the atom is neither a character nor a token.
393 struct invalid_atom : std::invalid_argument { invalid_atom() : std::invalid_argument("neolib::lexer::node::invalid_atom") {} };
394 public:
395 node(lexer& aParent) :
396 iParent{ aParent }, iTokenMap {}, iCharMap{}
397 {
398 }
399 public:
400 void map(const atom_type& aAtom, node& aNextNode)
401 {
402 if (holds_alternative<char_type>(aAtom))
403 map(static_variant_cast<char_type>(aAtom.value()), aNextNode);
404 else if (holds_alternative<token_type>(aAtom))
405 map(static_variant_cast<token_type>(aAtom.value()), aNextNode);
406 }
407 void map(const atom_type& aAtom, const atom_type& aTerminalAtom)
408 {
409 if (holds_alternative<token_type>(aTerminalAtom))
410 {
411 if (holds_alternative<char_type>(aAtom))
412 map(static_variant_cast<char_type>(aAtom.value()), static_variant_cast<token_type>(aTerminalAtom.value()));
413 else if (holds_alternative<token_type>(aAtom))
414 map(static_variant_cast<token_type>(aAtom.value()), static_variant_cast<token_type>(aTerminalAtom.value()));
415 else if (holds_alternative<function_type>(aAtom))
416 map(static_variant_cast<const function_type&>(aAtom.value()), static_variant_cast<token_type>(aTerminalAtom.value()));
417 }
418 else if (holds_alternative<function_type>(aTerminalAtom))
419 {
420 if (holds_alternative<char_type>(aAtom))
421 map(static_variant_cast<char_type>(aAtom.value()), static_variant_cast<const function_type&>(aTerminalAtom.value()));
422 else if (holds_alternative<token_type>(aAtom))
423 map(static_variant_cast<token_type>(aAtom.value()), static_variant_cast<const function_type&>(aTerminalAtom.value()));
424 else if (holds_alternative<function_type>(aAtom))
425 map(static_variant_cast<const function_type&>(aAtom.value()), static_variant_cast<const function_type&>(aTerminalAtom.value()));
426 }
427 else
428 throw bad_terminal_atom();
429 }
430 void map(char_type aChar, node& aNextNode)
431 {
432 if (iCharMap[aChar].first == nullptr)
433 iCharMap[aChar].first = &aNextNode;
434 else
435 throw node_exists();
436 }
437 void map(token_type aToken, node& aNextNode)
438 {
439 if (iTokenMap[aToken].first == nullptr)
440 iTokenMap[aToken].first = &aNextNode;
441 else
442 throw node_exists();
443 }
444 void map(const function_type& aFunction, node& aNextNode)
445 {
446 if (iFunctionMap[aFunction].first == nullptr)
447 iFunctionMap[aFunction].first = &aNextNode;
448 else
449 throw node_exists();
450 }
// Terminal mappings. Each overload stores the terminal value (a token or a
// function) in the entry for the given key, creating the entry if it does not
// exist and overwriting any terminal value already stored there.
// Character key, token terminal.
451 void map(char_type aChar, token_type aTerminalToken)
452 {
453 iCharMap[aChar].second = aTerminalToken;
454 }
// Token key, token terminal.
455 void map(token_type aToken, token_type aTerminalToken)
456 {
457 iTokenMap[aToken].second = aTerminalToken;
458 }
// Function key, token terminal.
459 void map(const function_type& aFunction, token_type aTerminalToken)
460 {
461 iFunctionMap[aFunction].second = aTerminalToken;
462 }
// Character key, function terminal.
463 void map(char_type aChar, const function_type& aTerminalFunction)
464 {
465 iCharMap[aChar].second = aTerminalFunction;
466 }
// Token key, function terminal.
467 void map(token_type aToken, const function_type& aTerminalFunction)
468 {
469 iTokenMap[aToken].second = aTerminalFunction;
470 }
// Function key, function terminal.
471 void map(const function_type& aFunction, const function_type& aTerminalFunction)
472 {
473 iFunctionMap[aFunction].second = aTerminalFunction;
474 }
// Returns the entry for aAtom by forwarding to the overload for the kind of
// atom held. Throws unsupported_atom_type when the atom is not a character,
// token or function.
475 const next_type& lookup(const atom_type& aAtom) const
476 {
477 if (holds_alternative<char_type>(aAtom))
478 return lookup(static_variant_cast<char_type>(aAtom.value()));
479 else if (holds_alternative<token_type>(aAtom))
480 return lookup(static_variant_cast<token_type>(aAtom.value()));
481 else if (holds_alternative<function_type>(aAtom))
482 return lookup(static_variant_cast<const function_type&>(aAtom.value()));
483 else
484 throw unsupported_atom_type();
485 }
// The keyed overloads below use operator[] on the (mutable) maps, so looking
// up a key that is absent inserts a default-constructed entry for it.
486 const next_type& lookup(char_type aChar) const
487 {
488 return iCharMap[aChar];
489 }
490 const next_type& lookup(token_type aToken) const
491 {
492 return iTokenMap[aToken];
493 }
494 const next_type& lookup(const function_type& aFunction) const
495 {
496 return iFunctionMap[aFunction];
497 }
498 template <typename Iter>
499 std::pair<match_result, value_type> match(Iter aFirst, Iter aLast, search_type aSearchType) const
500 {
501 atom_type atom = *aFirst++;
502 auto atomMatch = match_atom(atom, aSearchType);
503 if (atomMatch)
504 {
505 if (aFirst == aLast)
506 {
507 if (atomMatch->second != neolib::none)
508 return std::make_pair(match_result::Complete, atomMatch->second);
509 else if (atomMatch->first != nullptr)
510 return std::make_pair(match_result::Partial, value_type{});
511 else
512 return std::make_pair(match_result::None, value_type{});
513 }
514 else if (atomMatch->first != nullptr && (!holds_alternative<char_type>(atom) || aSearchType == search_type::String))
515 return atomMatch->first->match(aFirst, aLast, aSearchType);
516 else
517 return std::make_pair(match_result::None, value_type{});
518 }
519 else
520 return std::make_pair(match_result::None, value_type{});
521 }
522 private:
523 optional_next_type match_atom(const atom_type& aAtom, search_type aSearchType) const
524 {
525 if (holds_alternative<char_type>(aAtom))
526 {
527 if (this == &*iParent.iNodes.begin() || aSearchType == search_type::String)
528 {
529 auto existing = iCharMap.find(static_variant_cast<char_type>(aAtom.value()));
530 if (existing != iCharMap.end())
531 return existing->second;
532 }
533 return optional_next_type{};
534 }
535 else if (holds_alternative<token_type>(aAtom))
536 {
537 auto token = static_variant_cast<token_type>(aAtom.value());
538 auto existingToken = iTokenMap.find(token);
539 if (existingToken != iTokenMap.end())
540 return existingToken->second;
541 for (auto iterFunction = iFunctionMap.begin(); iterFunction != iFunctionMap.end(); ++iterFunction)
542 {
543 auto functionToken = iterFunction->first.first;
544 auto functions = iterFunction->first.second;
545 bool not_ = (std::find(functions.begin(), functions.end(), lexer_atom_function::Not) != functions.end());
546 if ((functionToken == token && !not_) || (functionToken != token && not_))
547 return iterFunction->second;
548 }
549 return optional_next_type{};
550 }
551 throw invalid_atom();
552 }
553 private:
554 lexer& iParent;
555 mutable std::unordered_map<char_type, next_type> iCharMap;
556 mutable std::unordered_map<token_type, next_type> iTokenMap;
557 mutable std::unordered_map<function_type, next_type, boost::hash<function_type>> iFunctionMap;
558 mutable std::unordered_map<scope_type, next_type, boost::hash<function_type>> iScopeMap;
559 };
560 typedef std::list<node> node_list;
561 typedef std::shared_ptr<std::istream> stream_pointer;
562 public:
// Thrown by next() when the input starts with a UTF-16 byte order mark.
struct style_sheet_not_utf8 : std::runtime_error { style_sheet_not_utf8(const std::string& reason = "neolib::lexer::style_sheet_not_utf8") : std::runtime_error(reason) {} };
// Reports a malformed rule set.
struct bad_lex_tree : std::logic_error { bad_lex_tree(const std::string& reason = "neolib::lexer::bad_lex_tree") : std::logic_error(reason) {} };
// Thrown by get_token when input ends in the middle of a partial match.
struct end_of_file_reached : std::runtime_error { end_of_file_reached(const std::string& reason = "neolib::lexer::end_of_file_reached") : std::runtime_error(reason) {} };
// Reports an invalid token.
struct invalid_token : std::runtime_error { invalid_token(const std::string& reason = "neolib::lexer::invalid_token") : std::runtime_error(reason) {} };
567 public:
568 template <typename Iter>
569 lexer(Iter aFirstRule, Iter aLastRule) :
570 iDefaultScope{}
571 {
572 for (auto r = aFirstRule; r != aLastRule; ++r)
573 build(*r);
574 }
575 template <typename Iter>
576 lexer(scope_type aDefaultScope, Iter aFirstRule, Iter aLastRule) :
577 iDefaultScope{ aDefaultScope }
578 {
579 for (auto r = aFirstRule; r != aLastRule; ++r)
580 build(*r);
581 }
582 public:
583 context open(const std::string& aPath) const
584 {
585 context newContext{ *this };
586 newContext.iInput = std::make_shared<std::ifstream>(aPath, std::ios_base::in | std::ios_base::binary);
587 return newContext;
588 }
589 context use(std::istream& aStream) const
590 {
591 context newContext{ *this };
592 newContext.iInput = stream_pointer(stream_pointer{}, &aStream);
593 return newContext;
594 }
595 context use(const std::string& aText) const
596 {
597 context newContext{ *this };
598 newContext.iInput = std::make_shared<std::istringstream>(aText);
599 return newContext;
600 }
601 private:
602 const lexer& get_token(context& aContext, atom_type& aAtom) const
603 {
604 if (aContext.iFinished && aContext.iQueue.empty())
605 {
606 aContext.iError = true;
607 return *this;
608 }
609 if (aContext.iQueue.empty())
610 if (!next(aContext))
611 return *this;
612 bool backup = false;
613 for (auto iter = aContext.iQueue.end(); iter != aContext.iQueue.begin(); iter = (backup ? aContext.iQueue.end() : iter - 1))
614 {
615 backup = false;
616 auto match = iNodes.front().match(iter - 1, aContext.iQueue.end(), search_type::Token);
617 if (match.first == match_result::Partial)
618 {
619 if (!next(aContext))
620 throw end_of_file_reached();
621 backup = true;
622 continue;
623 }
624 if (match.first == match_result::Complete)
625 {
626 atom_type atom = atom_type{};
627 bool endToken = false;
628 if (std::holds_alternative<token_type>(match.second))
629 atom = atom_type{ static_variant_cast<token_type>(match.second) };
630 else if (std::holds_alternative<function_type>(match.second))
631 {
632 auto& functions = static_variant_cast<const function_type&>(match.second);
633 atom = atom_type{ functions.first };
634 auto iter2 = iter - 1;
635 for (auto f = functions.second.begin(); f != functions.second.end(); ++f)
636 {
637 switch (*f)
638 {
639 case lexer_atom_function::Eat:
640 if (iter2 != aContext.iQueue.end())
641 {
642 std::ptrdiff_t diff = iter - iter2;
643 iter2 = aContext.iQueue.erase(iter2);
644 iter = iter2 + diff;
645 }
646 break;
647 case lexer_atom_function::Keep:
648 if (iter2 != aContext.iQueue.end())
649 ++iter2;
650 break;
651 case lexer_atom_function::End:
652 endToken = true;
653 break;
654 }
655 }
656 }
657 for (auto iter2 = iter - 1; iter2 != aContext.iQueue.end(); ++iter2)
658 atom.token_value().append(iter2->token_value());
659 if (endToken)
660 {
661 aContext.iQueue.clear();
662 aContext.iQueue.push_back(atom);
663 break;
664 }
665 else
666 {
667 if (iter != aContext.iQueue.end() || aContext.iQueue.back() != atom)
668 {
669 aContext.iQueue.erase(iter - 1, aContext.iQueue.end());
670 aContext.iQueue.insert(aContext.iQueue.end(), atom);
671 backup = true;
672 }
673 else
674 {
675 if (iter - 1 == aContext.iQueue.begin())
676 {
677 if (next(aContext))
678 backup = true;
679 }
680 }
681 }
682 }
683 else if (match.first == match_result::None)
684 {
685 if (iter - 1 == aContext.iQueue.begin())
686 {
687 next(aContext);
688 break;
689 }
690 }
691 }
692 auto match = iNodes.front().match(aContext.iQueue[0].token_value().begin(), aContext.iQueue[0].token_value().end(), search_type::String);
693 if (match.first == match_result::Complete)
694 {
695 bool changed = false;
696 if (std::holds_alternative<token_type>(match.second))
697 {
698 auto token = static_variant_cast<token_type>(match.second);
699 if (aContext.iQueue[0].token() != token)
700 {
701 aContext.iQueue[0].set_token(token);
702 changed = true;
703 }
704 }
705 else if (std::holds_alternative<function_type>(match.second))
706 {
707 auto token = static_variant_cast<const function_type&>(match.second).first;
708 if (aContext.iQueue[0].token() != token)
709 {
710 aContext.iQueue[0].set_token(token);
711 changed = true;
712 }
713 }
714 if (changed)
715 return get_token(aContext, aAtom); // recurse
716 }
717 aAtom = aContext.iQueue[0];
718 aContext.iQueue.pop_front();
719 return *this;
720 }
721 void build(const rule_type& aRule)
722 {
723 if (iNodes.empty())
724 iNodes.push_back(node{ *this });
725 build(aRule, 0u, iNodes.front());
726 }
727 node& build(const rule_type& aRule, std::size_t aExpressionIndex, node& aNode)
728 {
729 return build(aRule, aRule.expression[aExpressionIndex], aExpressionIndex, aNode);
730 }
731 node& build(const rule_type& aRule, const atom_type& aAtom, std::size_t aExpressionIndex, node& aNode, bool aHalt = false)
732 {
733 if (holds_alternative<range_type>(aAtom))
734 {
735 auto& r = static_variant_cast<const range_type&>(aAtom.value());
736 for (char_type ch = r.first; ch < r.second; ++ch)
737 build(aRule, ch, aExpressionIndex, aNode);
738 return build(aRule, r.second, aExpressionIndex, aNode);
739 }
740 else if (holds_alternative<string_type>(aAtom))
741 {
742 auto s = static_variant_cast<const std::string&>(aAtom.value());
743 auto& n = build(aRule, s[0], aExpressionIndex, aNode, s.size() > 1 ? true : false);
744 s = s.substr(1);
745 if (!s.empty())
746 return build(aRule, s, aExpressionIndex, n);
747 else
748 return n;
749 }
750 else if (aExpressionIndex == aRule.expression.size() - 1 && !aHalt)
751 {
752 aNode.map(aAtom, aRule.symbol);
753 return aNode;
754 }
755 else
756 {
757 auto& next = aNode.lookup(aAtom);
758 if (next.first != nullptr)
759 {
760 if (!aHalt)
761 return build(aRule, aExpressionIndex + 1, *next.first);
762 else
763 return *next.first;
764 }
765 else
766 {
767 iNodes.push_back(node{ *this });
768 auto& newNode = iNodes.back();
769 aNode.map(aAtom, newNode);
770 if (!aHalt)
771 return build(aRule, aExpressionIndex + 1, newNode);
772 else
773 return newNode;
774 }
775 }
776 }
777 bool next(context& aContext) const
778 {
779 const std::size_t BUF_SIZE = 32;
780 if (aContext.iInputBuffer.empty())
781 {
782 aContext.iInputBuffer.resize(BUF_SIZE);
783 aContext.iInput->read(&aContext.iInputBuffer[0], BUF_SIZE);
784 std::streamsize amount = aContext.iInput->gcount();
785 aContext.iInputBuffer.resize(static_cast<std::size_t>(amount));
786 if (amount == 0)
787 {
788 if (aContext.iCharIndex == 0)
789 aContext.iError = true;
790 aContext.iFinished = true;
791 return false;
792 }
793 }
794 if (aContext.iCharIndex == 0)
795 {
796 const string_type BOM_UTF8 = "\xEF\xBB\xBF";
797 const string_type BOM_UTF16LE = "\xFF\xFE";
798 const string_type BOM_UTF16BE = "\xFE\xFF";
799 if (aContext.iInputBuffer.find(BOM_UTF8) == 0)
800 aContext.iInputBuffer = aContext.iInputBuffer.substr(BOM_UTF8.size());
801 else if (aContext.iInputBuffer.find(BOM_UTF16LE) == 0)
802 throw style_sheet_not_utf8();
803 else if (aContext.iInputBuffer.find(BOM_UTF16BE) == 0)
804 throw style_sheet_not_utf8();
805 }
806 if (aContext.iPreviousChar == '\n' || aContext.iCharIndex == 0)
807 {
808 ++aContext.iLineIndex;
809 aContext.iColumnIndex = 0;
810 }
811 ++aContext.iColumnIndex;
812 ++aContext.iCharIndex;
813 char_type ch = aContext.iInputBuffer[aContext.iInputBufferIndex];
814 aContext.iPreviousChar = ch;
815 aContext.iQueue.emplace_back(ch, string_type(1, ch));
816 if (++aContext.iInputBufferIndex == aContext.iInputBuffer.size())
817 {
818 aContext.iInputBuffer.clear();
819 aContext.iInputBufferIndex = 0;
820 }
821 return true;
822 }
823 private:
824 scope_type iDefaultScope;
825 node_list iNodes;
826 };
827}
context(const lexer &aParent)
Definition lexer.hpp:317
context & operator>>(lexer_token_type &aToken)
Definition lexer.hpp:323
bool is_token() const
Definition lexer.hpp:151
token_value_type & token_value()
Definition lexer.hpp:203
bool is_scope() const
Definition lexer.hpp:172
void set_token(token_type aToken)
Definition lexer.hpp:168
std::pair< token_type, function_list > function_type
Definition lexer.hpp:110
function_list & functions()
Definition lexer.hpp:195
std::variant< std::monostate, char_type, range_type, string_type, lexer_atom_match_any, token_type, function_type, scope_type, scope_change_type > value_type
Definition lexer.hpp:112
bool has_functions() const
Definition lexer.hpp:187
std::pair< scope_type, bool > scope_change_type
Definition lexer.hpp:111
bool operator==(const lexer_atom &aOther) const
Definition lexer.hpp:133
void set_scope(scope_type aScope)
Definition lexer.hpp:183
bool is() const
Definition lexer.hpp:143
lexer_atom(lexer_atom_function aFunction, const T &aValue, const token_value_type &aTokenValue=token_value_type{})
Definition lexer.hpp:128
const token_value_type & token_value() const
Definition lexer.hpp:199
token_type token() const
Definition lexer.hpp:155
string_type token_value_type
Definition lexer.hpp:113
lexer_atom(const T &aValue, const token_value_type &aTokenValue=token_value_type{})
Definition lexer.hpp:123
std::basic_string< char_type > string_type
Definition lexer.hpp:108
scope_type scope() const
Definition lexer.hpp:176
const function_list & functions() const
Definition lexer.hpp:191
std::vector< lexer_atom_function > function_list
Definition lexer.hpp:109
bool operator!=(const lexer_atom &aOther) const
Definition lexer.hpp:137
std::pair< char_type, char_type > range_type
Definition lexer.hpp:107
const value_type & value() const
Definition lexer.hpp:147
static constexpr atom_type token_keep(token_type aToken)
Definition lexer.hpp:265
static constexpr std::pair< scope_type, bool > enter_scope(scope_type aScope)
Definition lexer.hpp:231
atom_type::token_type token_type
Definition lexer.hpp:224
static constexpr atom_type token_eat(token_type aToken)
Definition lexer.hpp:252
atom_type::char_type char_type
Definition lexer.hpp:223
atom_type::range_type range_type
Definition lexer.hpp:226
static constexpr std::pair< scope_type, bool > leave_scope(scope_type aScope)
Definition lexer.hpp:235
atom_type::scope_type scope_type
Definition lexer.hpp:225
static atom_type token_eat(atom_type aAtom)
Definition lexer.hpp:256
static constexpr atom_type token_not(token_type aToken)
Definition lexer.hpp:282
static atom_type token_keep(atom_type aAtom)
Definition lexer.hpp:269
static constexpr range_type token_range(char_type aFrom, char_type aTo)
Definition lexer.hpp:286
static atom_type token_end(atom_type aAtom)
Definition lexer.hpp:243
std::vector< atom_type > expression
Definition lexer.hpp:229
static constexpr atom_type token_make(token_type aToken, char_type aChar)
Definition lexer.hpp:278
static constexpr lexer_atom_match_any token_any()
Definition lexer.hpp:290
static constexpr atom_type token_end(token_type aToken)
Definition lexer.hpp:239
atom_type symbol
Definition lexer.hpp:228
token_type token() const
Definition lexer.hpp:88
std::basic_string< CharT > value_type
Definition lexer.hpp:77
lexer_token(token_type aToken, const value_type &aValue)
Definition lexer.hpp:83
const value_type & value() const
Definition lexer.hpp:92
atom_type::string_type string_type
Definition lexer.hpp:307
Atom atom_type
Definition lexer.hpp:300
lexer(Iter aFirstRule, Iter aLastRule)
Definition lexer.hpp:569
context open(const std::string &aPath) const
Definition lexer.hpp:583
atom_type::function_type function_type
Definition lexer.hpp:308
atom_type::scope_type scope_type
Definition lexer.hpp:302
atom_type::range_type range_type
Definition lexer.hpp:306
lexer_token< token_type, char_type > lexer_token_type
Definition lexer.hpp:305
context use(const std::string &aText) const
Definition lexer.hpp:595
atom_type::char_type char_type
Definition lexer.hpp:304
context use(std::istream &aStream) const
Definition lexer.hpp:589
lexer_rule< atom_type > rule_type
Definition lexer.hpp:309
atom_type::scope_change_type scope_change_type
Definition lexer.hpp:303
lexer(scope_type aDefaultScope, Iter aFirstRule, Iter aLastRule)
Definition lexer.hpp:576
atom_type::token_type token_type
Definition lexer.hpp:301
lexer_atom_function
Definition lexer.hpp:64
std::size_t hash_value(const neolib::basic_quick_string< charT, Traits, Alloc > &sv)
const none_t none
Definition variant.hpp:111
it_type next(it_type it, const typename iterator_traits< it_type >::difference_type distance=1)
Definition plf_hive.h:89
bad_lex_tree(const std::string &reason="neolib::lexer_atom::bad_lex_tree")
Definition lexer.hpp:564
end_of_file_reached(const std::string &reason="neolib::lexer_atom::end_of_file_reached")
Definition lexer.hpp:565
invalid_token(const std::string &reason="neolib::lexer_atom::invalid_token")
Definition lexer.hpp:566
style_sheet_not_utf8(const std::string &reason="neolib::lexer_atom::style_sheet_not_utf8")
Definition lexer.hpp:563
not_scope(const std::string &aBadScope)
Definition lexer.hpp:116
not_token(const std::string &aBadToken)
Definition lexer.hpp:115