OpenWalnut  1.5.0dev
WStructuredTextParser.h
1 //---------------------------------------------------------------------------
2 //
3 // Project: OpenWalnut ( http://www.openwalnut.org )
4 //
5 // Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
6 // For more information see http://www.openwalnut.org/copying
7 //
8 // This file is part of OpenWalnut.
9 //
10 // OpenWalnut is free software: you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // OpenWalnut is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public License
21 // along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
22 //
23 //---------------------------------------------------------------------------
24 
25 #ifndef WSTRUCTUREDTEXTPARSER_H
26 #define WSTRUCTUREDTEXTPARSER_H
27 
28 #include <algorithm>
29 #include <iostream>
30 #include <map>
31 #include <ostream>
32 #include <string>
33 #include <vector>
34 
35 #ifndef Q_MOC_RUN
36  // We exclude inclusion of boost fusion, since the combination of Qt 5.3 moc and Boost 1.56 causes MOC error. Moc complains
37  // about a macro in the boost headers. Moc does not need all symbols to be defined, so we leave these headers out.
38  #include <boost/config/warning_disable.hpp>
39  #include <boost/spirit/include/qi.hpp>
40  #include <boost/spirit/include/phoenix_core.hpp>
41  #include <boost/spirit/include/phoenix_operator.hpp>
42  #include <boost/spirit/include/phoenix_fusion.hpp>
43  #include <boost/spirit/include/phoenix_stl.hpp>
44  #include <boost/spirit/include/phoenix_object.hpp>
45  #include <boost/fusion/include/adapt_struct.hpp>
46  #include <boost/fusion/include/io.hpp>
47  #include <boost/variant/recursive_variant.hpp>
48 #endif
49 
50 #include <boost/filesystem/path.hpp>
51 
52 #include "WStringUtils.h"
53 #include "exceptions/WTypeMismatch.h"
54 #include "exceptions/WNotFound.h"
55 
56 /**
57  * This namespace contains the WStructuredTextParser data types and the parser. It builds up the abstract syntax tree (AST)
58  * for the given input which later can be traversed.
59  */
61 {
62  //! we use these quite often, so define some short alias for them:
63  namespace qi = boost::spirit::qi;
64 
65  //! we use these quite often, so define some short alias for them:
66  namespace fusion = boost::fusion;
67 
68  //! we use these quite often, so define some short alias for them:
69  namespace ascii = boost::spirit::ascii;
70 
71  //! we use these quite often, so define some short alias for them:
72  namespace phoenix = boost::phoenix;
73 
74  //! we use these quite often, so define some short alias for them:
75  namespace spirit = boost::spirit;
76 
77  /**
78  * The type used for keys
79  */
80  typedef std::string KeyType;
81 
82  /**
83  * The type used for values
84  */
85  typedef std::string ValueType;
86 
87  /**
88  * The type used for comments
89  */
90  typedef std::string CommentType;
91 
92  /**
93  * Forward declare the object type.
94  */
95  struct ObjectType;
96 
97  /**
98  * KeyValueType - a tuple containing name and value
99  */
101  {
102  /**
103  * Name string.
104  */
105  std::string m_name;
106  /**
107  * Value string.
108  */
109  std::string m_value;
110  };
111 
112  /**
113  * A node inside the AST is either another object or a key-value pair.
114  */
115  typedef
116  boost::variant<
117  boost::recursive_wrapper< ObjectType >,
118  KeyValueType,
120  >
122 
123  /**
124  * An object is always a name and contains several further nodes
125  */
126  struct ObjectType
127  {
128  /**
129  * Name of the object
130  */
131  std::string m_name;
132 
133  /**
134  * Object's members
135  */
136  std::vector< MemberType > m_nodes;
137  };
138 
139  /**
140  * An object representing all objects and comments on file level.
141  */
142  typedef std::vector< MemberType > FileType;
143 }
144 
145 
146 /**
147  * Tell boost::fusion about our types.
148  */
149 BOOST_FUSION_ADAPT_STRUCT(
151  ( std::string, m_name )
152  ( std::vector< WStructuredTextParser::MemberType >, m_nodes )
153  )
154 
155 /**
156  * Tell boost::fusion about our types.
157  */
158 BOOST_FUSION_ADAPT_STRUCT(
160  ( std::string, m_name )
161  ( std::string, m_value )
162  )
163 
164 namespace WStructuredTextParser
165 {
166  /**
167  * The grammar describing the structured format. It uses the boost::spirit features to parse the input. There are several rules to comply to
168  * successfully parse a file:
169  * <ul>
170  * <li>Key: identifier, needs to be a-z,A-Z,0-9,_
171  * <li>Object: defined as key + { ... }
172  * <li> ";" is optional after objects
173  * <li>Key-Value Pair: is a member of an object and is defines as key="value".
174  * <li>Comments begin with //
175  * </ul>
176  * For more details please see the test fixture file in core/common/test/fixtures/WStrutcuredTextParser_test.txt.
177  *
178  * \tparam Iterator the iterator, used to get the input stream
179  */
180  template <typename Iterator>
181  struct Grammar: qi::grammar<Iterator, FileType(), ascii::space_type >
182  {
183  /**
184  * Constructor and grammar description. It contains the EBNF (Extended Backus Naur Form) of the format we can parse.
185  *
186  * \param error Will contain error message if any occurs during functions execution
187  */
188  explicit Grammar( std::ostream& error ): Grammar::base_type( file, "WStructuredTextParser::Grammar" ) // NOLINT - non-const ref
189  {
190  // a key begins with a letter
191  key %= qi::char_( "a-zA-Z_" ) >> *qi::char_( "a-zA-Z_0-9" );
192  // a value is a quoted string. Multi-line strings possible
193  value %= '"' >> *( ~qi::char_( "\"" ) | qi::char_( " " ) ) >> '"';
194 
195  // a pair is: key = value
196  kvpair %= key >> '=' >> value >> ';';
197  // a comment is // + arbitrary symbols
198  comment %= qi::lexeme[ qi::char_( "/" ) >> qi::char_( "/" ) >> *qi::char_( "a-zA-Z_0-9!\"#$%&'()*,:;<>?@\\^`{|}~/ .@=[]ยง!+-" ) ];
199  // a object is a name, and a set of nested objects or key-value pairs
200  object %= ( key | value ) >> '{' >> *( object | kvpair | comment ) >> '}' >> *qi::char_( ";" );
201  // a file is basically an object without name.
202  file %= *( object | kvpair | comment );
203 
204  // provide names for these objects for better readability of parse errors
205  object.name( "object" );
206  kvpair.name( "key-value pair" );
207  key.name( "key" );
208  value.name( "value" );
209  file.name( "file" );
210  comment.name( "comment" );
211 
212  // provide error handlers
213  // XXX: can someone tell me how to get them work? According to the boost::spirit doc, this is everything needed but it doesn't work.
214  qi::on_error< qi::fail >( object, error << phoenix::val( "Error: " ) << qi::_4 );
215  qi::on_error< qi::fail >( kvpair, error << phoenix::val( "Error: " ) << qi::_4 );
216  qi::on_error< qi::fail >( key, error << phoenix::val( "Error: " ) << qi::_4 );
217  qi::on_error< qi::fail >( value, error << phoenix::val( "Error: " ) << qi::_4 );
218  qi::on_error< qi::fail >( comment, error << phoenix::val( "Error: " ) << qi::_4 );
219  qi::on_error< qi::fail >( file, error << phoenix::val( "Error: " ) << qi::_4 );
220  }
221 
222  // Rules we use
223 
224  /**
225  * Rule for objects. Attribute is ObjectType and is the start rule of the grammar. See constructor for exact definition.
226  */
227  qi::rule< Iterator, ObjectType(), ascii::space_type > object;
228 
229  /**
230  * Rule for files. Basically the same as an object but without name
231  */
232  qi::rule< Iterator, FileType(), ascii::space_type > file;
233 
234  /**
235  * Rule for comments. Ignored.
236  */
237  qi::rule< Iterator, CommentType(), ascii::space_type > comment;
238 
239  /**
240  * Key-value pair rule. See constructor for exact definition.
241  */
242  qi::rule< Iterator, KeyValueType(), ascii::space_type > kvpair;
243 
244  /**
245  * Key rule. See constructor for exact definition.
246  */
247  qi::rule< Iterator, KeyType() > key;
248 
249  /**
250  * Value rule. See constructor for exact definition.
251  */
252  qi::rule< Iterator, ValueType() > value;
253  };
254 
255  /**
256  * This simplifies working with a tree in a \ref WStructuredTextParser::FileType instance. It provides easy query and check methods. It does not
257  * provide any semantic options. So check validity of the contents and structure of the tree is the job of the using class/derived class. As
258  * the tree does not know anything about the semantics of your structure, it is also untyped. For every key you query, you need to specify
259  * the type.
260  *
261  * This tree uses the types in the WStructuredTextParser namespace. To avoid unnecessary copy operations, this class is not recursive
262  * itself. When querying, you always need to specify the full path. This class can be seen as accessor to the
263  * \ref WStructuredTextParser::ObjectType tree.
264  *
265  * \note The syntax of the parsed files is defined by the parser itself. See WStructuredTextParser::Grammar for details.
266  * \note This also stores the comments of the parsed file. This allows them to be written again if OW loads a file, modifies it and re-writes
267  * it.
268  */
269  class StructuredValueTree
270  {
271  friend class WStructuredTextParserTest; //!< Access for test class.
272  public:
273  /**
274  * This char is used as separator for identifying values in the tree. NEVER change this value.
275  */
276  static const std::string Separator;
277 
278  /**
279  * Construct the instance given the original parsing structure.
280  *
281  * \param file the parsing result structure (the root node).
282  */
283  explicit StructuredValueTree( const FileType& file );
284 
285  /**
286  * Construct the instance given a text as string.
287  *
288  * \param toParse the text to parse
289  */
290  explicit StructuredValueTree( const std::string& toParse );
291 
292  /**
293  * Construct the instance given a path to a file to load.
294  *
295  * \param file the path to a file to load.
296  */
297  explicit StructuredValueTree( const boost::filesystem::path& file );
298 
299  /**
300  * Creates an empty tree. It will contain no information at all.
301  */
302  StructuredValueTree();
303 
304  /**
305  * Cleanup.
306  */
307  virtual ~StructuredValueTree();
308 
309  /**
310  * Checks whether the given value or object exists. If you want to know only if a value with the given name exists, set valuesOnly to
311  * true.
312  *
313  * \param key path to the value
314  * \param valuesOnly if true, it checks only if a value with the name exists. If false, also objects with this name cause this function
315  * to return true.
316  *
317  * \return true if existing.
318  */
319  bool exists( std::string key, bool valuesOnly = false ) const;
320 
321  /**
322  * It is possible that there are multiple values matching a key. This method counts them.
323  *
324  * \param key path to the values to count
325  * \param valuesOnly if true, it only counts values matching the given name.
326  *
327  * \return the number of found values.
328  */
329  size_t count( std::string key, bool valuesOnly = false ) const;
330 
331  /**
332  * Queries the value with the given name. If it is not found, the default value will be returned.
333  *
334  * \param key path to the value. Paths to whole objects are invalid.
335  * \param defaultValue the default if no value was found
336  * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
337  *
338  * \throw WTypeMismatch if the value cannot be cast to the specified target type
339  *
340  * \return the value
341  *
342  * \note this does not return a reference as the default value might be returned. It returns a copy of the value.
343  */
344  template< typename T >
345  T getValue( std::string key, const T& defaultValue ) const;
346 
347  /**
348  * Queries the list of values matching the given path. If it is not found, the default value will be returned.
349  *
350  * \param key path to the value. Paths to whole objects are invalid.
351  * \param defaults the defaults if no value was found
352  * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
353  *
354  * \throw WTypeMismatch if the value cannot be cast to the specified target type
355  *
356  * \return the value
357  *
358  * \note this does not return a reference as the default value might be returned. It returns a copy of the value.
359  */
360  template< typename T >
361  std::vector< T > getValues( std::string key, const std::vector< T >& defaults ) const;
362 
363  /**
364  * Queries the list of values matching the given path. If it is not found, an empty results vector is returned.
365  *
366  * \param key path to the value. Paths to whole objects are invalid.
367  * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
368  *
369  * \throw WTypeMismatch if the value cannot be cast to the specified target type
370  *
371  * \return the value vector. Might be empty if no elements where found.
372  *
373  * \note this does not return a reference as the default value might be returned. It returns a copy of the value.
374  */
375  template< typename T >
376  std::vector< T > getValues( std::string key ) const;
377 
378  /**
379  * Queries the value with the given name. If it is not found, an exception is thrown. If multiple entries with this path exist, the first
380  * one is returned. Use \ref getValues in this case. Query the count of a key:value pair using \ref count
381  *
382  * \param key path to the value. Paths to whole objects are invalid.
383  * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
384  * \throw WTypeMismatch if the value cannot be cast to the specified target type
385  * \throw WNotFound if the key:value pair does not exist
386  *
387  * \return the value as copy to avoid any const_cast which would allow modification.
388  */
389  template< typename T >
390  T operator[]( std::string key ) const;
391 
392  /**
393  * Gets a subtree. The ValueTree returned contains the node you have searched. It only contains the first match. If all matches are
394  * needed, use \ref getSubTrees instead. If the key is not valid/nothing matches the key, an empty value tree is returned. If they key
395  * matches a key-value pair, nothing is returned. This means, this method is only useful for objects.
396  *
397  * \param key key to search.
398  *
399  * \return the structured value tree.
400  */
401  StructuredValueTree getSubTree( std::string key ) const;
402 
403  /**
404  * Gets all matching subtrees. The subtrees returned contains the node you have searched. If multiple objects match the key, a list of
405  * subtrees is returned. If nothing matches, the returned list is empty. If they key
406  * matches a key-value pair, nothing is returned. This means, this method is only useful for objects.
407  *
408  * \param key key to search.
409  *
410  * \return the structured value trees.
411  */
412  std::vector< StructuredValueTree > getSubTrees( std::string key ) const;
413 
414  protected:
415  private:
416  /**
417  * The named values.
418  */
419  FileType m_file;
420 
421  /**
422  * Recursively fills a result vector using a given path iterator. It checks whether the current element matches the current key. If yes,
423  * it traverses or adds the value to the result vector. This uses depth-first search and allows multiple matches for one key.
424  *
425  * \param current current element to check and recursively traverse
426  * \param keyIter the current path element
427  * \param keyEnd the end iter. Just used to stop iteration if the key as not further elements
428  * \param resultObjects all matching instances of type \ref WStructuredTextParser::ObjectType
429  * \param resultValues all matching instances of type \ref WStructuredTextParser::KeyValueType
430  */
431  void traverse( MemberType current, std::vector< std::string >::const_iterator keyIter,
432  std::vector< std::string >::const_iterator keyEnd,
433  std::vector< ObjectType >& resultObjects,
434  std::vector< KeyValueType >& resultValues ) const;
435 
436  /**
437  * Recursively fills a result vector using a given path iterator. It checks whether the current element matches the current key. If yes,
438  * it traverses or adds the value to the result vector. This uses depth-first search and allows multiple matches for one key.
439  *
440  * \param current current element to check and recursively traverse
441  * \param key the path
442  * \param resultObjects all matching instances of type \ref WStructuredTextParser::ObjectType
443  * \param resultValues all matching instances of type \ref WStructuredTextParser::KeyValueType
444  */
445  void traverse( FileType current, std::string key,
446  std::vector< ObjectType >& resultObjects,
447  std::vector< KeyValueType >& resultValues ) const;
448  };
449 
450  /**
451  * Parse the given input and return the syntax tree. Throws an exception WParseError on error.
452  *
453  * \param input the input to parse.
454  *
455  * \return the syntax tree in plain format. You should use WStructuredValueTree to use this.
456  *
457  * \throw WParseError on parse error
458  */
459  FileType parseFromString( std::string input );
460 
461  /**
462  * Parse the given input and return the syntax tree. Throws an exception WParseError on error.
463  *
464  * \param path the file to parse
465  *
466  * \return the syntax tree in plain format. You should use WStructuredValueTree to use this.
467  *
468  * \throw WParseError on parse error
469  * \throw WFileNotFOund in case the specified file could not be opened
470  */
471  FileType parseFromFile( boost::filesystem::path path );
472 
473  template< typename T >
474  T StructuredValueTree::getValue( std::string key, const T& defaultValue ) const
475  {
476  // NOTE: getValues ensures that always something is returned (the default value). So the returned vector has a valid begin iterator
477  return *getValues< T >( key, std::vector< T >( 1, defaultValue ) ).begin();
478  }
479 
480  template< typename T >
481  std::vector< T > StructuredValueTree::getValues( std::string key, const std::vector< T >& defaults ) const
482  {
483  std::vector< T > r = getValues< T >( key );
484  if( r.size() )
485  {
486  return r;
487  }
488  else
489  {
490  return defaults;
491  }
492  }
493 
494  template< typename T >
495  T StructuredValueTree::operator[]( std::string key ) const
496  {
497  std::vector< T > r = getValues< T >( key );
498  if( r.size() )
499  {
500  return *r.begin();
501  }
502  else
503  {
504  throw WNotFound( "The key \"" + key + "\" was not found." );
505  }
506  }
507 
508  /**
509  * Visitor to identify whether the given variant of type \ref WStructuredTextParser::MemberType is a object or key-value pair.
510  */
511  class IsLeafVisitor: public boost::static_visitor< bool >
512  {
513  public:
514  /**
515  * Returns always true as it is only called for key-value pairs.
516  *
517  * \return always true since it identified an key-value pair
518  */
519  bool operator()( const KeyValueType& /* element */ ) const
520  {
521  return true;
522  }
523 
524  /**
525  * Returns always false as it is only called for objects.
526  *
527  * \tparam T the type. Should be \ref WStructuredTextParser::ObjectType or \ref WStructuredTextParser::CommentType
528  * \return always false since it identified an Object/comment
529  */
530  template< typename T >
531  bool operator()( const T& /* element */ ) const
532  {
533  return false;
534  }
535  };
536 
537  /**
538  * Visitor to identify whether the given variant of type \ref WStructuredTextParser::MemberType is a comment.
539  */
540  class IsCommentVisitor: public boost::static_visitor< bool >
541  {
542  public:
543  /**
544  * Returns always true as it is only called for comments.
545  *
546  * \return always true
547  */
548  bool operator()( const CommentType& /* element */ ) const
549  {
550  return true;
551  }
552 
553  /**
554  * Returns always false as it is only called for objects and key-value pairs.
555  *
556  * \tparam T the type. Should be \ref WStructuredTextParser::ObjectType or \ref WStructuredTextParser::KeyValueType
557  * \return always false since it identified an Object/KeyValueType
558  */
559  template< typename T >
560  bool operator()( const T& /* element */ ) const
561  {
562  return false;
563  }
564  };
565 
566  /**
567  * Visitor to query the m_name member of \ref WStructuredTextParser::ObjectType and \ref WStructuredTextParser::KeyValueType.
568  */
569  class NameQueryVisitor: public boost::static_visitor< std::string >
570  {
571  public:
572  /**
573  * Comments have no name.
574  *
575  * \return empty string.
576  */
577  std::string operator()( const CommentType& /* element */ ) const
578  {
579  return "";
580  }
581 
582  /**
583  * Returns the m_name member of the specified object or key-valuev pair.
584  *
585  * \param element Specified object.
586  *
587  * \tparam T one of the types of the \ref WStructuredTextParser::MemberType variant
588  * \return always true since it identified an key-value pair
589  */
590  template< typename T >
591  std::string operator()( const T& element ) const
592  {
593  return element.m_name;
594  }
595  };
596 
597  template< typename T >
598  std::vector< T > StructuredValueTree::getValues( std::string key ) const
599  {
600  // traverse the tree
601  std::vector< ObjectType > rObj;
602  std::vector< KeyValueType > rKV;
603 
604  // traverse
605  traverse( m_file, key, rObj, rKV );
606 
607  // copy to result vector and cast
608  std::vector< T > r;
609  for( std::vector< KeyValueType >::const_iterator i = rKV.begin(); i != rKV.end(); ++i )
610  {
611  try
612  {
613  r.push_back( string_utils::fromString< T >( ( *i ).m_value ) );
614  }
615  catch( ... )
616  {
617  // convert the standard exception (if cannot convert) to a WTypeMismnatch.
618  throw WTypeMismatch( "Cannot convert element \"" + key + "\" to desired type." );
619  }
620  }
621 
622  // done
623  return r;
624  }
625 }
626 
627 #endif // WSTRUCTUREDTEXTPARSER_H
628 
Indicates invalid value which could not be found.
Definition: WNotFound.h:36
Test parsing and query functionality.
Indicates invalid type of something.
Definition: WTypeMismatch.h:37
This namespace contains the WStructuredTextParser data types and the parser.
std::vector< MemberType > FileType
An object representing all objects and comments on file level.
std::string ValueType
The type used for values.
std::string KeyType
The type used for keys.
std::string CommentType
The type used for comments.
boost::variant< boost::recursive_wrapper< ObjectType >, KeyValueType, CommentType > MemberType
A node inside the AST is either another object or a key-value pair.
KeyValueType - a tuple containing name and value.
An object is always a name and contains several further nodes.
std::string m_name
Name of the object.
std::vector< MemberType > m_nodes
Object's members.