/* --------------------------------------------------------------------------
 *
 * Copyright (C) 2007 Leif Erik Larsen, Kjerringvik, Norway.
 *
 * This file is part of the Open Source Edition of Larsen Commander, as
 * available from http://home.online.no/~leifel/lcmd/.  This code is free 
 * software; you can redistribute it and/or modify it under the terms of 
 * the GNU General Public License version 3 only, as published by the 
 * Free Software Foundation.  
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 3 at http://www.gnu.org/licenses/gpl-3.0.txt for more details 
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * ------------------------------------------------------------------------ */

#ifndef __GLIB_RCTOKENIZER
#define __GLIB_RCTOKENIZER

#include "glib/primitives/aptr.h"
#include "glib/resource/GRcToken.h"
#include "glib/resource/GRcIncludeModule.h"
#include "glib/util/GAbstractTokenizer.h"
#include "glib/util/GStack.h"
#include "glib/util/GKeyBag.h"
#include "glib/util/GSearchPath.h"
#include "glib/util/GExpressionParser.h"
#include "glib/vfs/GVfsLocal.h"

/**
 * Tokenizer class that will pre-compile an input stream of an ICA Resource
 * Script.
 *
 * An IBAS ICA Resource Script is a lexical description of one or more
 * UI-Resources to be used by one or several programs. Available types of
 * Resources are:
 * <b>TEXT</b> ({@link ITextResource}),
 * <b>MENU</b> ({@link IMenuResource}),
 * <b>TOOLBAR</b> ({@link IToolBarResource}),
 * <b>ICON</b> ({@link IIconResource}),
 * <b>DIALOGBOX</b> ({@link IDialogResource}),
 * <b>IMAGE</b> ({@link IImageResource}),
 * <b>CURSOR</b> ({@link ICursorResource}),
 * <b>ACCELERATOR</b> ({@link IAccelTableResource}) and
 * <b>QUERY</b> ({@link IQueryResource}).
 *
 * Such Resources may be put on a program defined window to simplify the task
 * of managing the User Interface of the program. In addition one or more
 * Resources may be combined together. For example, a Dialog Box Resource
 * may contain a Menu Resource and a Toolbar Resource, a Toolbar Resource
 * may consist of one or more Icon Resources to form the buttons, and a
 * Menu Resource may contain a number of Text strings defined in a Text
 * Table Resource. Such relationships between Resources may be defined
 * directly in the Resource Script of the Program, without having to mess
 * with the source code of the program and recompile it.
 *
 * <font size=+1><b>Some words about the syntax</b></font>
 *
 * <b>Comments</b><br>
 * The resource script may contain comments. Comments are not required,
 * but may be helpful during resource development and maintenance. There
 * are two types of comments supported by the ICA Resource Compiler.
 * They are:
 *
 * <ul>
 * <li>Comments that begin with // and run to the end of the line (as in C++).
 * <li>Comments that begin with <code>/</code><code>*</code> and run until
 *     the terminating <code>*</code><code>/</code> (as in C).
 * </ul>
 *
 * <b>Note</b> that the ICA Resource Script compiler supports nested
 * blocks of comments.
 *
 * <b>Pre-compiler Directives</b><br>
 * The ICA Resource Compiler is parsing the script in the following way:
 *
 * <ul>
 * <li>All comments are skipped.
 * <li>The remaining part of the script is pre-compiled.
 * <li>The remaining part of the script is finally compiled by the
 *     actual Resource Script Compiler.
 * </ul>
 *
 * This is about step two, the pre-compiler.
 *
 * For the most part the pre-compiler of the ICA Resource Script Compiler
 * follows the same rules and syntax as of the pre-compiler of ANSI C/C++.
 * The main difference is that the ICA Resource Script pre-compiler is
 * stream-oriented rather than line-oriented. This means that you can
 * specify pre-compiler directives anywhere in the resource script, and you
 * can specify more than one such directive in the same line of code.
 * Another main difference is that the compiler directives of the
 * ICA Resource compiler are case insensitive.
 *
 * The pre-compiler is capable of macro substitution, conditional compilation,
 * and inclusion of named files. Tokens beginning with # communicate with
 * this pre-compiler. Such statements have a syntax independent of the rest
 * of the scripting language; they may appear anywhere and have effect which
 * lasts until the end of the resource script file.
 *
 * Let's take a look at the available pre-compiler directives of the
 * ICA Resource Compiler.
 *
 * <b><code>#include</code></b><br>
 * A compiler control statement of the form
 *
 * <code>#include "filename"</code>
 *
 * causes the replacement of that statement by the entire contents of the
 * file filename. The named file is searched for in the directories defined
 * in the environment variable "server.rc.path".
 *
 * The resource script may contain one or more #include statements to include
 * the source of common resources that are actually defined in their own
 * resource script files outside your resource script file.
 *
 * Note that the <code>.rc</code> extension should not be specified as part
 * of the #include statement. The ICA Resource Compiler will add it
 * automatically.
 *
 * All the resources in the file <code>ibas\ge\stdicons.rc</code> will then
 * be compiled and included to your resource table just as if they were
 * defined in your source file at the point that contains the
 * <code>#include</code> statement.
 *
 * A resource script may contain nested <code>#include</code> statements.
 *
 * <b><code>#includetext</code></b><br>
 * A compiler control statement of the form
 *
 * <code>#includetext "logical filename"</code>
 *
 * causes the replacement of that statement by the entire contents of the
 * file logical filename. The named file is searched for in the directories
 * defined in the environment variable "server.rc.path.text".
 *
 * This compiler directive is implemented first and foremost to be used in
 * advanced menu definitions to include a set of language specific texts to
 * be used in the various menu items. The specified include-module is
 * specified as a logical name which will be automatically adjusted by
 * the ICA resource compiler during the compilation.
 *
 * <b><code>#includelogic</code></b><br>
 * A compiler control statement of the form
 *
 * <code>#includelogic "logical filename"</code>
 *
 * causes the replacement of that statement by the entire contents of the
 * file logical filename. The named file is searched for in the directories
 * defined in the environment variable "server.rc.path.logic".
 *
 * This compiler directive is implemented first and foremost to be used in
 * advanced menu definitions to include part of a menu definition.
 * The specified include-module is specified as a logical name which will
 * be automatically adjusted by the ICA resource compiler during the
 * compilation.
 *
 * <b><code>#define</code></b><br>
 * A compiler directive statement of the form
 *
 * <code>#define identifier token-string</code>
 *
 * causes the pre-compiler to replace subsequent instances of the identifier
 * with the given string of tokens. The replacement string is re-scanned for
 * more defined identifiers.
 *
 * This facility is most valuable for definition of constants of which to
 * test against in one or more <code>#if</code>, <code>#ifdef</code> or
 * <code>#ifndef</code> directives later on in the script.
 *
 * Note that the identifier is case sensitive.
 *
 * <b><code>#undef</code></b><br>
 * A pre-compiler directive statement of the form
 *
 * <code>#undef identifier</code>
 *
 * causes the identifier's pre-compiler definition to be forgotten.
 *
 * <font size=+1><b>Conditional directives</b></font>
 *
 * A pre-compiler directive statement of the form
 *
 * <code>#if constant-expression</code>
 *
 * checks whether the constant expression evaluates to non-zero.
 * A pre-compiler directive of the form
 *
 * <code>#ifdef identifier</code>
 *
 * checks whether the identifier is currently defined in the pre-compiler;
 * that is, whether it has been a subject of a recent <code>#define</code>
 * statement. A pre-compiler directive of the form
 *
 * <code>#ifndef identifier</code>
 *
 * checks whether the identifier is currently undefined in the pre-compiler.
 *
 * All of the above three forms are followed by an arbitrary number of
 * statements, possibly containing a pre-compiler statement
 *
 * <code>#else</code>
 *
 * and then by a pre-compiler directive
 *
 * <code>#endif</code>
 *
 * If the checked condition is true then any scripting code between
 * <code>#else</code> and <code>#endif</code> are ignored. If the checked
 * condition is false then any scripting code between the test and an
 * <code>#else</code> or, lacking an <code>#else</code>, the
 * <code>#endif</code>, are ignored.
 *
 * These constructions may be nested.
 *
 * <b><code>#error</code></b><br>
 * This will cause the Resource Compiler to stop compiling immediately
 * and return with an error code. The following text will be shown in a
 * message box in IBAS if the compiler was called by IBAS, or it will be
 * printed on stderr, with the prefix "Error: ", if the compiler was called
 * from the command line.
 *
 * <b><code>#warning</code></b><br>
 * This directive will print the following text on stderr, with the
 * prefix "Warning: ", if the compiler was called from the command line, or
 * it will be completely ignored if the compiler was called from IBAS.
 *
 * <b><code>#logg</code></b><br>
 * The following text will be appended to the text file pointed to by the
 * environment variable "server.rc.logg.path". If this environment
 * variable is not defined then the text will not be logged at all. ICA will
 * automatically append a linefeed character to the specified text.
 *
 * <b><code>#debug</code></b><br>
 * This statement can be used in conjunction with the ICA Resource Compiler
 * as a help when debugging the compiler. This directive is normally not used.
 *
 * <font size=+1><b>Pre-defined macros</b></font><br>
 * There are a number of pre-compiler identifiers that are always
 * automatically defined.
 *
 * __PRG_VERSTR__<br>
 * Equals the version string of the program.
 *
 * __VERSTR__<br>
 * Equals the version string of the resource compiler.
 * This is "1.0" at the time being.
 *
 * _MODULENAME<br>
 * Equals the name of the entry level resource script module name
 * being compiled.
 *
 * _LANGUAGE<br>
 * The preferred language for text resources, etc.
 *
 * @author  Leif Erik Larsen
 * @since   2000.09.04
 */
class GRcTokenizer : public GAbstractTokenizer
{
   // GRcException is granted access to the private members of the tokenizer.
   friend class GRcException;

   private:

      /**
       * Class used to represent a #if-#else-#endif directive.
       *
       * Instances of this class are the elements of the {@link #ifCB} stack.
       */
      class SingleIf : public GObject
      {
         public:

            /** True if we are inside the block that is to be compiled. */
            bool isTrue;

            /** True when the #ELSE has been passed for this level. */
            bool hasReachedElse;

         public:

            /**
             * Construct a new #if-level descriptor.
             *
             * @param   isTrue  Presumably the initial value of the member
             *                  with the same name; the implementation is
             *                  not part of this header (TODO confirm).
             */
            explicit SingleIf ( bool isTrue );

            virtual ~SingleIf ();
      };

      /** Buffer of where to put tokens as we read them. */
      GRcToken currentToken;

      /**
       * The expression parser, if any.
       *
       * NOTE(review): This is a raw pointer. Who creates and destroys the
       * parser is not visible from this header; confirm against the
       * implementation before copying or assigning tokenizer objects.
       */
      GExpressionParser* expParser;

      /** The file system from where we load include-modules, if any. */
      GVfsLocal vfs;

      /** The semicolon separated list of directories of where to search for rc-files. */
      GSearchPath searchPath;

      /** Fully qualified path or name of the top level script file module or stream. */
      GString srcPath;

      /** Stack of "#include..." modules. */
      GStack<GRcIncludeModule> incl;

      /** Pointer to the "current" module. This is always the same as "incl.top()" */
      GRcIncludeModule* curIncl;

      /** Stack of "#if..." directive CB's, each being a {@link SingleIf}. */
      GStack<SingleIf> ifCB;

      /** Case sensitive bag of #define's. */
      GKeyBag<GString> defs;

      /** The #DEFINE-string being currently parsed, if any. */
      GString curDef;

      /** Index position of cursor within curDef. */
      int curDefPos;

   public:

      // All of the below token constants are declared here only. Their
      // values are defined outside of this class declaration.

      // Symbol tokens.
      static const GString Token_comma;    // ","
      static const GString Token_dir;      // "#"
      static const GString Token_eolcom;   // "//"
      static const GString Token_eq;       // "="
      static const GString Token_lcom;     // "/*"
      static const GString Token_lpar;     // "("
      static const GString Token_rcom;     // "*/"
      static const GString Token_rpar;     // ")"
      static const GString Token_lbracket; // "{"
      static const GString Token_rbracket; // "}"

      // Precompiler directive tokens.
      // NOTE(review): The class documentation spells the logging directive
      // "#logg" while the constant is named Token_log, and it also describes
      // "#includetext" and "#includelogic" for which no constants are
      // declared here. Verify against the definitions of these constants.
      static const GString Token_debug;
      static const GString Token_define;
      static const GString Token_endif;
      static const GString Token_else;
      static const GString Token_error;
      static const GString Token_if;
      static const GString Token_ifdef;
      static const GString Token_ifndef;
      static const GString Token_include;
      static const GString Token_log;
      static const GString Token_undef;
      static const GString Token_warning;

      // Other tokens.
      static const GString Token_alt;
      static const GString Token_control;
      static const GString Token_num;
      static const GString Token_up;
      static const GString Token_text;
      static const GString Token_hint;
      static const GString Token_id;
      static const GString Token_key;
      static const GString Token_shift;
      static const GString Token_userdata;
      static const GString Token_xpos;
      static const GString Token_ypos;
      static const GString Token_width;
      static const GString Token_height;
      static const GString Token_constraints;
      static const GString Token_oily;
      static const GString Token_disabled;
      static const GString Token_toggleon;
      static const GString Token_hidden;
      static const GString Token_acceltable;
      static const GString Token_menu;
      static const GString Token_toolbar;
      static const GString Token_dialog;
      static const GString Token_icon;
      static const GString Token_layout;
      static const GString Token_closebutton;
      static const GString Token_resizable;
      static const GString Token_colorbits;
      static const GString Token_palette;
      static const GString Token_pixels;
      static const GString Token_userdata1;
      static const GString Token_invisible;
      static const GString Token_item;
      static const GString Token_popup;
      static const GString Token_separator;
      static const GString Token_alt1;
      static const GString Token_alt2;
      static const GString Token_alt3;
      static const GString Token_alt4;
      static const GString Token_alt5;
      static const GString Token_alt6;
      static const GString Token_alt7;
      static const GString Token_alt8;
      static const GString Token_alt9;

   public:

      /**
       * Construct a tokenizer for the specified string.
       *
       * @author  Leif Erik Larsen
       * @since   2005.03.09
       * @param   str         The string to be tokenized, or (if isFileName
       *                      is true) the path of which file to be
       *                      tokenized.
       * @param   isFileName  True if the specified string is actually a path
       *                      of a file of which content is to be tokenized,
       *                      or else false if the string is to be tokenized
       *                      as is, through a {@link GStringStream}.
       * @param   searchPath  The semicolon separated list of directories
       *                      of where to search for the rc-files during
       *                      loading. This search path is also used for
       *                      loading any #include-modules.
       * @param   ppdefines   Container of Pre-Processor Defines. Can be null.
       * @throws  GRcException in case of any error creating the tokenizer.
       */
      GRcTokenizer ( const GString& str, 
                     bool isFileName, 
                     const GSearchPath& searchPath, 
                     const GKeyBag<GString>* ppdefines = null );

      /**
       * Construct a tokenizer for the specified input stream.
       *
       * <b>Mark</b> that this constructor is a "lightweight" version of the
       * above declared main constructor. Using this lightweight constructor
       * does not define any of the documented predefined macros. The main
       * purpose of this lightweight version of the constructor is for usage
       * by {@link GComponentParams#GComponentParams(const GString&)} only.
       *
       * @author  Leif Erik Larsen
       * @since   2000.09.04
       * @param   stream      The stream of where to read characters. For
       *                      instance, this can be a {@link GStringStream}.
       */
      explicit GRcTokenizer ( GInputStream& stream );

      /**
       * Destruct the tokenizer.
       */
      virtual ~GRcTokenizer ();

   private:

      /**
       * Evaluate the next expression as a bool expression and return
       * the boolean value result from the evaluation.
       *
       * @author  Leif Erik Larsen
       * @since   2000.09.05
       * @return  The boolean result of the evaluated expression.
       * @throws  GRcException in case of any error.
       */
      bool parseIf ();

      /**
       * Evaluate the next expression as an integer expression and return
       * the integer value result from the evaluation.
       *
       * @author  Leif Erik Larsen
       * @since   2000.09.05
       * @return  The integer result of the evaluated expression.
       * @throws  GRcException in case of any error.
       */
      int parseInteger ();

      /**
       * Evaluate the next expression as a double expression and return
       * the double value result from the evaluation.
       *
       * @author  Leif Erik Larsen
       * @since   2000.09.05
       * @return  The double result of the evaluated expression.
       * @throws  GRcException in case of any error.
       */
      double parseFloat ();

      /**
       * Read next source line from current source file.
       *
       * The read source line will have no terminating linefeed character.
       * The terminating character will be a '\0'
       *
       * @since   2000.09.05
       * @return  True if a new line was read, or else false if EOF.
       * @throws  GRcException in case of any error.
       */
      bool readNextLine ();

      /**
       * Get the next character from the input stream.
       *
       * @return  The next character.
       */
      char getNextChar ();

      /**
       * Undo the last call to {@link #getNextChar}.
       */
      void undoLastCharacter ();

      /**
       * Inner helper method to be used by {@link #readNextToken} only.
       *
       * For example, if cLine is:
       *
       * <pre>
       *    item Text="Open a file", ID=IDM_OPEN
       * </pre>
       *
       * The below words will be returned (in the specified order):
       *
       * <pre>
       *    1: "item"
       *    2: "Text"
       *    3: "="
       *    4: "Open a file"
       *    5: ","
       *    6: "ID"
       *    7: "="
       *    8: "IDM_OPEN"
       * </pre>
       *
       * @author  Leif Erik Larsen
       * @since   2000.09.07
       * @throws  GRcException in case of any error.
       */
      void readNextToken_ ();

      /**
       * Read next token from the input stream.
       *
       * This method will only return those tokens which are not inside
       * a comment block. Thus, the only kind of parsing we are performing
       * here is to filter out comments. The caller must manually parse
       * compiler directives and only return user tokens back to the calling
       * resource script compiler.
       *
       * @author  Leif Erik Larsen
       * @since   2000.09.07
       * @param   doPreCompile True if we shall precompile symbols. Typically
       *                       this is false when called from #define,
       *                       #ifdef or #ifndef.
       * @param   acceptEOS    True if we shall accept End-Of-Stream, in
       *                       which case we will return an empty and
       *                       unquoted token.
       * @throws  GRcException in case of any error.
       */
      void readNextToken ( bool doPreCompile, bool acceptEOS );

   public:

      /**
       * Define the specified macro for the precompiler of the tokenizer.
       *
       * @since  2000.09.05
       * @param  symName    The case sensitive name of which symbol to define.
       * @param  defValue   The formatted value to be assigned to the symbol.
       * @throws GRcException in case of any error.
       */
      void defineMacro ( const char* symName, const GString& defValue );

      /**
       * @see GAbstractTokenizer#getNextAbstractToken
       */
      virtual const GAbstractToken* getNextAbstractToken ();

      /**
       * Fetch out next precompiled token from the input stream.
       *
       * @author  Leif Erik Larsen
       * @since   2000.09.07
       * @param   acceptEOS True if we shall accept End-Of-Stream, in which
       *                    case we will return an empty and unquoted token.
       *                    Else, if this parameter is false and we reach
       *                    the end of the stream, then we will throw
       *                    a {@link GRcException} exception.
       * @return  A pointer to the next precompiled token. If there are no
       *          more tokens then we will return a token
       *          whose <i>isEmpty()</i> method will return true. We will
       *          never return null. Use <i>toString()</i> on the returned
       *          token to get the token as a string (if needed). The
       *          returned token will always be a pointer
       *          to {@link #currentToken}.
       * @throws  GRcException in case of any error.
       */
      const GRcToken* nextPreCompiledToken ( bool acceptEOS );

      /**
       * Get the "srcLinePos" of the module that is currently on top of
       * the stack of include-modules.
       *
       * @see GAbstractTokenizer#getCurModuleColumn
       */
      virtual int getCurModuleColumn () const { return incl.top()->srcLinePos; }

      /**
       * Get the "lineNr" of the module that is currently on top of
       * the stack of include-modules.
       *
       * @see GAbstractTokenizer#getCurModuleLineNr
       */
      virtual int getCurModuleLineNr () const { return incl.top()->lineNr; }

      /**
       * Get the value as is the result of the next expression.
       * <p>
       * Note that this method should only be called if the specified token is a
       * starting parenthesis character.
       * <p>
       * NOTE(review): This method takes no token argument, so "the specified
       * token" presumably means the most recently read token. Confirm
       * against the implementation.
       *
       * @author  Leif Erik Larsen
       * @since   1998.09.02
       * @return  The resulting R-Value of the next expression.
       * @throws  GRcException in case of any error.
       */
      aptr<GExpressionParser::RValue> parseExpression ();

      /**
       * Fetch out the value to the right of next "=" character.
       *
       * @author  Leif Erik Larsen
       * @since   2000.09.07
       * @return  A pointer to the token that forms the value.
       * @throws  GRcException in case of any error.
       */
      const GRcToken* queryArgValue ();

      /**
       * Fetch out the value to the right of next "=" character.
       * The value is expected to be of type FLOAT. That is, a value
       * with or without decimals.
       *
       * @author  Leif Erik Larsen
       * @since   2000.09.07
       * @return  The value as a double.
       * @throws  GRcException in case of any error.
       */
      double queryArgValueFloat ();

      /**
       * Fetch out the value to the right of next "=" character.
       * The value is expected to be of type INTEGER. That is, a value
       * without any decimals.
       *
       * @author  Leif Erik Larsen
       * @since   2000.09.07
       * @return  The value as an integer.
       * @throws  GRcException in case of any error.
       */
      int queryArgValueInt ();
};

#endif


