Re: Help with Boost Grammar_问答_开发者_运维开发者技术经验分享

I have redesigned and extended the grammar I asked about earlier as shown below:

// BIFAnalyser.cpp : Defines the entry point for the console application.
//
//
/*=============================================================================
    Copyright (c) Temitope Jos Onunkun 2010 
    http://www.dcs.kcl.ac.uk/pg/onun/

    Use, modification and distribution is subject to the Boost Software
    License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
////////////////////////////////////////////////////////////////////////////
//                                                                        //
//  B Machine parser using the Boost "Grammar" and "Semantic Actions".    // 
//                                                                        //
////////////////////////////////////////////////////////////////////////////

#include <boost/spirit/core.hpp>
#include <boost/tokenizer.hpp>
#include <iostream>
#include <string>
#include <fstream>
#include <vector>


////////////////////////////////////////////////////////////////////////////
using namespace std;
using namespace boost::spirit;

////////////////////////////////////////////////////////////////////////////
//
//  Semantic Actions
//
////////////////////////////////////////////////////////////////////////////
//
//
namespace 
{



 //semantic action function on individual lexeme
    void    do_noint(char const* start, char const* end)
    {
        string  str(start, end);

 if (str != "NAT1")
        cout << "PUSH(" << str << ')' << endl;
    }

 //semantic action function on addition of lexemes
    void do_add(char const*, char const*)    
 { 
  cout << "ADD" << endl; 
 // for(vector<string>::iterator vi = strVect.begin(); vi < strVect.end(); ++vi)
 //  cout << *vi << " ";
 }

 //semantic action function on subtraction of lexemes
    void do_subt(char const*, char const*)   
 { 
  cout << "SUBTRACT" << endl; 
 }

 //semantic action function on multiplication of lexemes
    void do_mult(char const*, char const*)   
 { 
  cout << "\nMULTIPLY" << endl;
 }

 //semantic action function on division of lexemes
    void do_div(char const*, char const*)    
 { 
  cout << "\nDIVIDE" << endl; 

 }


//

//

 vector<vector<string> > flowTable;


 //semantic action function on simple substitution
    void do_sSubst(char const* start, char const* end)    
 { 
  string  str(start, end);

  //use boost tokenizer to break down tokens
  typedef boost::tokenizer<boost::char_separator<char> > Tokenizer;
  boost::char_separator<char> sep(" -+/*:=()",0,boost::drop_empty_tokens); // char separator definition
        Tokenizer tok(str, sep);
  Tokenizer::iterator tok_iter = tok.begin();

  pair<string, string > dependency; //create a pair object for dependencies


  //create a vector object to store all tokens 
  vector<string> dx;
   // 

  int counter = 0; // tracks token position

  for(tok.begin(); tok_iter != tok.end(); ++tok_iter)  //save all tokens in vector
  {
   dx.push_back(*tok_iter );
  }
  counter = dx.size();

  //
  vector<string> d_hat; //stores set of dependency pairs

  string dep; //pairs variables as string object

  //
  dependency.first = *tok.begin();
  vector<string> FV;

  for(int unsigned i=1; i < dx.size(); i++)
  {   
    //
   if(!atoi(dx.at(i).c_str()) && (dx.at(i) !=" ")) 
   {
    dependency.second = dx.at(i);
    dep = dependency.first + "|->" + dependency.second + " ";
    d_hat.push_back(dep);

    vector<string> row; 
    row.push_back(dependency.first);  //push x_hat into first column of each row

    for(unsigned int j=0; j<2; j++)
    {

     row.push_back(dependency.second);//push an element (column) into the row
    }

    flowTable.push_back(row); //Add the row to the main vector

   }
  }

  //displays internal representation of information flow table
  cout << "\n******************************\nDependency Table\n******************************\n";
  cout << "X_Hat\tDx\tG_Hat\n";
  cout << "-----------------------------\n";
  for(unsigned int  i=0; i < flowTable.size(); i++)
  {
   for(unsigned int j=0; j<2; j++)
   {
    cout << flowTable[i][j] << "\t ";
   }
   if (*tok.begin() != "WHILE" ) //if there are no global flows,
    cout << "\t{}";     //display empty set 

   cout << "\n";
  } 
  cout << "*****************************\n\n";


  for(int unsigned j=0; j < FV.size(); j++)
  {  
   if(FV.at(j) != dependency.second)
    dep = dependency.first + "|->" + dependency.second + " ";
   d_hat.push_back(dep);
  }





  cout << "PUSH(" << str << ')' << endl;

  cout << "\n****************\nDependency pairs\n****************\n";

  for(int unsigned i=0; i < d_hat.size(); i++)
   cout << d_hat.at(i) << "\n...\n";

  cout << "\nSIMPLE SUBSTITUTION\n\n";

 }

    //semantic action function on multiple substitution
 void do_mSubst(char const* start, char const* end)    
 { 
  string  str(start, end);


  cout << "PUSH(" << str << ')' << endl;
  //cout << "\nMULTIPLE SUBSTITUTION\n\n";
 } 


 //semantic action function on unbounded choice substitution
 void do_mChoice(char const* start, char const* end)    
 { 
  string  str(start, end);

  cout << "PUSH(" << str << ')' << endl;
  cout << "\nUNBOUNDED CHOICE SUBSTITUTION\n\n";
 } 


 void do_logicExpr(char const* start, char const* end)    
 { 
  string  str(start, end);

  //use boost tokenizer to break down tokens
  typedef boost::tokenizer<boost::char_separator<char> > Tokenizer;
  boost::char_separator<char> sep(" -+/*=:()><",0,boost::drop_empty_tokens); // char separator definition
        Tokenizer tok(str, sep);
  Tokenizer::iterator tok_iter = tok.begin();

  //pair<string, string > dependency; //create a pair object for dependencies


  //create a vector object to store all tokens 
  vector<string> dx;

  for(tok.begin(); tok_iter != tok.end(); ++tok_iter)  //save all tokens in vector
  {
   dx.push_back(*tok_iter );
  }

  for(unsigned int i=0; i<dx.size(); i++)
   if(!atoi(dx.at(i).c_str()) && (dx.at(i) !=" ") ) 
   {
    cout << "\nFree Variables: " << dx.at(i)<< endl;
   }


  cout << "PUSH(" << str << ')' << endl;
  cout << "\nPREDICATE\n\n";
 } 


 void  do_predicate(char const* start, char const* end)    
 { 
  string  str(start, end);

  cout << "PUSH(" << str << ')' << endl;
  cout << "\nMULTIPLE PREDICATE\n\n";
 } 


 void do_ifSelectPre(char const* start, char const* end)    
 { 
  string  str(start, end);

  //if

  cout << "PUSH(" << str << ')' << endl;
  cout << "\nPROTECTED SUBSTITUTION\n\n";
 } 


 //semantic action function on machine substitution
 void do_machSubst(char const* start, char const* end)    
 { 
  string  str(start, end);



  cout << "PUSH(" << str << ')' << endl;
  cout << "\nMACHINE SUBSTITUTION\n\n";
 } 
} 

////////////////////////////////////////////////////////////////////////////
//
//  Machine Substitution Grammar
//
////////////////////////////////////////////////////////////////////////////

//  Simple substitution grammar parser with integer values removed
struct Substitution : public grammar<Substitution>
{
    template <typename ScannerT>
    struct definition
    {
        definition(Substitution const& )
        {

   machine_subst
     = (  (simple_subst)
        | (multi_subst)
        | (if_select_pre_subst)
        | (unbounded_choice) )[&do_machSubst]
     ;


   unbounded_choice
     = str_p("ANY") >> ide_list
     >> str_p("WHERE") >> predicate
     >> str_p("THEN")   
     >> machine_subst
     >> str_p("END")
     ;

   if_select_pre_subst
     = (  (  str_p("IF") >> predicate >> str_p("THEN") >> machine_subst 
     >> *( str_p("ELSIF") >> predicate >> machine_subst )
     >> !( str_p("ELSE") >> machine_subst) 
     >> str_p("END") )
     | ( str_p("SELECT") >> predicate >> str_p("THEN") >> machine_subst
     >> *( str_p("WHEN") >> predicate >> machine_subst )
     >> !( str_p("ELSE") >> machine_subst) 
     >> str_p("END"))
     | ( str_p("PRE") >> predicate >> str_p("THEN") >> machine_subst 
     >> str_p("END") )  )[&do_ifSelectPre]
     ;



   multi_subst
     = ( (machine_subst) 
     >> *( ( str_p("||") >> (machine_subst) ) 
     |     ( str_p("[]") >> (machine_subst) ) )  ) [&do_mSubst]
     ;

   simple_subst
     = (identifier
     >> str_p(":=") >> arith_expr) [&do_sSubst]
     ;

   expression
     = predicate
     | arith_expr
     ;

   predicate
     = ( (logic_expr) 
     >> *( ( ch_p('&') >> (logic_expr) )
     |   ( str_p("OR") >> (logic_expr) ) ) )[&do_predicate]
     ;

   logic_expr 
     = ( identifier
     >> (str_p("<") >> arith_expr)
     | (str_p("<")  >> arith_expr)
     | (str_p("/:")  >> arith_expr)
     | (str_p("<:")  >> arith_expr)
     | (str_p("/<:")  >> arith_expr)
     | (str_p("<<:")  >> arith_expr)
     | (str_p("/<<:")  >> arith_expr)
     | (str_p("<=")  >> arith_expr)
     | (str_p("=")  >> arith_expr)
     | (str_p(">=")  >> arith_expr)
     | (str_p("=>")  >> arith_expr)
     )  [&do_logicExpr]
     ;

   arith_expr
     =   term
     >>  *(   ('+' >> term)[&do_add]
     |   ('-' >> term)[&do_subt] )
          ;

   term
     =   factor
     >>  *(   ('*' >> factor)[&do_mult]
     |   ('/' >> factor)[&do_div]  )
     ;


   factor
     =   lexeme_d[( identifier | +digit_p)[&do_noint]]
          |   '(' >> expression >> ')'
          |   ('+' >> factor)
          ;


   ide_list
     = identifier 
     >> *( ch_p(',') >> identifier )
     ;


   identifier
     = alpha_p >> +( alnum_p | ch_p('_') )
     ;

  }

  rule<ScannerT> machine_subst, unbounded_choice, if_select_pre_subst, multi_subst, 
        simple_subst, expression, predicate, logic_expr, arith_expr, 
        term, factor, ide_list, identifier;

        rule<ScannerT> const&
        start() const 
  { 
   return predicate; 
   //return multi_subst; 
   //return machine_subst; 
  }
    };
};

////////////////////////////////////////////////////////////////////////////
//
//  Main program
//
////////////////////////////////////////////////////////////////////////////
int
main()
{
    cout << "************************************************************\n\n";
    cout << "\t\t...Machine Parser...\n\n";
    cout << "************************************************************\n\n";
   // cout << "Type an expression...or [q or Q] to quit\n\n";

 string str;
 int machineCount = 0;
 char strFi开发者_如何学JAVAlename[256]; //file name store as a string object
 do
 {

 cout << "Please enter a filename...or [q or Q] to quit:\n\n "; //prompt for file name to be input
 //char strFilename[256]; //file name store as a string object
 cin >> strFilename;

 if(*strFilename == 'q' || *strFilename == 'Q') //termination condition
   return 0;

 ifstream inFile(strFilename); // opens file object for reading
 //output file for truncated machine (operations only)

   if (inFile.fail())    
   cerr << "\nUnable to open file for reading.\n" << endl;

 inFile.unsetf(std::ios::skipws); 


    Substitution elementary_subst;  //  Simple substitution parser object


    string next;

 while (inFile >> str)
    {
  getline(inFile, next);

  str += next;

        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

   parse_info<> info = parse(str.c_str(), elementary_subst >> !end_p, space_p);

   if (info.full)
   {
    cout << "\n-------------------------\n";
    cout << "Parsing succeeded\n";
    cout << "\n-------------------------\n";
   }
   else
   {
    cout << "\n-------------------------\n";
    cout << "Parsing failed\n";
    cout << "stopped at: " << info.stop << "\"\n";
    cout << "\n-------------------------\n";
   }

 }


}
while ( (*strFilename != 'q' || *strFilename !='Q')); 

    return 0;
}

However, I am experiencing the following unexpected behaviours on testing:

The text files I used are:

f1.txt,  ... containing ...:  debt:=(LoanRequest+outstandingLoan1)*20 .
f2.txt,  ... containing ...:  debt:=(LoanRequest+outstandingLoan1)*20 || newDebt := loanammount-paidammount || price := purchasePrice + overhead + bb .
f3.txt,  ... containing ...:  yy < (xx+7+ww) .
f4.txt,  ... containing ...:  yy < (xx+7+ww) & yy : NAT .

When I use multi_subst as start rule both files (f1 and f2) are parsed correctly;

When I use machine_subst as start rule file f1 parse correctly, while file f2 fails, producing the error: “Parsing failed stopped at: || newDebt := loanammount-paidammount || price := purchasePrice + overhead + bb”

When I use predicate as start symbol, file f3 parse correctly, but file f4 yields the error: “ “Parsing failed stopped at: & yy : NAT”

Can anyone help with the grammar, please? It appears there are problems with the grammar that I have so far been unable to spot.

I have redesigned the grammar as follows, and that seem to have fixed the problem:

subst = multi_choice | machine_subst ;

        multi_choice 
                = machine_subst
                >> +( str_p("[]") >> machine_subst )
                ;


        machine_subst
                = (  multi_subst 
                | simple_subst
                | if_select_pre_subst
                | unbounded_choice )[&do_machSubst]
                ;

... ...

multi_subst = ( simple_subst

+( str_p("||") >> simple_subst ) ) [&do_mSubst] ;

        simple_subst
                = (identifier
                >> str_p(":=") >> arith_expr) [&do_sSubst]
                ;

        expression
                = predicate
                | logic_expr
                | arith_expr
                ;

        predicate
                = ( logic_expr 
                >> +( ( str_p("&") >> logic_expr )
                |     ( str_p("OR") >> logic_expr ) ) )[&do_predicate]
                ;

        logic_expr 
                = ( identifier
                >> ( (str_p("<") >> arith_expr)
                | (str_p(">")  >> arith_expr)
                | (str_p("/:")  >> arith_expr)
                | (str_p("<:")  >> arith_expr)
                | (str_p("/<:")  >> arith_expr)
                | (str_p("<<:")  >> arith_expr)
                | (str_p("/<<:")  >> arith_expr)
                | (str_p("<=")  >> arith_expr)
                | (str_p("=")  >> arith_expr)
                | (str_p(">=")  >> arith_expr)
                | (str_p("=>")  >> arith_expr) )
                )  [&do_logicExpr]
                ;

... ... I now use the start rule "subst" for files f1.txt and f2.txt and "expression" for f3.txt and f4.txt.

start() const { return subst; //return machine_subst; //return expression; //return if_select_pre_subst; //return multi_choice; //return unbounded_choice; }

I am still building the grammar up, so if I have any further issues, I will post it.