开发者

semantic phase of c compiler

开发者 https://www.devze.com 2023-02-25 09:39 出处:网络
if write 1=a in the sample c program, it doesnt detect it as an error. How do i solve this problem? Also how do i do global and local scope of variables. Thanks if anyone can solve it

if write 1=a in the sample c program, it doesnt detect it as an error. How do i solve this problem? Also how do i do global and local scope of variables. Thanks if anyone can solve it

clexer.lex source code

D           [0-9]
L           [a-zA-Z_]
H           [a-fA-F0-9]
E           [Ee][+-]?{D}+
FS          (f|F|l|L)
IS          (u|U|l|L)*

%{
#include <stdio.h>
#include "y.tab.h"
int cnt=1;
int line=1;
char tempid[100];
%}

%%
"/*"            {comment();}

"auto"          { cnt+=yyleng;ECHO; return(AUTO); }
"break"         { cnt+=yyleng;ECHO; return(BREAK); }
"case"          { cnt+=yyleng;ECHO; return(CASE); }
"char"          { cnt+=yyleng;ECHO; return(CHAR); }
"const"         { cnt+=yyleng;ECHO; return(CONST); }
"continue"      { cnt+=yyleng;ECHO; return(CONTINUE); }
"default"       { cnt+=yyleng;ECHO; return(DEFAULT); }
"do"            { cnt+=yyleng;ECHO; return(DO); }
"double"        { cnt+=yyleng;ECHO; return(DOUBLE); }
"else"          { cnt+=yyleng;ECHO; return(ELSE); }
"enum"          { cnt+=yyleng;ECHO; return(ENUM); }
"extern"        { cnt+=yyleng;ECHO; return(EXTERN); }
"float"         { cnt+=yyleng;ECHO; return(FLOAT); }
"for"           { cnt+=yyleng;ECHO; return(FOR); }
"goto"          { cnt+=yyleng;ECHO; return(GOTO); }
"if"            { cnt+=yyleng;ECHO; return(IF); }
"int"           { cnt+=yyleng;ECHO; return(INT); }
"long"          { cnt+=yyleng;ECHO; return(LONG); }
"register"      { cnt+=yyleng;ECHO; return(REGISTER); }
"return"        { cnt+=yyleng;ECHO; return(RETURN); }
"short"         { cnt+=yyleng;ECHO; return(SHORT); }
"signed"        { cnt+=yyleng;ECHO; return(SIGNED); }
"sizeof"        { cnt+=yyleng;ECHO; return(SIZEOF); }
"static"        { cnt+=yyleng;ECHO; return(STATIC); }
"struct"        { cnt+=yyleng;ECHO; return(STRUCT); }
"switch"        { cnt+=yyleng;ECHO; return(SWITCH); }
"typedef"       { cnt+=yyleng;ECHO; return(TYPEDEF); }
"union"         { cnt+=yyleng;ECHO; return(UNION); }
"unsigned"      { cnt+=yyleng;ECHO; return(UNSIGNED); }
"void"          { cnt+=yyleng;ECHO; return(VOID); }
"volatile"      { cnt+=yyleng;ECHO; return(VOLATILE); }
"while"         { cnt+=yyleng;ECHO; return(WHILE); }
(['])+({L}|{D})+(['])           { cnt+=yyleng;ECHO; return(SINGLE); }
{L}({L}|{D})*       { cnt+=yyleng;ECHO; strcpy(tempid,yytext);return(IDENTIFIER); }

0[xX]{H}+{IS}?      { cnt+=yyleng;ECHO; return(CONSTANT); }
0{D}+{IS}?      { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+{IS}?       { cnt+=yyleng;ECHO; return(CONSTANT); }
L?'(\\.|[^\\'])+'   { cnt+=yyleng;ECHO; return(CONSTANT); }

{D}+{E}{FS}?        { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}*"."{D}+({E})?{FS}?  { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+"."{D}*({E})?{FS}?  { cnt+=yyleng;ECHO; return(CONSTANT); }

L?\"(\\.|[^\\"])*\" { cnt+=yyleng;ECHO; return(STRING_LITERAL); }

"..."           { cnt+=yyleng;ECHO; return(ELLIPSIS); }
">>="           { cnt+=yyleng;ECHO; return(RIGHT_ASSIGN); }
"<<="           { cnt+=yyleng;ECHO; return(LEFT_ASSIGN); }
"+="            { cnt+=yyleng;ECHO; return(ADD_ASSIGN); }
"-="            { cnt+=yyleng;ECHO; return(SUB_ASSIGN); }
"*="            { cnt+=yyleng;ECHO; return(MUL_ASSIGN); }
"/="            { cnt+=yyleng;ECHO; return(DIV_ASSIGN); }
"%="            { cnt+=yyleng;ECHO; return(MOD_ASSIGN); }
"&="            { cnt+=yyleng;ECHO; return(AND_ASSIGN); }
"^="            { cnt+=yyleng;ECHO; return(XOR_ASSIGN); }
"|="            { cnt+=yyleng;ECHO; return(OR_ASSIGN); }
">>"            { cnt+=yyleng;ECHO; return(RIGHT_OP); }
"<<"            { cnt+=yyleng;ECHO; return(LEFT_OP); }
"++"            { cnt+=yyleng;ECHO; return(INC_OP); }
"--"            { cnt+=yyleng;ECHO; return(DEC_OP); }
"->"            { cnt+=yyleng;ECHO; return(PTR_OP); }
"&&"            { cnt+=yyleng;ECHO; return(AND_OP); }
"||"            { cnt+=yyleng;ECHO; return(OR_OP); }
"<="            { cnt+=yyleng;ECHO; return(LE_OP); }
">="            { cnt+=yyleng;ECHO; return(GE_OP); }
"=="            { cnt+=yyleng;ECHO; return(EQ_OP); }
"!="            { cnt+=yyleng;ECHO; return(NE_OP); }
";"         { cnt+=yyleng;ECHO; return(';'); }
("{"|"<%")      { cnt+=yyleng;ECHO; return('{'); }
("}"|"%>")      { cnt+=yyleng;ECHO; return('}'); }
","         { cnt+=yyleng;ECHO; return(','); }
":"         { cnt+=yyleng;ECHO; return(':'); }
"="         { cnt+=yyleng;ECHO; return('='); }
"("         { cnt+=yyleng;ECHO; return('('); }
")"         { cnt+=yyleng;ECHO; return(')'); }
("["|"<:")      { cnt+=yyleng;ECHO; return('['); }
("]"|":>")      { cnt+=yyleng;ECHO; return(']'); }
"."         { cnt+=yyleng;ECHO; return('.'); }
"&"         { cnt+=yyleng;ECHO; return('&'); }
"!"         { cnt+=yyleng;ECHO; return('!'); }
"~"         { cnt+=yyleng;ECHO; return('~'); }
"-"         { cnt+=yyleng;ECHO; return('-'); }
"+"         { cnt+=yyleng;ECHO; return('+'); }
"*"         { cnt+=yyleng;ECHO; return('*'); }
"/"         { cnt+=yyleng;ECHO; return('/'); }
"%"         { cnt+=yyleng;ECHO; return('%'); }
"<"         { cnt+=yyleng;ECHO; return('<'); }
">"         { cnt+=yyleng;ECHO; return('>'); }
"^"         { cnt+=yyleng;ECHO; return('^'); }
"|"         { cnt+=yyleng;ECHO; return('|'); }
"?"         { cnt+=yyleng;ECHO; return('?'); }

[ ]         {cnt+=yyleng;ECHO;}
[\t\v\f]        { cnt+=yyleng; }
[\n]            {line++;cnt=1;}
.           { /* ignore bad characters */ }

%%
yywrap()
{
    return(1);
}
comment()
{
    char c, c1;
loop:
    while ((c = input()) != '*' && c != 0)
    {
        if(c=='\n') {line++;cnt=1;} 
        else    {cnt++;}
    }
        //putchar(c); PUTCHAR only if comments need to be shown! 
    if ((c1 = input()) != '/' && c1 != 0)
    {
        unput(c1);
        goto loop;
    }
}

cparser.yacc source code

%{
#include <stdio.h>
#include <string.h>
#include "symbol_table.h"
extern FILE *yyin;
extern FILE *yyout;
extern int column;
extern int line;
extern int cnt;
extern char *yytext,tempid[100];
int temp,err,err1=0;

install()
{ 
    symrec *s;
    s = getsym (tempid);
    if (s == 0)
    s = putsym (tempid,temp);
    else 
    {
        printf(" VOID=1 ");
     printf(" CHAR=2 ");
     printf(" INT=3 ");
     printf(" FLOAT=4 ");
     printf(" DOUBLE=4 ");
        printf( "\n\nThere is a Semantic error at Pos : %d : %d : %s is already defined as %d\n\n",line,cnt,s->name,s->type );
        exit(0);    
    }
    err1=1;
}
int context_check()
{ 
    symrec *s;
    s = getsym(tempid); 
    if (s == 0 )
    {printf( "\n\nThere is a Semantic error at Pos : %d : %d : %s is an undeclared identifier\n\n",line,cnt,tempid);exit(0);return 0;}
    else
    return(s->type);
    err1=1;

}
type_err(int t1,int t2)
{
    if(t1&&t2)
    {
     printf(" VOID=1 ");
     printf(" CHAR=2 ");
     printf(" INT=3 ");
     printf(" FLOAT=4 ");
     printf(" DOUBLE=4 ");  
    printf( "\n\nThere is a Semantic error at Pos : %d : %d : Type mismatch for %s between %d and %d \n\n",line,cnt,tempid,t1,t2);
    err1=1;
    exit(0);    
    }   
}

%}



%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME SINGLE

%token TYPEDEF EXTERN STATIC AUTO REGISTER
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token STRUCT UNION ENUM ELLIPSIS

%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
%nonassoc LOWER_THAN_ELSE
%nonassoc ELSE

%start translation_unit
%%

primary_expression
    : IDENTIFIER    {$$=context_check();}
    | CONSTANT
    | STRING_LITERAL
    | '(' expression ')' {$$= $2;}
    ;

postfix_expression
    : primary_expression    {$$=$1;}
    | postfix_expression '[' expression ']'
    | postfix_expression '(' ')'
    | postfix_expression '(' argument_expression_list ')'
    | postfix_expression '.' IDENTIFIER 
    | postfix_expression PTR_OP IDENTIFIER
    | postfix_expression INC_OP
    | postfix_expression DEC_OP
    ;

argument_expression_list
    : assignment_expression
    | argument_expression_list ',' assignment_expression
    ;

unary_expression
    : postfix_expression    {$$=$1;}
    | INC_OP unary_expression
    | DEC_OP unary_expression
    | unary_operator cast_expression
    | SIZEOF unary_expression
    | SIZEOF '(' type_name ')'
    ;

unary_operator
    : '&'
    | '*'
    | '+'
    | '-'
    | '~'
    | '!'
    ;

cast_expression
    : unary_expression  {$$=$1;}
    | '(' type_name ')' cast_expression
    ;

multiplicative_expression
    : cast_expression   {$$=$1;}
    | multiplicative_expression '*' cast_expression
    | multiplicative_expression '/' cast_expression
    | multiplicative_expression '%' cast_expression
    ;

additive_expression
    : multiplicative_expression {$$=$1;}
    | additive_expression '+' multiplicative_expression
    | additive_expression '-' multiplicative_expression
    ;

shift_expression
    : additive_expression   {$$=$1;}
    | shift_expression LEFT_OP additive_expression
    | shift_expression RIGHT_OP additive_expression
    ;

relational_expression
    : shift_expression  {$$=$1;}
    | relational_expression '<' shift_expression
    | relational_expression '>' shift_expression
    | relational_expression LE_OP shift_expression
    | relational_expression GE_OP shift_expression
    ;

equality_expression
    : relational_expression {$$=$1;}
    | equality_expression EQ_OP relational_expression
    | equality_expression NE_OP relational_expression
    ;

and_expression
    : equality_expression   {$$=$1;}
    | and_expression '&' equality_expression
    ;

exclusive_or_expression
    : and_expression    {$$=$1;}
    | exclusive_or_expression '^' and_expression
    ;

inclusive_or_expression 
    : exclusive_or_expression   {$$=$1;}
    | inclusive_or_expression '|' exclusive_or_expression
    ;

logical_and_expression
    : inclusive_or_expression   {$$=$1;}
    | logical_and_expression AND_OP inclusive_or_expression
    ;

logical_or_expression
    : logical_and_expression    {$$=$1;}
    | logical_or_expression OR_OP logical_and_expression
    ;

conditional_expression
    : logical_or_expression {$$=$1;}
    | logical_or_expression '?' expression ':' conditional_expression
    ;

assignment_expression
    : conditional_expression    {$$=$1;}
    | unary_expression assignment_operator assignment_expression    {if($1!=$3){type_err($1,$3);}}
    ;

assignment_operator
    : '='
    | MUL_ASSIGN
    | DIV_ASSIGN
    | MOD_ASSIGN
    | ADD_ASSIGN
    | SUB_ASSIGN
    | LEFT_ASSIGN
    | RIGHT_ASSIGN
    | AND_ASSIGN
    | XOR_ASSIGN
    | OR_ASSIGN
    ;

expression
    : assignment_expression {$$=$1;}
    | expression ',' assignment_expression
    ;

constant_expression
    : conditional_expression
    ;

declaration
    : declaration_specifiers ';'
    | declaration_specifiers init_declarator_list ';'
    ;

declaration_specifiers
    : storage_class_specifier
    | storage_class_specifier declaration_specifiers
    | type_specifier
    | type_specifier declaration_specifiers
    | type_qualifier
    | type_qualifier declaration_specifiers
    ;

init_declarator_list
    : init_declarator
    | init_declarator_list ',' init_declarator
    ;

init_declarator
    : declarator
    | declarator '=' initializer
    ;

storage_class_specifier
    : TYPEDEF
    | EXTERN
    | STATIC
    | AUTO
    | REGISTER
    ;

type_specifier
    : VOID  {temp=1;}
    | CHAR  {temp=2;}
    | SHORT {temp=3;}
    | INT   {temp=3;}
    | LONG  {temp=3;}
    | FLOAT {temp=4;}
    | DOUBLE    {temp=4;}
    | SIGNED
    | UNSIGNED
    | struct_or_union_specifier
    | enum_specifier
    | TYPE_NAME
    ;

struct_or_union_specifier
    : struct_or_union IDENTIFIER '{' struct_declaration_list '}'    {install();}
    | struct_or_union '{' struct_declaration_list '}'
    | struct_or_union IDENTIFIER    {install();}
    ;

struct_or_union
    : STRUCT
    | UNION
    ;

struct_declaration_list
    : struct_declaration
    | struct_declaration_list struct_declaration
    ;

struct_declaration
    : specifier_qualifier_list struct_declarator_list ';'
    ;

specifier_qualifier_list
    : type_specifier specifier_qualifier_list
    | type_specifier
    | type_qualifier specifier_qualifier_list
    | type_qualifier
    ;

struct_declarator_list
    : struct_declarator
    | struct_declarator_list ',' struct_declarator
    ;

struct_declarator
    : declarator
    | ':' constant_expression
    | declarator ':' constant_expression
    ;

enum_specifier
    : ENUM '{' enumerator_list '}'
    | ENUM IDENTIFIER '{' enumerator_list '}'
    | ENUM IDENTIFIER
    ;

enumerator_list
    : enumerator
    | enumerator_list ',' enumerator
    ;

enumerator
    : IDENTIFIER    {context_check();}
    | IDENTIFIER '=' constant_expression    //{context_check();}
    ;

type_qualifier
    : CONST
    | VOLATILE
    ;

declarator
    : pointer direct_declarator
    | direct_declarator
    ;

direct_declarator
    : IDENTIFIER    {install();}
    | '(' declarator ')'
    | direct_declarator '[' constant_expression ']'
    | direct_declarator '[' ']'
    | direct_declarator '(' parameter_type_list ')'
    | direct_declarator '(' identifier_list ')'
    | direct_declarator '(' ')'
    ;

pointer
    : '*'
    | '*' type_qualifier_list
    | '*' pointer
    | '*' type_qualifier_list pointer
    ;

type_qualifier_list
    : type_qualifier
    | type_qualifier_list type_qualifier
    ;


parameter_type_list
    : parameter_list
    | parameter_list ',' ELLIPSIS
    ;

parameter_list
    : parameter_declaration
    | parameter_list ',' parameter_declaration
    ;

parameter_declaration
    : declaration_specifiers declarator
    | declaration_specifiers abstract_declarator
    | declaration_specifiers
    ;

identifier_list
    : IDENTIFIER    {install();}
    | identifier_list ',' IDENTIFIER    {install();}
    ;

type_name
    : specifier_qualifier_list
    | specifier_qualifier_list abstract_declarator
    ;

abstract_declarator
    : pointer
    | direct_abstract_declarator
    | pointer direct_abstract_declarator
    ;

direct_abstract_declarator
    : '(' abstract_declarator ')'
    | '[' ']'
    | '[' constant_expression ']'
    | direct_abstract_declarator '[' ']'
    | direct_abstract_declarator '[' constant_expression ']'
    | '(' ')'
    | '(' parameter_type_list ')'
    | direct_abstract_declarator '(' ')'
    | direct_abstract_declarator '(' parameter_type_list ')'
    ;

initializer
    : assignment_expression {$$=$1;}
    | '{' initializer_list '}'
    | '{' initializer_list ',' '}'
    ;

initializer_list
    : initializer
    | initializer_list ',' initializer
    ;

statement
    : labeled_statement
    | compound_statement
    | expression_statement
    | selection_statement
    | iteration_statement
    | jump_statement
    ;

labeled_statement
    : IDENTIFIER ':' statement  //{context_check();}
    | CASE constant_expression ':' statement
    | DEFAULT ':' statement
    ;

compound_statement
    : '{' '}'
    | '{' statement_list '}'
    | '{' declaration_list '}'
    | '{' declaration_list statement_list '}'
    ;

declaration_list
    : declaration
    | declaration_list declaration
    ;

statement_list
    : statement
    | statement_list statement
    ;

expression_statement
    : ';'
    | expression ';'
    ;

selection_statement
    : IF '(' expression ')' statement  %prec LOWER_THAN_ELSE ;

    | IF '(' expression ')' statement ELSE statement
    | SWITCH '(' expression ')' statement
    ;

iteration_statement
    : WHILE '(' expression ')' statement
    | DO statement WHILE '(' expression ')' ';'
    | FOR '(' expression_statement expression_statement ')' statement
    | FOR '(' expression_statement expression_statement expression ')' statement
    ;

jump_statement
    : GOTO IDENTIFIER ';'   //{context_check();}
    | CONTINUE ';'
    | BREAK ';'
    | RETURN ';'
    | RETURN expression ';'
    ;

translation_unit
    : external_declaration
    | translation_unit external_declaration
    ;

external_declaration
    : function_definition
    | declaration
    ;

function_definition
    : declaration_specifiers declarator declaration_list compound_statement
    | declaration_specifiers declarator compound_statement
    | declarator declaration_list compound_statement
    | declarator compound_statement
    ;
%%
yyerror(s)
char *s;
{
    fflush(stdout);err=1;
    printf("Syntax error at Pos : %d : %d\n",line,cnt);
    exit(0);
    //printf("\n%*s\n%*s\n", column, "^", column, s);
}
main(argc,argv)
int argc;
char **argv;
{

开发者_如何学Python    char *fname;    
    ++argv,--argc;/*skip program name*/
    if(argc>0)
    {
        yyin=fopen(argv[0],"r");
        fname=argv[0];
        strcat(fname,"_output");
        yyout=fopen(fname,"w");
    }
    else
    {
        printf("Please give the c filename as an argument.\n");
    }
    yyparse();
    if(err==0)
    printf("No Syntax errors found!\n");
    fname=argv[0];strcat(fname,"_symbol-table");
    FILE *sym_tab=fopen(fname,"w");
    fprintf(sym_tab,"Type\tSymbol\n");
    symrec *ptr;    
    for(ptr=sym_table;ptr!=(symrec *)0;ptr=(symrec *)ptr->next)
    {
        fprintf(sym_tab,"%d\t%s\n",ptr->type,ptr->name);
    }
    fclose(sym_tab);    

}   

Symbol table.h source code

#define t_void  1
#define t_char  2
#define t_int   3
#define t_float 4
struct symrec
{
    char *name;
    int type;
    struct symrec *next;
};
typedef struct symrec symrec;
symrec *sym_table = (symrec *)0;
symrec *putsym();
symrec *getsym();
symrec *putsym(char *sym_name,int sym_type)
{
    symrec *ptr;
    ptr=(symrec *)malloc(sizeof(symrec));
    ptr->name=(char *)malloc(strlen(sym_name)+1);
    strcpy(ptr->name,sym_name);
    ptr->type=sym_type;
    ptr->next=(struct symrec *)sym_table;
    sym_table=ptr;
    return ptr;
}
symrec *getsym(char *sym_name)
{
    symrec *ptr;
    for(ptr=sym_table;ptr!=(symrec *)0;ptr=(symrec *)ptr->next)
    if(strcmp(ptr->name,sym_name)==0)
    return ptr;
    return 0;
}


In general terms, when you have an assignment operation, you need to check the left operand to make sure its an lvalue and issue an error if its not. This is most commonly done as part of typechecking -- you keep attributes about values (eg, is it an lvalue or not) along with the type, and check that those attributes are correct for each use of a value.

So what you might do is use %union to define a parser value object that can hold this info:

%union {
    struct {
        Type  *type;
        int   is_lvalue;
    } valinfo;
}
%type<valinfo> assignment_expression unary_expression

Then, your rule for assignments would check this along with the type:

assignment_expression:
    unary_expression assignment_operator assignment_expression {
        if (!$1.is_lvalue)
            error("assigning to non-lvalue");
        if ($1.type != $3.type && !type_is_implicitly_convertable($3.type, $1.type))
            error("type mismatch in assignment");
        $$.type = $1. type;
        $$.is_lvalue = 0; }

Note that you need to make sure to set $$ properly in EVERY rule action that might have its value used by some other rule action; your code fails to do this, so likely won't do anything useful as is.

0

精彩评论

暂无评论...
验证码 换一张
取 消