Monday, 5 September 2016

Parser for sample language using YACC

Lex File

%{
            #include<stdio.h>
            #include"y.tab.h"
            int line=0;
%}

%%
[ \t]+                            ;
int|float|char                  return DT;
main                             return MAIN;
return                           return RET;
if|for|while|else  return KEYW;
[a-zA-Z][a-zA-Z0-9]* return ID;
[0-9]+                          return NUM;   
[(){},;=]                       return yytext[0];
\n                                 line++;
[><>=<===]                return AOP;
%%
yywrap()
{
return 1;
}

Yacc File

%{
            #include<stdio.h>
            extern FILE *yyin;
            extern int line;
%}

%token DT NUM ID RET MAIN KEYW AOP

%%
Pdash:P                        {printf("\n Parsing Done....No Syntax Error");}

P:DT MAIN '(' ')' '{' S R '}'

S:DS S
 |ES S
 |CS S
 |

DS:DT V ';'
V:ID|V','ID

ES:ID'='E';'
E:NUM|ID

CS:KEYW '(' A ')' '{' S '}'
A:ID AOP ID|ID AOP NUM|';'';'

R :RET NUM';'
%%

int main()
{
            yyin=fopen("input.txt","r");
            yyparse();
}

yyerror()
{
            printf("\n Error at line no:%d",line);
}

Input text file

int main()
{
            int a,b,c;
            b=4;
            a=b;
            if(a>2)
            {
            for(;;)
            {
                        c=2;
            }
            }
            return 0;
}

Output

student@ubuntu:~$ yacc -d parse.y
student@ubuntu:~$ lex lex1.l
student@ubuntu:~$ cc y.tab.c lex.yy.c
student@ubuntu:~$ ./a.out

Parsing Done....No Syntax Error

student@ubuntu:~$

code generation using LEX and YACC

-----------------------------------LEX FILE--------------------------------------
%{
#include<stdio.h>
#include<string.h>
#include "y.tab.h"
%}

%%
[a-z][a-z0-9]* {strcpy(yylval.var,yytext);return NAME;}
[+]  {strcpy(yylval.var,yytext);return PLUS;}
[=]  {strcpy(yylval.var,yytext);return EQUAL;}
[-] {strcpy(yylval.var,yytext);return SUBT;}
[*] {strcpy(yylval.var,yytext);return MULT;}
[/]    {strcpy(yylval.var,yytext);return DIVI;}
[\n\t] {return yytext[0];}
%%

------------------------------YACC FILE----------------------------------------
%{
#include<stdio.h>
#include<ctype.h>
#include<string.h>
FILE *fout;
%}

%token<var> NAME PLUS EQUAL MULT DIVI SUBT
%type<var> exp
%union
{
char var[10];
}
%right EQUAL
%left PLUS SUNT
%left MULT DIVI


%%    
      
input:line'\n'input
     |'\n'input
     |/*empty*/
     ;
line:NAME EQUAL exp {fprintf(fout,"MOV %s,AX\n",$1);}
    ;
exp:NAME PLUS NAME {fprintf(fout,"MOV AX,%s \n ADD AX,%s\n",$1,$3);}
    |NAME SUBT NAME {fprintf(fout,"MOV AX,%s \n SUB AX,%s\n",$1,$3);}
    |NAME MULT NAME {fprintf(fout,"MOV AX,%s \n MUL AX,%s\n",$1,$3);}
    |NAME DIVI NAME {fprintf(fout,"MOV AX,%s \n DIV AX,%s\n",$1,$3);}
    |NAME      {strcpy($$,$1);}
    ;
%%

extern yylineno;

yyerror()
{
       printf("\neroor %d",yylineno);   
}
yywrap()
{
return 1;
}
extern FILE *yyin;
main()

{

FILE *fin;
fin=fopen("input.txt","r");
fout=fopen("out.txt","w");

yyin=fin;

yyparse();
fcloseall(); 
return 0;
}
------------------------------------INPUT FILE-----------------------
t1=a+b
t2=b+t1
c=t2

--------------------------------OUTPUT FILE---------------------------
MOV AX,a
ADD AX,b
MOV t1,AX
MOV AX,b
ADD AX,t1
MOV t2,AX

MOV c,AX

second way for Recursive descent parser

Aim : Implementing recursive descent parser for sample language.

#include<stdio.h>
#include<string.h>


char input[10];
int i=0,error=0;
void E();
void T();
void Eprime();
void Tprime();
void F();
         
main()
{
            printf("\n\t\tGRAMMER WITHOUT RECURSION");
            printf("\n\t\tE------>TE'\n\t\tE'/e\r\t\tT----->FT");
            printf("\n\t\tT------>*FT/e\n\t\tF------>(E)/id");
            printf("\n\t\tEnter an arithmetic expression   :  ");
        gets(input);
       
        E();
        if(strlen(input)==i&&error==0)
            printf("\n\t\tAccepted..!!!\n");
        else
            printf("\n\t\tRejected..!!!\n");
}
        
void E()
{
            printf("\n\t\tE------->TE'");  
            T();
        Eprime();
}

void Eprime()
{
            if(input[i]=='+')
            {
           printf("\n\t\tE'------->+TE'");
           i++;
           T();
           Eprime();
            }
            else
            printf("\n\t\tE'----->e'");
}

void T()
{
printf("\n\t\tT------->FT'");   
 F();
     Tprime();
}

void Tprime()
{
            if(input[i]=='*')
        {
            printf("\n\t\tT'------>*FT'");
                        i++;
                F();
                Tprime();
        }
            else
            {
                        printf("\n\t\tT'----->e");
            }
}

void F()
{
            if(input[i]=='a')
            {
                        printf("\n\t\tF------>a");
                        i++;
            }
        else
            {
                        if(input[i]=='(')
                {
                  printf("\n\t\tF----->(E)");
                  i++;
                  E();
                  if(input[i]==')')
                          {
                         i++;
                          }
              else
                          {
                            printf("\n\t\tSyntax Error");
                        error=1;//exit(1);
                          }
                 }
                else
         error=1;
       }
}

OUTPUT

[student@localhost ~]$ gcc rdp.c
[student@localhost ~]$ ./a.out

                        GRAMMER WITHOUT RECURSION
                        E------>TE'
                        T----->FT
                        T------>*FT/e
                        F------>(E)/id
                        Enter an arithmetic expression   :  a+a*a

                        E------->TE'
                        T------->FT'
                        F------>a
                        T'----->e
                        E'------->+TE'
                        T------->FT'
                        F------>a
                        T'------>*FT'
                        F------>a
                        T'----->e
                        E'----->e'
                        Accepted..!!!

[student@localhost ~]$

Recursive descent parser

The grammar on which we are going to do recursive descent parsing is:

E -> E+T | T
T -> T*F | F
F -> (E) | id
  RD parser will verify whether the syntax of the input stream is correct by checking each character  from left to right. A basic operation necessary is reading characters from the input stream and matching then with terminals from the grammar that describes the syntax of the input.

 The given grammar can accept all arithmetic equations involving +, * and ().
eg:
 a+(a*a)  a+a*a , (a), a , a+a+a*a+a.... etc are accepted
a++a, a***a, +a, a*, ((a . . . etc are rejected.

Solution:
First we have to avoid left recursion

E -> TE'
E' -> +TE' | ε
T -> FT'
T' -> *FT' | ε
F -> (E) | id

After eliminating Left recursion, we have to simply move from one character to next by checking whether it follow the grammar. In this program, Îµ is indicated as $.


recursive.c

#include<stdio.h>
#include<string.h>

#include<ctype.h>
  
char input[10];
int i,error;
void E();
void T();
void Eprime();
void Tprime();
void F();
  
          main()
          {

i=0;
error=0;
                printf("Enter an arithmetic expression   :  "); // Eg: a+a*a
                gets(input);
                E();
                if(strlen(input)==i&&error==0)

                        printf("\nAccepted..!!!\n");
                else printf("\nRejected..!!!\n");
                          }
          
          
  
void E()
{
     T();
     Eprime();
}

void Eprime()
{
     if(input[i]=='+')
     {
     i++;
     T();
     Eprime();
     }
     }
void T()
{
     F();
     Tprime();
}
void Tprime()
{
     if(input[i]=='*')
     {
                      i++;
                      F();
                      Tprime();
                      }
                      }
     void F()
     {
          if(isalnum(input[i]))i++;
          else if(input[i]=='(')
          {
          i++;
          E();
          if(input[i]==')')
          i++;


          else error=1; 
            }
         
          else error=1;
          }
           


Output:
 a+(a*a)  a+a*a , (a), a , a+a+a*a+a.... etc are  accepted
++a, a***a, +a, a*, ((a . . . etc are rejected.

Code generation using DAG labelled tree

Algorithm for labeling the nodes of tree(DAG):


1. For Leaf Nodes
           Assign label 1 to left child node and label 0 to right child node.

2. For Interior Nodes

          Case 1: If Node's children's labels are different, then 
          Node's Label = Maximum among Children's Labels.
          Case 2: If Node's children's labels are same, then
          Node's Label = (Left Child's Label OR Right Child's Label) + 1.

Algorithm for Generating Assembly Code: 




(Say, R is a Stack consists of registers)
void gencode(Node)
{
if Node is intermediate node of tree(DAG)

{
   
 Case 1: if Node's left child's label == 1 && Node's right child's label == 0 && Node's left child is leaf node && Node's right child is leaf node
     {
       print "MOV Node's left child's data,R[top]"
       print "op Node's right child's data,R[top]"
     }

  
 Case 2: else if Node's left child's label >= 1 && Node's right child's label == 0
     {
       gencode(Node's left child);
       print "op Node's right child's data,R[top]"

     }

  Case 3:
 else if Node's left child's label < Node's right child's label
      {
       int temp;
       Swap Register Stack's top and second top element;
       gencode(Node's right child);
       temp=pop();
       gencode(Node's left child);
       push(temp); 

       Swap Register Stack's top and second top element; 
       print "op R[top-1],R[top]"
     }

  Case 4:
 else if Node's left child's label >= Node's right child's label
      {
       int temp;
       gencode(Node's left child);
       temp=pop();
       gencode(Node's right child);
       push(temp);
       print "op R[top-1],R[top]"

     }    

}

else if Node is leaf node and it is left child of it's
 immediate parent
    {
  print "MOV Node's data,R[top]"

    }


Program: (codegeneration.cpp)

  
#include<stdlib.h>
#include<iostream>
using namespace std;

/* We will implement DAG as Strictly Binary Tree where each node has zero or two children */

struct bin_tree
 
{
char data;
int label;
struct bin_tree *right, *left;
};
typedef bin_tree node;

class dag
{
private:
/* R is stack for storing registers */
int R[10];
int top;

/* op will be used for opcode name w.r.t. arithmetic operator e.g. ADD for + */
char *op;

public:
 

void initializestack(node *root)
{
/* value of top = index of topmost element of stack R = label of Root of tree(DAG) minus one */
    top=root->label - 1;

    /* Allocating Stack Registers */
    int temp=top;
    for(int i=0;i<=top;i++)
       {
          R[i]=temp;
          temp--;
       }
}

/* insertnode() and insert() functions are for adding nodes to tree(DAG) */

void insertnode(node **tree,char val)
{
node *temp = NULL;

if(!(*tree))
    {
        temp = (node *)malloc(sizeof(node));
        temp->left = temp->right = NULL;
        temp->data = val;
        temp->label=-1;
        *tree = temp;
    }
}

void insert(node **tree,char val)
{
    char l,r;
    int numofchildren;

    insertnode(tree, val);

    cout << "\nEnter number of children of " << val <<" :";
    cin >> numofchildren;

  if(numofchildren==2)
   {
    cout << "\nEnter Left Child of " << val <<" :";
    cin >> l;
    insertnode(&(*tree)->left,l);

    cout << "\nEnter Right Child of " << val <<" :";
    cin >> r;
    insertnode(&(*tree)->right,r);
  
 
    insert(&(*tree)->left,l);
    insert(&(*tree)->right,r);
   }
}

/* findleafnodelabel() will find out the label of leaf nodes of tree(DAG) */

void findleafnodelabel(node *tree,int val)
{

if(tree->left != NULL && tree->right !=NULL)
{
findleafnodelabel(tree->left,1);
findleafnodelabel(tree->right,0);
}

else
{
tree->label=val;
}

}

/* findinteriornodelabel() will find out the label of interior nodes of tree(DAG) */

void findinteriornodelabel(node *tree)
{
if(tree->left->label==-1)
 
{
findinteriornodelabel(tree->left);
}

else if(tree->right->label==-1)
{
findinteriornodelabel(tree->right);
}

else
{

if(tree->left != NULL && tree->right !=NULL)
{

if(tree->left->label == tree->right->label)
{

tree->label=(tree->left->label)+1;
}

else
{

if(tree->left->label > tree->right->label)
{
tree->label=tree->left->label;
}
else
{
tree->label=tree->right->label;
}

}
}
}
}

/* function print_inorder() will print inorder of nodes. Here we are also printing label of each node of tree(DAG) */

void print_inorder(node * tree)
{
    if (tree)
    {
        print_inorder(tree->left);
        cout << tree->data <<" with Label "<< tree->label << "\n";
        print_inorder(tree->right);
    }
}

/* function swap() will swap the top and second top elements of Register stack R */

void swap()
{
int temp;
temp=R[0];
R[0]=R[1];
R[1]=temp;
}

/* function pop() will remove and return topmost element of stack */

int pop()
{
int temp=R[top];
top--;
return temp;
}

/* function push() will increment top by one and will insert element at top position of Register stack */

void push(int temp)
{
top++;
R[top]=temp;
}

/* nameofoperation() will return opcode w.r.t. arithmetic operator */

void  nameofoperation(char temp)
{
switch(temp)
{
case '+': op =(char *)"ADD"; break;
case '-': op =(char *)"SUB"; break;
case '*': op =(char *)"MUL"; break;
case '/': op =(char *)"DIV"; break;
}
}

/* gencode() will generate Assembly code w.r.t. labels of tree(DAG) */

void gencode(node * tree)
{
if(tree->left != NULL && tree->right != NULL)
{
if(tree->left->label == 1 && tree->right->label == 0 && tree->left->left==NULL && tree->left->right==NULL && tree->right->left==NULL && tree->right->right==NULL)
{
cout << "MOV "<< tree->left->data << "," << "R[" << R[top] << "]\n";
nameofoperation(tree->data);
cout << op << " " << tree->right->data << ",R[" << R[top] << "]\n";
}

else if(tree->left->label >= 1 && tree->right->label == 0)
{
gencode(tree->left);
nameofoperation(tree->data);
cout << op << " " << tree->right->data << ",R[" << R[top] << "]\n";
}

else if(tree->left->label < tree->right->label)
{
int temp;
swap();
gencode(tree->right);
temp=pop();
gencode(tree->left);
push(temp);
swap();
nameofoperation(tree->data);
cout << op << " " << "R[" << R[top-1] <<"],R[" << R[top] << "]\n";
}

else if(tree->left->label >= tree->right->label)
{
 
int temp;
gencode(tree->left);
temp=pop();
gencode(tree->right);
push(temp);
nameofoperation(tree->data);
cout << op << " " << "R[" << R[top-1] << "],R[" << R[top] <<"]\n";
}

}

else if(tree->left == NULL && tree->right == NULL && tree->label == 1)
{
cout << "MOV " << tree->data << ",R[" << R[top] << "]\n";
}

}

/* deltree() will free the memory allocated for tree(DAG) */

void deltree(node * tree)
{
    if (tree)
    {
        deltree(tree->left);
        deltree(tree->right);
        free(tree);
    }
}

};

/* Program execution will start from main() function */

int main()
{
    node *root;
    root = NULL;
    node *tmp;
    char val;
    int i,temp;

    dag d;
   
 
    /* Inserting nodes into tree(DAG) */

    cout << "\nEnter root of tree:";
    cin >> val;

    d.insert(&root,val);

    /* Finding Labels of Leaf nodes */
    d.findleafnodelabel(root,1);

    /* Finding Labels of Interior nodes */
    while(root->label == -1)
       d.findinteriornodelabel(root);
    /* Initializing Stack contents and top variable */
 
    d.initializestack(root);

    /* Printing inorder of nodes of tree(DAG) */
    cout << "\nInorder Display:\n";
    d.print_inorder(root);
   
 
    /* Printing assembly code w.r.t. labels of tree(DAG) */
    cout << "\nAssembly Code:\n";
    d.gencode(root);
      
 
    /* Deleting all nodes of tree */
    d.deltree(root);

    return 0;
}

How To Run:

To Compile:
>>>g++ codegeneration.cpp

To Run:

>>>./a.out



Abstrct syntax tree using LEX and YACC

*******LEX***

   %{
   #include "y.tab.h"
   extern int yylval;
   %}
   %%
   "="      { return EQ; }
   "!="     { return NE; }
   "<"      { return LT; }
   "<="     { return LE; }
   ">"      { return GT; }
   ">="     { return GE; }
   "+"      { return PLUS; }
   "-"      { return MINUS; }
   "*"      { return MULT; }
   "/"      { return DIVIDE; }
   ")"      { return RPAREN; }
   "("      { return LPAREN; }
   ":="     { return ASSIGN; }
   ";"      { return SEMICOLON; }
   "IF"     { return IF; }
   "THEN"   { return THEN; }
   "ELSE"   { return ELSE; }
   "FI"     { return FI; }
   "WHILE"  { return WHILE; }
   "DO"     { return DO; }
   "OD"     { return OD; }
   "PRINT"  { return PRINT; }
   [0-9]+   { yylval = atoi(yytext); return NUMBER; }
   [a-z]    { yylval = yytext[0] - 'a'; return NAME; }  
   \        { ; }
   \n       { nextline(); }
   \t       { ; }
   "//".*\n { nextline(); }
   .        { yyerror("illegal token"); }
   %%
   #ifndef yywrap
   yywrap() { return 1; }
   #endif

*************Yacc*********

   %start ROOT

   %token EQ
   %token NE
   %token LT
   %token LE
   %token GT
   %token GE
   %token PLUS
   %token MINUS
   %token MULT
   %token DIVIDE
   %token RPAREN
   %token LPAREN
   %token ASSIGN
   %token SEMICOLON
   %token IF
   %token THEN
   %token ELSE
   %token FI
   %token WHILE
   %token DO
   %token OD
   %token PRINT
   %token NUMBER
   %token NAME

   %%

   ROOT:
     stmtseq { execute($1); }
   ;

   statement:
     designator ASSIGN expression { $$ = assignment($1, $3); }
   | PRINT expression { $$ = print($2); }
   | IF expression THEN stmtseq ELSE stmtseq FI
        { $$ = ifstmt($2, $4, $6); }
   | IF expression THEN stmtseq FI
        { $$ = ifstmt($2, $4, empty()); }
   | WHILE expression DO stmtseq OD { $$ = whilestmt($2, $4); }  
   ;

   stmtseq:
     stmtseq SEMICOLON statement { $$ = seq($1, $3); }
   | statement { $$ = $1; }
   ;

   expression:
     expr2 { $$ = $1; }
   | expr2 EQ expr2 { $$ = eq($1, $3); }
   | expr2 NE expr2 { $$ = ne($1, $3); }
   | expr2 LT expr2 { $$ = le($1, $3); }
   | expr2 LE expr2 { $$ = le($1, $3); }
   | expr2 GT expr2 { $$ = gt($1, $3); }
   | expr2 GE expr2 { $$ = gt($1, $3); }
   ;

   expr2:
     expr3 { $$ == $1; }
   | expr2 PLUS expr3 { $$ = plus($1, $3); }
   | expr2 MINUS expr3 { $$ = minus($1, $3); }
   ;

   expr3:
     expr4 { $$ = $1; }
   | expr3 MULT expr4 { $$ = mult($1, $3); }
   | expr3 DIVIDE expr4 { $$ = divide ($1, $3); }
   ;

   expr4:
     PLUS expr4 { $$ = $2; }
   | MINUS expr4 { $$ = neg($2); }
   | LPAREN expression RPAREN { $$ = $2; }
   | NUMBER { $$ = number($1); }
   | designator { $$ = $1; }
   ;

   designator:
     NAME { $$ = name($1); }
   ;
Here is the complete definition of the abstract syntax:

   domain Statement {

      assignment (Expression lhs, Expression rhs)
      print (Expression x)
      ifstmt (Expression cond, Statement thenpart, Statement elsepart)  
      whilestmt (Expression cond, Statement body)
      seq (Statement s1, Statement s2)
      empty ()

   }

   domain Expression {

      eq (Expression x, Expression y)
      ne (Expression x, Expression y)
      lt (Expression x, Expression y)
      le (Expression x, Expression y)
      gt (Expression x, Expression y)
      ge (Expression x, Expression y)
      plus (Expression x, Expression y)
      minus (Expression x, Expression y)
      mult (Expression x, Expression y)
      divide (Expression x, Expression y)
      neg (Expression x)
      number (int x)
      name (int location)

   }