Tuesday, 6 September 2016
Monday, 5 September 2016
Parser for sample language using YACC
Lex File
%{
#include<stdio.h>
#include"y.tab.h"
int
line=0;
%}
%%
[ \t]+ ;
int|float|char return DT;
main return
MAIN;
return return
RET;
if|for|while|else return KEYW;
[a-zA-Z][a-zA-Z0-9]* return ID;
[0-9]+ return
NUM;
[(){},;=] return
yytext[0];
\n line++;
[><>=<===] return AOP;
%%
yywrap()
{
return 1;
}
Yacc File
%{
#include<stdio.h>
extern
FILE *yyin;
extern
int line;
%}
%token DT NUM ID RET MAIN KEYW AOP
%%
Pdash:P {printf("\n
Parsing Done....No Syntax Error");}
P:DT MAIN '(' ')' '{' S R '}'
S:DS S
|ES S
|CS S
|
DS:DT V ';'
V:ID|V','ID
ES:ID'='E';'
E:NUM|ID
CS:KEYW '(' A ')' '{' S '}'
A:ID AOP ID|ID AOP NUM|';'';'
R :RET NUM';'
%%
int main()
{
yyin=fopen("input.txt","r");
yyparse();
}
yyerror()
{
printf("\n
Error at line no:%d",line);
}
Input text file
int main()
{
int
a,b,c;
b=4;
a=b;
if(a>2)
{
for(;;)
{
c=2;
}
}
return
0;
}
Output
student@ubuntu:~$ yacc -d parse.y
student@ubuntu:~$ lex lex1.l
student@ubuntu:~$ cc y.tab.c lex.yy.c
student@ubuntu:~$ ./a.out
Parsing Done....No Syntax Error
student@ubuntu:~$
code generation using LEX and YACC
-----------------------------------LEX
FILE--------------------------------------
%{
#include<stdio.h>
#include<string.h>
#include "y.tab.h"
%}
%%
[a-z][a-z0-9]* {strcpy(yylval.var,yytext);return NAME;}
[+]
{strcpy(yylval.var,yytext);return PLUS;}
[=] {strcpy(yylval.var,yytext);return
EQUAL;}
[-] {strcpy(yylval.var,yytext);return SUBT;}
[*] {strcpy(yylval.var,yytext);return MULT;}
[/] {strcpy(yylval.var,yytext);return
DIVI;}
[\n\t] {return yytext[0];}
%%
------------------------------YACC FILE----------------------------------------
%{
#include<stdio.h>
#include<ctype.h>
#include<string.h>
FILE *fout;
%}
%token<var> NAME PLUS EQUAL MULT DIVI SUBT
%type<var> exp
%union
{
char var[10];
}
%right EQUAL
%left PLUS SUNT
%left MULT DIVI
%%
input:line'\n'input
|'\n'input
|/*empty*/
;
line:NAME EQUAL exp {fprintf(fout,"MOV %s,AX\n",$1);}
;
exp:NAME PLUS NAME {fprintf(fout,"MOV AX,%s \n ADD
AX,%s\n",$1,$3);}
|NAME SUBT NAME
{fprintf(fout,"MOV AX,%s \n SUB AX,%s\n",$1,$3);}
|NAME MULT NAME
{fprintf(fout,"MOV AX,%s \n MUL AX,%s\n",$1,$3);}
|NAME DIVI NAME
{fprintf(fout,"MOV AX,%s \n DIV AX,%s\n",$1,$3);}
|NAME {strcpy($$,$1);}
;
%%
extern yylineno;
yyerror()
{
printf("\neroor
%d",yylineno);
}
yywrap()
{
return 1;
}
extern FILE *yyin;
main()
{
FILE *fin;
fin=fopen("input.txt","r");
fout=fopen("out.txt","w");
yyin=fin;
yyparse();
fcloseall();
return 0;
}
------------------------------------INPUT
FILE-----------------------
t1=a+b
t2=b+t1
c=t2
--------------------------------OUTPUT
FILE---------------------------
MOV AX,a
ADD AX,b
MOV t1,AX
MOV AX,b
ADD AX,t1
MOV t2,AX
MOV c,AX
second way for Recursive descent parser
Aim : Implementing recursive descent
parser for sample language.
#include<stdio.h>
#include<string.h>
char input[10];
int i=0,error=0;
void E();
void T();
void Eprime();
void Tprime();
void F();
main()
{
printf("\n\t\tGRAMMER
WITHOUT RECURSION");
printf("\n\t\tE------>TE'\n\t\tE'/e\r\t\tT----->FT");
printf("\n\t\tT------>*FT/e\n\t\tF------>(E)/id");
printf("\n\t\tEnter
an arithmetic expression : ");
gets(input);
E();
if(strlen(input)==i&&error==0)
printf("\n\t\tAccepted..!!!\n");
else
printf("\n\t\tRejected..!!!\n");
}
void E()
{
printf("\n\t\tE------->TE'");
T();
Eprime();
}
void Eprime()
{
if(input[i]=='+')
{
printf("\n\t\tE'------->+TE'");
i++;
T();
Eprime();
}
else
printf("\n\t\tE'----->e'");
}
void T()
{
printf("\n\t\tT------->FT'");
F();
Tprime();
}
void Tprime()
{
if(input[i]=='*')
{
printf("\n\t\tT'------>*FT'");
i++;
F();
Tprime();
}
else
{
printf("\n\t\tT'----->e");
}
}
void F()
{
if(input[i]=='a')
{
printf("\n\t\tF------>a");
i++;
}
else
{
if(input[i]=='(')
{
printf("\n\t\tF----->(E)");
i++;
E();
if(input[i]==')')
{
i++;
}
else
{
printf("\n\t\tSyntax Error");
error=1;//exit(1);
}
}
else
error=1;
}
}
OUTPUT
[student@localhost ~]$ gcc rdp.c
[student@localhost ~]$ ./a.out
GRAMMER
WITHOUT RECURSION
E------>TE'
T----->FT
T------>*FT/e
F------>(E)/id
Enter
an arithmetic expression : a+a*a
E------->TE'
T------->FT'
F------>a
T'----->e
E'------->+TE'
T------->FT'
F------>a
T'------>*FT'
F------>a
T'----->e
E'----->e'
Accepted..!!!
[student@localhost ~]$
Recursive descent parser
The grammar on
which we are going to do recursive descent parsing is:
E -> E+T | T
T -> T*F | F
F -> (E) | id
RD parser will verify whether the syntax of the input stream is correct by checking each character from left to right. A basic operation necessary is reading characters from the input stream and matching then with terminals from the grammar that describes the syntax of the input.
The given grammar can accept all arithmetic equations involving +, * and ().
eg:
a+(a*a) a+a*a , (a), a , a+a+a*a+a.... etc are accepted
a++a, a***a, +a, a*, ((a . . . etc are rejected.
Solution:
First we have to avoid left recursion
E -> TE'
E' -> +TE' | ε
T -> FT'
T' -> *FT' | ε
F -> (E) | id
After eliminating Left recursion, we have to simply move from one character to next by checking whether it follow the grammar. In this program, ε is indicated as $.
recursive.c
#include<stdio.h>
#include<string.h>
#include<ctype.h>
char input[10];
int i,error;
void E();
void T();
void Eprime();
void Tprime();
void F();
main()
{
i=0;
error=0;
printf("Enter an arithmetic expression : "); // Eg: a+a*a
gets(input);
E();
if(strlen(input)==i&&error==0)
printf("\nAccepted..!!!\n");
else printf("\nRejected..!!!\n");
}
void E()
{
T();
Eprime();
}
void Eprime()
{
if(input[i]=='+')
{
i++;
T();
Eprime();
}
}
void T()
{
F();
Tprime();
}
void Tprime()
{
if(input[i]=='*')
{
i++;
F();
Tprime();
}
}
void F()
{
if(isalnum(input[i]))i++;
else if(input[i]=='(')
{
i++;
E();
if(input[i]==')')
i++;
else error=1;
}
else error=1;
}
Output:
a+(a*a) a+a*a , (a), a , a+a+a*a+a.... etc are accepted
++a, a***a, +a, a*, ((a . . . etc are rejected.
E -> E+T | T
T -> T*F | F
F -> (E) | id
RD parser will verify whether the syntax of the input stream is correct by checking each character from left to right. A basic operation necessary is reading characters from the input stream and matching then with terminals from the grammar that describes the syntax of the input.
The given grammar can accept all arithmetic equations involving +, * and ().
eg:
a+(a*a) a+a*a , (a), a , a+a+a*a+a.... etc are accepted
a++a, a***a, +a, a*, ((a . . . etc are rejected.
Solution:
First we have to avoid left recursion
E -> TE'
E' -> +TE' | ε
T -> FT'
T' -> *FT' | ε
F -> (E) | id
After eliminating Left recursion, we have to simply move from one character to next by checking whether it follow the grammar. In this program, ε is indicated as $.
recursive.c
#include<stdio.h>
#include<string.h>
#include<ctype.h>
char input[10];
int i,error;
void E();
void T();
void Eprime();
void Tprime();
void F();
main()
{
i=0;
error=0;
printf("Enter an arithmetic expression : "); // Eg: a+a*a
gets(input);
E();
if(strlen(input)==i&&error==0)
printf("\nAccepted..!!!\n");
else printf("\nRejected..!!!\n");
}
void E()
{
T();
Eprime();
}
void Eprime()
{
if(input[i]=='+')
{
i++;
T();
Eprime();
}
}
void T()
{
F();
Tprime();
}
void Tprime()
{
if(input[i]=='*')
{
i++;
F();
Tprime();
}
}
void F()
{
if(isalnum(input[i]))i++;
else if(input[i]=='(')
{
i++;
E();
if(input[i]==')')
i++;
else error=1;
}
else error=1;
}
Output:
a+(a*a) a+a*a , (a), a , a+a+a*a+a.... etc are accepted
++a, a***a, +a, a*, ((a . . . etc are rejected.
Code generation using DAG labelled tree
Algorithm
for labeling the nodes of tree(DAG):
|
1. For Leaf Nodes
Assign label 1 to left child node and label 0 to right child node. 2. For Interior Nodes Case 1: If Node's children's labels are different, then Node's Label = Maximum among Children's Labels. Case 2: If Node's children's labels are same, then Node's Label = (Left Child's Label OR Right Child's Label) + 1. |
Algorithm for Generating Assembly Code:
|
(Say, R is a Stack
consists of registers)
void gencode(Node) { if Node is intermediate node of tree(DAG) { Case 1: if Node's left child's label == 1 && Node's right child's label == 0 && Node's left child is leaf node && Node's right child is leaf node { print "MOV Node's left child's data,R[top]" print "op Node's right child's data,R[top]" } Case 2: else if Node's left child's label >= 1 && Node's right child's label == 0 { gencode(Node's left child); print "op Node's right child's data,R[top]" } Case 3: else if Node's left child's label < Node's right child's label { int temp; Swap Register Stack's top and second top element; gencode(Node's right child); temp=pop(); gencode(Node's left child); push(temp); Swap Register Stack's top and second top element; print "op R[top-1],R[top]" } Case 4: else if Node's left child's label >= Node's right child's label { int temp; gencode(Node's left child); temp=pop(); gencode(Node's right child); push(temp); print "op R[top-1],R[top]" } } else if Node is leaf node and it is left child of it's immediate parent { print "MOV Node's data,R[top]" } } |
Program: (codegeneration.cpp)
|
#include<stdlib.h>
#include<iostream> using namespace std; /* We will implement DAG as Strictly Binary Tree where each node has zero or two children */ struct bin_tree { char data; int label; struct bin_tree *right, *left; }; typedef bin_tree node; class dag { private: /* R is stack for storing registers */ int R[10]; int top; /* op will be used for opcode name w.r.t. arithmetic operator e.g. ADD for + */ char *op; public: void initializestack(node *root) { /* value of top = index of topmost element of stack R = label of Root of tree(DAG) minus one */ top=root->label - 1; /* Allocating Stack Registers */ int temp=top; for(int i=0;i<=top;i++) { R[i]=temp; temp--; } } /* insertnode() and insert() functions are for adding nodes to tree(DAG) */ void insertnode(node **tree,char val) { node *temp = NULL; if(!(*tree)) { temp = (node *)malloc(sizeof(node)); temp->left = temp->right = NULL; temp->data = val; temp->label=-1; *tree = temp; } } void insert(node **tree,char val) { char l,r; int numofchildren; insertnode(tree, val); cout << "\nEnter number of children of " << val <<" :"; cin >> numofchildren; if(numofchildren==2) { cout << "\nEnter Left Child of " << val <<" :"; cin >> l; insertnode(&(*tree)->left,l); cout << "\nEnter Right Child of " << val <<" :"; cin >> r; insertnode(&(*tree)->right,r); insert(&(*tree)->left,l); insert(&(*tree)->right,r); } } /* findleafnodelabel() will find out the label of leaf nodes of tree(DAG) */ void findleafnodelabel(node *tree,int val) { if(tree->left != NULL && tree->right !=NULL) { findleafnodelabel(tree->left,1); findleafnodelabel(tree->right,0); } else { tree->label=val; } } /* findinteriornodelabel() will find out the label of interior nodes of tree(DAG) */ void findinteriornodelabel(node *tree) { if(tree->left->label==-1) { findinteriornodelabel(tree->left); } else if(tree->right->label==-1) { findinteriornodelabel(tree->right); } else { if(tree->left != NULL && tree->right !=NULL) { if(tree->left->label == tree->right->label) { tree->label=(tree->left->label)+1; } else { if(tree->left->label > tree->right->label) { tree->label=tree->left->label; } else { tree->label=tree->right->label; } } } } } /* function print_inorder() will print inorder of nodes. Here we are also printing label of each node of tree(DAG) */ void print_inorder(node * tree) { if (tree) { print_inorder(tree->left); cout << tree->data <<" with Label "<< tree->label << "\n"; print_inorder(tree->right); } } /* function swap() will swap the top and second top elements of Register stack R */ void swap() { int temp; temp=R[0]; R[0]=R[1]; R[1]=temp; } /* function pop() will remove and return topmost element of stack */ int pop() { int temp=R[top]; top--; return temp; } /* function push() will increment top by one and will insert element at top position of Register stack */ void push(int temp) { top++; R[top]=temp; } /* nameofoperation() will return opcode w.r.t. arithmetic operator */ void nameofoperation(char temp) { switch(temp) { case '+': op =(char *)"ADD"; break; case '-': op =(char *)"SUB"; break; case '*': op =(char *)"MUL"; break; case '/': op =(char *)"DIV"; break; } } /* gencode() will generate Assembly code w.r.t. labels of tree(DAG) */ void gencode(node * tree) { if(tree->left != NULL && tree->right != NULL) { if(tree->left->label == 1 && tree->right->label == 0 && tree->left->left==NULL && tree->left->right==NULL && tree->right->left==NULL && tree->right->right==NULL) { cout << "MOV "<< tree->left->data << "," << "R[" << R[top] << "]\n"; nameofoperation(tree->data); cout << op << " " << tree->right->data << ",R[" << R[top] << "]\n"; } else if(tree->left->label >= 1 && tree->right->label == 0) { gencode(tree->left); nameofoperation(tree->data); cout << op << " " << tree->right->data << ",R[" << R[top] << "]\n"; } else if(tree->left->label < tree->right->label) { int temp; swap(); gencode(tree->right); temp=pop(); gencode(tree->left); push(temp); swap(); nameofoperation(tree->data); cout << op << " " << "R[" << R[top-1] <<"],R[" << R[top] << "]\n"; } else if(tree->left->label >= tree->right->label) { int temp; gencode(tree->left); temp=pop(); gencode(tree->right); push(temp); nameofoperation(tree->data); cout << op << " " << "R[" << R[top-1] << "],R[" << R[top] <<"]\n"; } } else if(tree->left == NULL && tree->right == NULL && tree->label == 1) { cout << "MOV " << tree->data << ",R[" << R[top] << "]\n"; } } /* deltree() will free the memory allocated for tree(DAG) */ void deltree(node * tree) { if (tree) { deltree(tree->left); deltree(tree->right); free(tree); } } }; /* Program execution will start from main() function */ int main() { node *root; root = NULL; node *tmp; char val; int i,temp; dag d; /* Inserting nodes into tree(DAG) */ cout << "\nEnter root of tree:"; cin >> val; d.insert(&root,val); /* Finding Labels of Leaf nodes */ d.findleafnodelabel(root,1); /* Finding Labels of Interior nodes */ while(root->label == -1) d.findinteriornodelabel(root); /* Initializing Stack contents and top variable */ d.initializestack(root); /* Printing inorder of nodes of tree(DAG) */ cout << "\nInorder Display:\n"; d.print_inorder(root); /* Printing assembly code w.r.t. labels of tree(DAG) */ cout << "\nAssembly Code:\n"; d.gencode(root); /* Deleting all nodes of tree */ d.deltree(root); return 0; } |
How To Run:
To Compile:
>>>g++ codegeneration.cpp
To Run:
>>>./a.out
Abstrct syntax tree using LEX and YACC
*******LEX***
|
%{
#include
"y.tab.h"
extern int
yylval;
%}
%%
"=" { return EQ;
}
"!=" { return NE;
}
"<" { return
LT; }
"<=" { return
LE; }
">" { return
GT; }
">=" { return
GE; }
"+" { return
PLUS; }
"-" { return
MINUS; }
"*" { return
MULT; }
"/" { return
DIVIDE; }
")" { return
RPAREN; }
"(" { return
LPAREN; }
":=" { return
ASSIGN; }
";" { return
SEMICOLON; }
"IF" { return IF;
}
"THEN" { return
THEN; }
"ELSE" { return
ELSE; }
"FI" { return FI;
}
"WHILE" { return
WHILE; }
"DO" { return DO;
}
"OD" { return OD;
}
"PRINT" { return
PRINT; }
[0-9]+ { yylval = atoi(yytext); return NUMBER; }
[a-z] { yylval = yytext[0] - 'a'; return NAME;
}
\ { ; }
\n { nextline(); }
\t { ; }
"//".*\n { nextline(); }
. { yyerror("illegal token");
}
%%
#ifndef yywrap
yywrap() { return
1; }
#endif
|
*************Yacc*********
|
%start ROOT
%token EQ
%token NE
%token LT
%token LE
%token GT
%token GE
%token PLUS
%token MINUS
%token MULT
%token DIVIDE
%token RPAREN
%token LPAREN
%token ASSIGN
%token SEMICOLON
%token IF
%token THEN
%token ELSE
%token FI
%token WHILE
%token DO
%token OD
%token PRINT
%token NUMBER
%token NAME
%%
ROOT:
stmtseq {
execute($1); }
;
statement:
designator
ASSIGN expression { $$ = assignment($1, $3); }
| PRINT
expression { $$ = print($2); }
| IF expression
THEN stmtseq ELSE stmtseq FI
{ $$ =
ifstmt($2, $4, $6); }
| IF expression
THEN stmtseq FI
{ $$ =
ifstmt($2, $4, empty()); }
| WHILE expression DO stmtseq OD { $$ =
whilestmt($2, $4); }
;
stmtseq:
stmtseq
SEMICOLON statement { $$ = seq($1, $3); }
| statement { $$
= $1; }
;
expression:
expr2 { $$ =
$1; }
| expr2 EQ expr2
{ $$ = eq($1, $3); }
| expr2 NE expr2 { $$ = ne($1, $3); }
| expr2 LT expr2
{ $$ = le($1, $3); }
| expr2 LE expr2
{ $$ = le($1, $3); }
| expr2 GT expr2
{ $$ = gt($1, $3); }
| expr2 GE expr2
{ $$ = gt($1, $3); }
;
expr2:
expr3 { $$ ==
$1; }
| expr2 PLUS
expr3 { $$ = plus($1, $3); }
| expr2 MINUS
expr3 { $$ = minus($1, $3); }
;
expr3:
expr4 { $$ =
$1; }
| expr3 MULT
expr4 { $$ = mult($1, $3); }
| expr3 DIVIDE
expr4 { $$ = divide ($1, $3); }
;
expr4:
PLUS expr4 { $$
= $2; }
| MINUS expr4 {
$$ = neg($2); }
| LPAREN
expression RPAREN { $$ = $2; }
| NUMBER { $$ =
number($1); }
| designator { $$
= $1; }
;
designator:
NAME { $$ =
name($1); }
;
|
Here is the complete definition of the abstract syntax:
|
domain Statement
{
assignment
(Expression lhs, Expression rhs)
print
(Expression x)
ifstmt
(Expression cond, Statement thenpart, Statement elsepart)
whilestmt
(Expression cond, Statement body)
seq (Statement
s1, Statement s2)
empty ()
}
domain Expression
{
eq (Expression
x, Expression y)
ne (Expression
x, Expression y)
lt (Expression
x, Expression y)
le (Expression
x, Expression y)
gt (Expression
x, Expression y)
ge (Expression
x, Expression y)
plus
(Expression x, Expression y)
minus
(Expression x, Expression y)
mult
(Expression x, Expression y)
divide
(Expression x, Expression y)
neg
(Expression x)
number (int x)
name (int
location)
}
|
Subscribe to:
Posts (Atom)


