Expression trees

Earlier in this section, there was a discussion on the need to record expressions while reading them for future evaluation. The best way to do this is by using a tree. To understand how a tree works, consider an expression such as:
8 + 9 * 5
which is evaluated as:
8 + (9 * 5)

Each operation has three components: the operator, and the two operands.

REQTEXT

The operators are called the nodes of the tree. At each node, there are two branches, representing the two operands of the operator. The end of each branch is a simple operand that is not an expression; such an operand is called a leaf.

Tree structures are a good way to represent expressions. They record all the information needed to evaluate the expression.

Tree structures can also represent a list of statements. In this case, think of the operator as the semicolon that separates the two.

REQTEXT

A while loop is represented similarly, with one branch giving the condition expression and the other giving the statement list. Finally, an if-else statement can be represented as a tree with three branches: one for the condition expression, one for the if statements, and one for the else statements. An if without an else is just a special case where the third branch is empty.

To represent these trees, the desk calculator example creates the following data types. These are defined in the header file header.h, which you include (with the #include directive) into the appropriate C source code files.
typedef union {
        int     value;
        struct nnode *np;
} ITEM;
typedef struct nnode {
        int     operator;
        ITEM    left, right, third;
} NODE;
#define LEFT    left.np
#define RIGHT   right.np

#define NNULL           ((NODE *) 0)
#define node(a,b,c)     triple(a, b, c, NNULL)

extern int variables[26];

int execute(NODE *np);

To record an expression, use malloc() to allocate an nnode structure. The operator is set to the operator of the expression; the tokens INTEGER, VARIABLE, WHILE, and IF are also used as appropriate. For leaves of the tree (simple operands), call a function named leaf() to fill in the left field and put null pointers in the other two. For operations that have two operands, call a function named node() to fill in the left and right fields with pointers to trees for the operands; the third field is given a null pointer value. For operations with three operands, call a function named triple() to fill in all three pointers.

As input is collected, tree structures are allocated and organized. When a complete statement has been collected, you can then call a function named execute() to walk through the tree and run the statement appropriately.

When the statement has been run, the tree is no longer needed. At that point, call a function named freeall() to free the memory used for all the structures that make up the tree.

Putting all this together produces the following grammar for the desk calculator program. Note that the functions part of the input contains everything you need except the execute() function. This example is provided in dc3.y.
%{
#include <stdio.h>
#include <stdlib.h>

#include "header.h"

static NODE *nalloc(void);
static NODE *leaf(int type, int value);
static NODE *triple(int op, NODE *left, NODE *right, NODE *third);
static void freeall(NODE *np);

int variables[26];
%}

%union {
        int ivalue;
        char variable;
        NODE *np;
};

%token <variable>  VARIABLE
%token <ivalue>    INTEGER '+' '-' '*' '/' '<' '>' GE LE NE EQ

%token WHILE IF PRINT ELSE
%left GE LE EQ NE '>' '<'
%left '+' '-'
%left '*' '/'

%type <np>      statement expression statementlist simplestatement

%%

program:
          program statement     { execute($2); freeall($2); }
        | program error ';'     { yyerrok; }
        | /* NULL */
        ;

statement:
          simplestatement ';'
        | WHILE '(' expression ')' statement
                { $$ = node(WHILE, $3, $5); }
        | IF '(' expression ')' statement ELSE statement
                { $$ = triple(IF,$3,$5,$7); }
        | IF '(' expression ')' statement
                { $$ = triple(IF,$3,$5,NNULL); }
        | '{' statementlist '}'
                { $$ = $2; }
        ;

statementlist:
          statement
        | statementlist statement       { $$ = node(';', $1, $2); }
        ;

simplestatement:
          expression
        | PRINT expression              { $$ = node(PRINT,$2,NNULL); }
        | VARIABLE '=' expression
                        { $$ = node('=', leaf(VARIABLE, $1), $3); }
        ;

expression:
          INTEGER               { $$ = leaf(INTEGER, $1); }
        | VARIABLE              { $$ = leaf(VARIABLE, $1); }
        | expression '+' expression
                { binary: $$ = node($2, $1, $3); }
        | expression '-' expression     { goto binary; }
        | expression '*' expression     { goto binary; }
        | expression '/' expression     { goto binary; }
        | expression '<' expression     { goto binary; }
        | expression '>' expression     { goto binary; }
        | expression GE expression      { goto binary; }
        | expression LE expression      { goto binary; }
        | expression NE expression      { goto binary; }
        | expression EQ expression      { goto binary; }
        | '(' expression ')'            { $$ = $2; }
        ;

%%

static NODE *
nalloc()
{
        NODE *np;

        np = (NODE *) malloc(sizeof(NODE));
        if (np == NNULL) {
                printf("Out of Memory\n");
                exit(1);
        }
        return np;
}

static NODE *
leaf(type, value)
int type, value;
{
        NODE *np = nalloc();

        np->operator = type;
        np->left.value = value;
        return np;
}

static NODE *
triple(op, left, right, third)
int op;
NODE *left, *right, *third;
{
        NODE *np = nalloc();

        np->operator = op;
        np->left.np = left;
        np->right.np = right;
        np->third.np = third;
        return np;
}

static void
freeall(np)
NODE *np;
{
        if (np == NNULL)
                return;
        switch(np->operator) {
        case IF:                /* Triple */
                freeall(np->third.np);
        /* FALLTHROUGH */
                                /* Binary */
        case '+': case '-': case '*': case '/':
        case ';': case '<': case '>':
        case GE: case LE: case NE: case EQ:
        case WHILE:
        case '=':
                freeall(np->RIGHT);
        /* FALLTHROUGH */
        case PRINT:             /* Unary */
                freeall(np->LEFT);
                break;
        }
        free(np);
}
Note that there is a shift-reduce conflict in this grammar. This is due to the rules:
statement: IF ’(’ expression ’)’ statement ELSE statement ;
statement: IF ’(’ expression ’)’ statement ;
The default rules for resolving this conflict favor the shift action, which is what is desired in this case. An else that follows an if statement matches with the closest preceding if. (See Generating a parser using yacc for more details.)
The source code for the execute() function can be compiled separately. It walks through the tree node by node, calling itself recursively to run the branches at each node. The execute() function is basically a big switch statement, which looks at the node operator and takes appropriate action. It is quite straightforward. In the examples provided, this is file execute.c.
#include <stdio.h>
#include <stdlib.h>

#include "header.h"
#include "y.tab.h"

int
execute(np)
struct nnode *np;
{
        switch(np->operator) {
        case INTEGER:   return np->left.value;
        case VARIABLE:  return variables[np->left.value];
        case '+':       return execute(np->LEFT) + execute(np->RIGHT);
        case '-':       return execute(np->LEFT) - execute(np->RIGHT);
        case '*':       return execute(np->LEFT) * execute(np->RIGHT);
        case '/':       return execute(np->LEFT) / execute(np->RIGHT);
        case '<':       return execute(np->LEFT) < execute(np->RIGHT);
        case '>':       return execute(np->LEFT) > execute(np->RIGHT);
        case GE:        return execute(np->LEFT) >= execute(np->RIGHT);
        case LE:        return execute(np->LEFT) <= execute(np->RIGHT);
        case NE:        return execute(np->LEFT) != execute(np->RIGHT);
        case EQ:        return execute(np->LEFT) == execute(np->RIGHT);
        case PRINT:     printf("%d\n", execute(np->LEFT)); return 0;
        case ';':       execute(np->LEFT); return execute(np->RIGHT);
        case '=':
                        return
                          variables[np->LEFT->left.value] = execute(np->RIGHT);
        case WHILE:
                        while (execute(np->LEFT))
                                execute(np->RIGHT);
                        return 0;
        case IF:
                        if (execute(np->LEFT))
                                execute(np->RIGHT);
                        else if (np->third.np != NNULL)
                                execute(np->third.np);
                        return 0;
        }
        printf("Internal error! Bad node type!");
        exit(1);
}
Note that execute() calls the yyerror() function to issue error messages.