【手搓一个脚本语言】六、用C语言抽象语法树AST计算表达式的值
- 接上一篇【手搓一个脚本语言】五、用C语言抽象语法树AST解析简单的表达式字符串(括号)-CSDN博客代码,再进一步改进!!!
- 目标:用抽象语法树AST解析完表达式后,计算表达式,求出表达式的正解结果!
1、表达式的计算方法
- 中缀表达式,可读性程度最高,人脑计算起来方便,编程实现比较复杂!
- 前缀表达式,可读性程度中等,对应LISP语言的表达式,编程实现容易!
- 后缀表达式,可读性程度最差,但编程实现轻松!!!
- 这里采用最容易编程实现的计算后缀表达式的方法!!!
2、统计AST的节点数
static void
astnode_count_all (AstNode node, int *cnt)
{
if (node != NULL)
{
astnode_count_all (node->left, cnt);
astnode_count_all (node->right, cnt);
if (node->token.type == T_SUBA)
astnode_count_all (node->token.V.aobj, cnt);
else
(*cnt)++;
}
}
int
astnode_count (AstNode node)
{
int count = 0;
astnode_count_all (node, &count);
return count;
}
3、用后序遍历的方式将AST中的所有值保存到数组中去,形成一个后缀表达式
- 在外部定义一个数组,长度为上一步统计的AST的节点数,将数组指针和数组的索引地址传给函数
void
astnode_postorder_trav_toa (AstNode node, Token tks[], int *idx)
{
if (node != NULL)
{
astnode_postorder_trav_toa (node->left, tks, idx);
astnode_postorder_trav_toa (node->right, tks, idx);
switch (node->token.type)
{
case T_OPER: tks[*idx] = node->token; (*idx)++; break;
case T_NUMB: tks[*idx] = node->token; (*idx)++; break;
case T_SUBA: astnode_postorder_trav_toa (node->token.V.aobj, tks, idx); break;
}
}
}
4、计算表达式函数
- 上一步完成,循环读出后缀表达式数组中的Token,如果是数字,压入栈,如果是运算符,则根据运算符计算出结果,再将结果压入栈,循环结束,栈底的值即为结果!!!
Token
eval_astnode (AstNode node)
{
int index = 0, count = 0;
Token *tks, tk, stk[16] = {0};
int sp = 0;
count = astnode_count (node);
tks = (Token*) malloc (count * sizeof(Token));
astnode_postorder_trav_toa (node, tks, &index);
PF("Node count: %d\n", count);
PF("Node index: %d\n", index);
for (int i = 0; i < count; i++)
{
switch (tks[i].type)
{
case T_NUMB:
{
PFI("PUSH %ld, SP: %d\n", tks[i].V.ival, sp);
stk[sp] = tks[i]; sp = sp + 1;
}
break;
case T_OPER:
{
switch (tks[i].V.oval[0])
{
case '+':
PFI("ADD %ld %ld", stk[sp-2].V.ival, stk[sp-1].V.ival);
stk[sp-2].V.ival = stk[sp-2].V.ival + stk[sp-1].V.ival;
PFI(" ==> %ld\n", stk[sp-2].V.ival);
stk[sp-1].type = 0; stk[sp-1].V.nval = 0; sp = sp - 1;
break;
case '-':
PFI("SUB %ld %ld", stk[sp-2].V.ival, stk[sp-1].V.ival);
stk[sp-2].V.ival = stk[sp-2].V.ival - stk[sp-1].V.ival;
PFI(" ==> %ld\n", stk[sp-2].V.ival);
stk[sp-1].type = 0; stk[sp-1].V.nval = 0; sp = sp - 1;
break;
case '*':
PFI("MUL %ld %ld", stk[sp-2].V.ival, stk[sp-1].V.ival);
stk[sp-2].V.ival = stk[sp-2].V.ival * stk[sp-1].V.ival;
PFI(" ==> %ld\n", stk[sp-2].V.ival);
stk[sp-1].type = 0; stk[sp-1].V.nval = 0; sp = sp - 1;
break;
case '/':
PFI("DIV %ld %ld", stk[sp-2].V.ival, stk[sp-1].V.ival);
stk[sp-2].V.ival = stk[sp-2].V.ival / stk[sp-1].V.ival;
PFI(" ==> %ld\n", stk[sp-2].V.ival);
stk[sp-1].type = 0; stk[sp-1].V.nval = 0; sp = sp - 1;
break;
}
}
break;
}
}
free (tks);
tk = stk[0];
return tk;
}
5、测试函数
void
test_eval (void)
{
AstNode root = NULL;
Token tk;
char *st = "(10+20)*30";
PF("EXPR: %s\n", st);
PF("--------------------\n");
root = parse_string (st);
PF("--------------------\n");
PF(" MID: "); astnode_inorder_trav (root); PF("\n");
PF("--------------------\n");
PF("PREV: "); astnode_prevorder_trav (root); PF("\n");
PF("--------------------\n");
astnode_visual_trav (root);
PF("--------------------\n");
PF("POST: "); astnode_postorder_trav (root); PF("\n");
PF("--------------------\n");
tk = eval_astnode (root);
PF("--------------------\n");
PF("RESULT: %ld\n", tk.V.ival);
PF("--------------------\n");
astnode_free (root);
}
6、编译运行,结果如预期!!!
EXPR: (10+20)*30
--------------------
LPAR: (
NUMB: 10
OP: +
NUMB: 20
RPAR: )
OP: *
NUMB: 30
--------------------
MID: ( 10 + 20 ) * 30
--------------------
PREV: ( * ( + 10 20 ) 30 )
--------------------
<
<[10]
[+]
>[20]
[*]
>[30]
--------------------
POST: 10 20 + 30 *
--------------------
Node count: 5
Node index: 5
PUSH 10, SP: 0
PUSH 20, SP: 1
ADD 10 20 ==> 30
PUSH 30, SP: 1
MUL 30 30 ==> 900
--------------------
RESULT: 900
--------------------
7、检查内存分配情况,无误!!!
==3080== Memcheck, a memory error detector
==3080== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==3080== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==3080== Command: ./gt
==3080==
EXPR: (10+20)*30
--------------------
LPAR: (
NUMB: 10
OP: +
NUMB: 20
RPAR: )
OP: *
NUMB: 30
--------------------
MID: ( 10 + 20 ) * 30
--------------------
PREV: ( * ( + 10 20 ) 30 )
--------------------
<
<[10]
[+]
>[20]
[*]
>[30]
--------------------
POST: 10 20 + 30 *
--------------------
Node count: 5
Node index: 5
PUSH 10, SP: 0
PUSH 20, SP: 1
ADD 10 20 ==> 30
PUSH 30, SP: 1
MUL 30 30 ==> 900
--------------------
RESULT: 900
--------------------
==3080==
==3080== HEAP SUMMARY:
==3080== in use at exit: 0 bytes in 0 blocks
==3080== total heap usage: 7 allocs, 7 frees, 272 bytes allocated
==3080==
==3080== All heap blocks were freed -- no leaks are possible
==3080==
==3080== For counts of detected and suppressed errors, rerun with: -v
==3080== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
8、完整代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define GT_DEBUG 1
#if GT_DEBUG
#define PFI(...) fprintf (stderr, __VA_ARGS__)
#else
#define PFI(...)
#endif
#define PF(...) fprintf (stderr, __VA_ARGS__)
#define T_OPER 0x21
#define T_NUMB 0x22
#define T_SUBA 0x23
typedef struct _AstNode *AstNode;
typedef struct _Token Token;
struct _Token {
union {
void *nval;
char oval[8];
long ival;
AstNode aobj;
} V;
char type;
char fill[7];
};
struct _AstNode {
Token token;
AstNode left, right;
};
AstNode
astnode_new (Token tk)
{
AstNode node = (AstNode) malloc (sizeof(struct _AstNode));
node->token = tk; node->left = NULL; node->right = NULL;
return node;
}
void
astnode_free (AstNode node)
{
if (node != NULL)
{
astnode_free (node->left);
astnode_free (node->right);
if (node->token.type == T_SUBA)
astnode_free (node->token.V.aobj);
free (node);
}
}
static void
astnode_count_all (AstNode node, int *cnt)
{
if (node != NULL)
{
astnode_count_all (node->left, cnt);
astnode_count_all (node->right, cnt);
if (node->token.type == T_SUBA)
astnode_count_all (node->token.V.aobj, cnt);
else
(*cnt)++;
}
}
int
astnode_count (AstNode node)
{
int count = 0;
astnode_count_all (node, &count);
return count;
}
static void
astnode_visual_trav_lv (AstNode node, int lv, char lr)
{
if (node != NULL)
{
astnode_visual_trav_lv (node->left, lv+1, 'L');
for (int i = 0; i < lv; i++) PFI(" ");
if (lr == 'L') PFI("<");
if (lr == 'R') PFI(">");
switch (node->token.type)
{
case T_OPER: PFI("[%s]\n", node->token.V.oval); break;
case T_NUMB: PFI("[%ld]\n", node->token.V.ival); break;
case T_SUBA: PFI("\n"); astnode_visual_trav_lv (node->token.V.aobj, lv, 'N'); break;
}
astnode_visual_trav_lv (node->right, lv+1, 'R');
}
}
void
astnode_visual_trav (AstNode node)
{
astnode_visual_trav_lv (node, 0, 'N');
}
void
astnode_prevorder_trav (AstNode node)
{
if (node != NULL)
{
if (node->token.type == T_OPER) PFI(" (");
switch (node->token.type)
{
case T_OPER: PFI( " %s", node->token.V.oval); break;
case T_NUMB: PFI(" %ld", node->token.V.ival); break;
case T_SUBA: astnode_prevorder_trav (node->token.V.aobj); break;
}
astnode_prevorder_trav (node->left);
astnode_prevorder_trav (node->right);
if (node->token.type == T_OPER) PFI(" )");
}
}
void
astnode_inorder_trav (AstNode node)
{
if (node != NULL)
{
astnode_inorder_trav (node->left);
switch (node->token.type)
{
case T_OPER: PFI( " %s", node->token.V.oval); break;
case T_NUMB: PFI(" %ld", node->token.V.ival); break;
case T_SUBA: PFI(" ("); astnode_inorder_trav (node->token.V.aobj); PFI(" )"); break;
}
astnode_inorder_trav (node->right);
}
}
void
astnode_postorder_trav (AstNode node)
{
if (node != NULL)
{
astnode_postorder_trav (node->left);
astnode_postorder_trav (node->right);
switch (node->token.type)
{
case T_OPER: PFI( " %s", node->token.V.oval); break;
case T_NUMB: PFI(" %ld", node->token.V.ival); break;
case T_SUBA: astnode_postorder_trav (node->token.V.aobj); break;
}
}
}
void
astnode_postorder_trav_toa (AstNode node, Token tks[], int *idx)
{
if (node != NULL)
{
astnode_postorder_trav_toa (node->left, tks, idx);
astnode_postorder_trav_toa (node->right, tks, idx);
switch (node->token.type)
{
case T_OPER: tks[*idx] = node->token; (*idx)++; break;
case T_NUMB: tks[*idx] = node->token; (*idx)++; break;
case T_SUBA: astnode_postorder_trav_toa (node->token.V.aobj, tks, idx); break;
}
}
}
static int
get_priority (char *op)
{
if (op[0] == '+') return 1;
if (op[0] == '-') return 1;
if (op[0] == '*') return 2;
if (op[0] == '/') return 2;
}
#define STCKSIZE 16
#define NUMBSIZE 20
AstNode
parse_string (char *estr)
{
AstNode head = NULL;
AstNode st[STCKSIZE] = {0};
AstNode tp[STCKSIZE] = {0};
int lv = 0;
char nbuf[NUMBSIZE] = {0};
int ndx = 0;
int idx = 0;
char c;
c = estr[idx];
while (c != '\0')
{
switch (c)
{
case '0'...'9':
{
char n = estr[idx + 1];
nbuf[ndx] = c; ndx++;
if (!(n >= '0' && n <= '9'))
{
long lt = strtol (nbuf, NULL, 10);
Token tk = { .type = T_NUMB, .V.ival = lt };
AstNode node = astnode_new (tk);
if (st[lv] == NULL)
{
if (tp[lv] == NULL) tp[lv] = node;
else
PFI("Info: At index %d, Maybe Syntax error!\n", idx);
}
else
{
AstNode tmp = st[lv]->right;
if (tmp == NULL)
{
st[lv]->right = node;
}
else
{
while (tmp->right != NULL)
tmp = tmp->right;
tmp->right = node;
}
}
PFI("NUMB: %s\n", nbuf);
memset (nbuf, 0, NUMBSIZE); ndx = 0;
}
}
break;
case '+': case '-': case '*': case '/':
{
Token tk = { .type = T_OPER, .V.oval[0] = c };
AstNode node = astnode_new (tk);
int opr = get_priority (tk.V.oval);
if (st[lv] == NULL)
{
if (tp[lv] == NULL)
{
st[lv] = node;
}
else
{
node->left = tp[lv];
st[lv] = node;
}
}
else
{
int ppr = get_priority (st[lv]->token.V.oval);
if (opr > ppr)
{
node->left = st[lv]->right;
st[lv]->right = node;
}
else
{
node->left = st[lv];
st[lv] = node;
}
}
PFI(" OP: %c\n", c);
}
break;
case '(':
{
lv++; PFI("LPAR: %c\n", c);
}
break;
case ')':
{
AstNode node = NULL;
Token tk = { .type = T_SUBA, .V.aobj = st[lv] };
node = astnode_new (tk);
if (st[lv-1] == NULL)
{
st[lv-1] = node; st[lv] = NULL; tp[lv] = NULL;
}
else
{
if (st[lv-1]->right == NULL)
{
st[lv-1]->right = node;
}
else
{
AstNode tmp = st[lv-1]->right;
while (tmp->right != NULL)
tmp = tmp->right;
tmp->right = node;
}
st[lv] = NULL; tp[lv] = NULL;
}
lv--; PFI("RPAR: %c\n", c);
}
break;
case ' ': break;
default:
PFI("Error: At index %d, [%c], Syntax error!\n", idx, c); exit (0);
}
idx++; c = estr[idx];
}
head = st[lv];
return head;
}
Token
eval_astnode (AstNode node)
{
int index = 0, count = 0;
Token *tks, tk, stk[16] = {0};
int sp = 0;
count = astnode_count (node);
tks = (Token*) malloc (count * sizeof(Token));
astnode_postorder_trav_toa (node, tks, &index);
PF("Node count: %d\n", count);
PF("Node index: %d\n", index);
for (int i = 0; i < count; i++)
{
switch (tks[i].type)
{
case T_NUMB:
{
PFI("PUSH %ld, SP: %d\n", tks[i].V.ival, sp);
stk[sp] = tks[i]; sp = sp + 1;
}
break;
case T_OPER:
{
switch (tks[i].V.oval[0])
{
case '+':
PFI("ADD %ld %ld", stk[sp-2].V.ival, stk[sp-1].V.ival);
stk[sp-2].V.ival = stk[sp-2].V.ival + stk[sp-1].V.ival;
PFI(" ==> %ld\n", stk[sp-2].V.ival);
stk[sp-1].type = 0; stk[sp-1].V.nval = 0; sp = sp - 1;
break;
case '-':
PFI("SUB %ld %ld", stk[sp-2].V.ival, stk[sp-1].V.ival);
stk[sp-2].V.ival = stk[sp-2].V.ival - stk[sp-1].V.ival;
PFI(" ==> %ld\n", stk[sp-2].V.ival);
stk[sp-1].type = 0; stk[sp-1].V.nval = 0; sp = sp - 1;
break;
case '*':
PFI("MUL %ld %ld", stk[sp-2].V.ival, stk[sp-1].V.ival);
stk[sp-2].V.ival = stk[sp-2].V.ival * stk[sp-1].V.ival;
PFI(" ==> %ld\n", stk[sp-2].V.ival);
stk[sp-1].type = 0; stk[sp-1].V.nval = 0; sp = sp - 1;
break;
case '/':
PFI("DIV %ld %ld", stk[sp-2].V.ival, stk[sp-1].V.ival);
stk[sp-2].V.ival = stk[sp-2].V.ival / stk[sp-1].V.ival;
PFI(" ==> %ld\n", stk[sp-2].V.ival);
stk[sp-1].type = 0; stk[sp-1].V.nval = 0; sp = sp - 1;
break;
}
}
break;
}
}
free (tks);
tk = stk[0];
return tk;
}
void
test_eval (void)
{
AstNode root = NULL;
Token tk;
char *st = "(10+20)*30";
PF("EXPR: %s\n", st);
PF("--------------------\n");
root = parse_string (st);
PF("--------------------\n");
PF(" MID: "); astnode_inorder_trav (root); PF("\n");
PF("--------------------\n");
PF("PREV: "); astnode_prevorder_trav (root); PF("\n");
PF("--------------------\n");
astnode_visual_trav (root);
PF("--------------------\n");
PF("POST: "); astnode_postorder_trav (root); PF("\n");
PF("--------------------\n");
tk = eval_astnode (root);
PF("--------------------\n");
PF("RESULT: %ld\n", tk.V.ival);
PF("--------------------\n");
astnode_free (root);
}
void
test_parse (void)
{
AstNode root = NULL;
char *st = "1+(2-(3+(4-5)))";
PF("EXPR: %s\n", st);
PF("--------------------\n");
root = parse_string (st);
PF("--------------------\n");
PF(" MID: "); astnode_inorder_trav (root); PF("\n");
PF("--------------------\n");
PF("PREV: "); astnode_prevorder_trav (root); PF("\n");
PF("--------------------\n");
PF("POST: "); astnode_postorder_trav (root); PF("\n");
PF("--------------------\n");
astnode_visual_trav (root);
PF("--------------------\n");
astnode_free (root);
}
void
test_astnode (void)
{
AstNode root = NULL, tpl = NULL, tpr = NULL;
Token tka = { .type = T_NUMB, .V.ival = 1 };
Token tkb = { .type = T_NUMB, .V.ival = 2 };
Token tkc = { .type = T_OPER, .V.oval[0] = '+' };
Token tkd = { .type = T_NUMB, .V.ival = 3 };
Token tke = { .type = T_OPER, .V.oval[0] = '-' };
tpl = astnode_new (tka);
tpr = astnode_new (tkb);
root = astnode_new (tkc);
root->left = tpl;
root->right = tpr;
tpl = root;
tpr = astnode_new (tkd);
root = astnode_new (tke);
root->left = tpl;
root->right = tpr;
PF(" MID: ");
astnode_inorder_trav (root); PF("\n");
PF("PREV: ");
astnode_prevorder_trav (root); PF("\n");
PF("POST: ");
astnode_postorder_trav (root); PF("\n");
astnode_free (root);
}
int
main (int argc, char *argv[])
{
test_eval ();
return 0;
}