【手搓一个脚本语言】五、用C语言抽象语法树AST解析简单的表达式字符串(括号)
- 接上一篇【手搓一个脚本语言】四、用C语言抽象语法树AST解析简单的表达式字符串(乘除法)-CSDN博客代码,再进一步改进!!!
- 目标:正确解析带括号的表达式,如:(1+2)*3,遍历AST能输出正确的结果!!!
1、括号的规则
- 括号"()",也称小括号、圆括号,包括左括号’(‘和右括号’)'两个符号!
- 括号的运算优先级:在表达式中,从左括号开始到右括号结束,这中间的内容,拥有有最高的运算优先级!
- 括号是可以嵌套的,最内部的拥有最高的优先级!
- 括号的解析,用数组来模拟栈来辅助解析带括号的表达式,栈的高度(也就是数组的长度)就是表达式括号嵌套的深度!
- 括号中表达式的保存,将括号内的表达式,做为一个子AST树,保存到父AST的节点!
2、添加子表达式类型
- 将AstNode的定义提到Token的定义之前!
- 定义T_SUBA,表示子表达式类型!
- 在union {…} V;中加入AstNode *aobj; //sub astnode,与T_SUBA对应!
#define T_OPER 0x21
#define T_NUMB 0x22
#define T_SUBA 0x23
typedef struct _AstNode *AstNode;
typedef struct _Token Token;
struct _Token {
union {
void *nval;
char oval[8];
long ival;
AstNode aobj;
} V;
char type;
char fill[7];
};
struct _AstNode {
Token token;
AstNode left, right;
};
2、修改释放AST节点函数
- 当节点类型为T_SUBA时,再次调用astnode_free释放节点保存子AST节点,又是递归!
void
astnode_free (AstNode node)
{
if (node != NULL)
{
astnode_free (node->left);
astnode_free (node->right);
if (node->token.type == T_SUBA)
astnode_free (node->token.V.aobj);
free (node);
}
}
3、修改遍历函数(注意:均为递归调用!)
3.1 直观遍历,在astnode_visual_trav_lv函数的switch中加入:
...
case T_SUBA: PFI("\n"); astnode_visual_trav_lv (node->token.V.aobj, lv, 'N'); break;
...
3.2 前序遍历,在astnode_prevorder_trav函数中加入输出括号和处理子表达式功能
void
astnode_prevorder_trav (AstNode node)
{
if (node != NULL)
{
if (node->token.type == T_OPER) PFI(" (");
switch (node->token.type)
{
case T_OPER: PFI( " %s", node->token.V.oval); break;
case T_NUMB: PFI(" %ld", node->token.V.ival); break;
case T_SUBA: astnode_prevorder_trav (node->token.V.aobj); break;
}
astnode_prevorder_trav (node->left);
astnode_prevorder_trav (node->right);
if (node->token.type == T_OPER) PFI(" )");
}
}
3.3 中序遍历,在astnode_inorder_trav函数的swtich中加入:
...
case T_SUBA: PFI(" ("); astnode_inorder_trav (node->token.V.aobj); PFI(" )"); break;
...
3.4 后序遍历,在astnode_postorder_trav函数的switch中加入:
...
case T_SUBA: astnode_postorder_trav (node->token.V.aobj); break;
...
4、修改解析函数parse_string
4.1 定义栈高度,栈的高度就是表达式中括号嵌套的深度!!!
#define STCKSIZE 16
4.2 定义栈数组,st保存运算符,tp保存数字
4.3 定义整型变量lv,保存栈指针,当遇到左括号时指针增一,当遇到右括号时指针减一
AstNode st[STCKSIZE] = {0};
AstNode tp[STCKSIZE] = {0};
int lv = 0;
4.4 将head替换为st[lv],将tp替换为tp[lv]
4.5 在switch中加入处理左右括号的功能,当为右括号时将当前栈(lv)中保存表达式保存为子表达式,再将子表达式追加到栈的下一级(lv-1)中运算符的右子节点,如栈的下一级(lv-1)为空,则直接赋值给栈的下一级(lv-1)!!!
case '(':
{
lv++; PFI("LPAR: %c\n", c);
}
break;
case ')':
{
AstNode node = NULL;
Token tk = { .type = T_SUBA, .V.aobj = st[lv] };
node = astnode_new (tk);
if (st[lv-1] == NULL)
{
st[lv-1] = node; st[lv] = NULL; tp[lv] = NULL;
}
else
{
if (st[lv-1]->right == NULL)
{
st[lv-1]->right = node;
}
else
{
AstNode tmp = st[lv-1]->right;
while (tmp->right != NULL)
tmp = tmp->right;
tmp->right = node;
}
st[lv] = NULL; tp[lv] = NULL;
}
lv--; PFI("RPAR: %c\n", c);
}
break;
4.6 在结束时为head赋值,将运算符栈底st[lv]赋值给head,返回head
head = st[lv];
return head;
5、测试函数
void
test_parse (void)
{
AstNode root = NULL;
char *st = "1+(2-(3+4+5)-6)+7";
PF("EXPR: %s\n", st);
PF("--------------------\n");
root = parse_string (st);
PF("--------------------\n");
PF(" MID: "); astnode_inorder_trav (root); PF("\n");
PF("--------------------\n");
PF("PREV: "); astnode_prevorder_trav (root); PF("\n");
PF("--------------------\n");
PF("POST: "); astnode_postorder_trav (root); PF("\n");
PF("--------------------\n");
astnode_visual_trav (root);
PF("--------------------\n");
astnode_free (root);
}
6、编译运行
6.1 测试表达式:(1+2)*3,输出结果如下:
EXPR: (1+2)*3
--------------------
LPAR: (
NUMB: 1
OP: +
NUMB: 2
RPAR: )
OP: *
NUMB: 3
--------------------
MID: ( 1 + 2 ) * 3
--------------------
PREV: ( * ( + 1 2 ) 3 )
--------------------
POST: 1 2 + 3 *
--------------------
<
<[1]
[+]
>[2]
[*]
>[3]
--------------------
6.2 测试表达式:1*(2+3),输出结果如下:
EXPR: 1*(2+3)
--------------------
NUMB: 1
OP: *
LPAR: (
NUMB: 2
OP: +
NUMB: 3
RPAR: )
--------------------
MID: 1 * ( 2 + 3 )
--------------------
PREV: ( * 1 ( + 2 3 ) )
--------------------
POST: 1 2 3 + *
--------------------
<[1]
[*]
>
<[2]
[+]
>[3]
--------------------
6.3 测试表达式1+(2-(3+4+5)-6)+7输出结果如下:
EXPR: 1+(2-(3+4+5)-6)+7
--------------------
NUMB: 1
OP: +
LPAR: (
NUMB: 2
OP: -
LPAR: (
NUMB: 3
OP: +
NUMB: 4
OP: +
NUMB: 5
RPAR: )
OP: -
NUMB: 6
RPAR: )
OP: +
NUMB: 7
--------------------
MID: 1 + ( 2 - ( 3 + 4 + 5 ) - 6 ) + 7
--------------------
PREV: ( + ( + 1 ( - ( - 2 ( + ( + 3 4 ) 5 ) ) 6 ) ) 7 )
--------------------
POST: 1 2 3 4 + 5 + - 6 - + 7 +
--------------------
<[1]
<[+]
>
<[2]
<[-]
>
<[3]
<[+]
>[4]
[+]
>[5]
[-]
>[6]
[+]
>[7]
--------------------
7、以上测试输出结果达到预期!!!
8、完整代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define GT_DEBUG 1
#if GT_DEBUG
#define PFI(...) fprintf (stderr, __VA_ARGS__)
#else
#define PFI(...)
#endif
#define PF(...) fprintf (stderr, __VA_ARGS__)
#define T_OPER 0x21
#define T_NUMB 0x22
#define T_SUBA 0x23
typedef struct _AstNode *AstNode;
typedef struct _Token Token;
struct _Token {
union {
void *nval;
char oval[8];
long ival;
AstNode aobj;
} V;
char type;
char fill[7];
};
struct _AstNode {
Token token;
AstNode left, right;
};
AstNode
astnode_new (Token tk)
{
AstNode node = (AstNode) malloc (sizeof(struct _AstNode));
node->token = tk; node->left = NULL; node->right = NULL;
return node;
}
void
astnode_free (AstNode node)
{
if (node != NULL)
{
astnode_free (node->left);
astnode_free (node->right);
if (node->token.type == T_SUBA)
astnode_free (node->token.V.aobj);
free (node);
}
}
static void
astnode_visual_trav_lv (AstNode node, int lv, char lr)
{
if (node != NULL)
{
astnode_visual_trav_lv (node->left, lv+1, 'L');
for (int i = 0; i < lv; i++) PFI(" ");
if (lr == 'L') PFI("<");
if (lr == 'R') PFI(">");
switch (node->token.type)
{
case T_OPER: PFI("[%s]\n", node->token.V.oval); break;
case T_NUMB: PFI("[%ld]\n", node->token.V.ival); break;
case T_SUBA: PFI("\n"); astnode_visual_trav_lv (node->token.V.aobj, lv, 'N'); break;
}
astnode_visual_trav_lv (node->right, lv+1, 'R');
}
}
void
astnode_visual_trav (AstNode node)
{
astnode_visual_trav_lv (node, 0, 'N');
}
void
astnode_prevorder_trav (AstNode node)
{
if (node != NULL)
{
if (node->token.type == T_OPER) PFI(" (");
switch (node->token.type)
{
case T_OPER: PFI( " %s", node->token.V.oval); break;
case T_NUMB: PFI(" %ld", node->token.V.ival); break;
case T_SUBA: astnode_prevorder_trav (node->token.V.aobj); break;
}
astnode_prevorder_trav (node->left);
astnode_prevorder_trav (node->right);
if (node->token.type == T_OPER) PFI(" )");
}
}
void
astnode_inorder_trav (AstNode node)
{
if (node != NULL)
{
astnode_inorder_trav (node->left);
switch (node->token.type)
{
case T_OPER: PFI( " %s", node->token.V.oval); break;
case T_NUMB: PFI(" %ld", node->token.V.ival); break;
case T_SUBA: PFI(" ("); astnode_inorder_trav (node->token.V.aobj); PFI(" )"); break;
}
astnode_inorder_trav (node->right);
}
}
void
astnode_postorder_trav (AstNode node)
{
if (node != NULL)
{
astnode_postorder_trav (node->left);
astnode_postorder_trav (node->right);
switch (node->token.type)
{
case T_OPER: PFI( " %s", node->token.V.oval); break;
case T_NUMB: PFI(" %ld", node->token.V.ival); break;
case T_SUBA: astnode_postorder_trav (node->token.V.aobj); break;
}
}
}
static int
get_priority (char *op)
{
if (op[0] == '+') return 1;
if (op[0] == '-') return 1;
if (op[0] == '*') return 2;
if (op[0] == '/') return 2;
}
#define STCKSIZE 16
#define NUMBSIZE 20
AstNode
parse_string (char *estr)
{
AstNode head = NULL;
AstNode st[STCKSIZE] = {0};
AstNode tp[STCKSIZE] = {0};
int lv = 0;
char nbuf[NUMBSIZE] = {0};
int ndx = 0;
int idx = 0;
char c;
c = estr[idx];
while (c != '\0')
{
switch (c)
{
case '0'...'9':
{
char n = estr[idx + 1];
nbuf[ndx] = c; ndx++;
if (!(n >= '0' && n <= '9'))
{
long lt = strtol (nbuf, NULL, 10);
Token tk = { .type = T_NUMB, .V.ival = lt };
AstNode node = astnode_new (tk);
if (st[lv] == NULL)
{
if (tp[lv] == NULL) tp[lv] = node;
else
PFI("Info: At index %d, Maybe Syntax error!\n", idx);
}
else
{
AstNode tmp = st[lv]->right;
if (tmp == NULL)
{
st[lv]->right = node;
}
else
{
while (tmp->right != NULL)
tmp = tmp->right;
tmp->right = node;
}
}
PFI("NUMB: %s\n", nbuf);
memset (nbuf, 0, NUMBSIZE); ndx = 0;
}
}
break;
case '+': case '-': case '*': case '/':
{
Token tk = { .type = T_OPER, .V.oval[0] = c };
AstNode node = astnode_new (tk);
int opr = get_priority (tk.V.oval);
if (st[lv] == NULL)
{
if (tp[lv] == NULL)
{
st[lv] = node;
}
else
{
node->left = tp[lv];
st[lv] = node;
}
}
else
{
int ppr = get_priority (st[lv]->token.V.oval);
if (opr > ppr)
{
node->left = st[lv]->right;
st[lv]->right = node;
}
else
{
node->left = st[lv];
st[lv] = node;
}
}
PFI(" OP: %c\n", c);
}
break;
case '(':
{
lv++; PFI("LPAR: %c\n", c);
}
break;
case ')':
{
AstNode node = NULL;
Token tk = { .type = T_SUBA, .V.aobj = st[lv] };
node = astnode_new (tk);
if (st[lv-1] == NULL)
{
st[lv-1] = node; st[lv] = NULL; tp[lv] = NULL;
}
else
{
if (st[lv-1]->right == NULL)
{
st[lv-1]->right = node;
}
else
{
AstNode tmp = st[lv-1]->right;
while (tmp->right != NULL)
tmp = tmp->right;
tmp->right = node;
}
st[lv] = NULL; tp[lv] = NULL;
}
lv--; PFI("RPAR: %c\n", c);
}
break;
case ' ': break;
default:
PFI("Error: At index %d, [%c], Syntax error!\n", idx, c); exit (0);
}
idx++; c = estr[idx];
}
head = st[lv];
return head;
}
void
test_parse (void)
{
AstNode root = NULL;
char *st = "1+(2-(3+(4-5)))";
PF("EXPR: %s\n", st);
PF("--------------------\n");
root = parse_string (st);
PF("--------------------\n");
PF(" MID: "); astnode_inorder_trav (root); PF("\n");
PF("--------------------\n");
PF("PREV: "); astnode_prevorder_trav (root); PF("\n");
PF("--------------------\n");
PF("POST: "); astnode_postorder_trav (root); PF("\n");
PF("--------------------\n");
astnode_visual_trav (root);
PF("--------------------\n");
astnode_free (root);
}
void
test_astnode (void)
{
AstNode root = NULL, tpl = NULL, tpr = NULL;
Token tka = { .type = T_NUMB, .V.ival = 1 };
Token tkb = { .type = T_NUMB, .V.ival = 2 };
Token tkc = { .type = T_OPER, .V.oval[0] = '+' };
Token tkd = { .type = T_NUMB, .V.ival = 3 };
Token tke = { .type = T_OPER, .V.oval[0] = '-' };
tpl = astnode_new (tka);
tpr = astnode_new (tkb);
root = astnode_new (tkc);
root->left = tpl;
root->right = tpr;
tpl = root;
tpr = astnode_new (tkd);
root = astnode_new (tke);
root->left = tpl;
root->right = tpr;
PF(" MID: ");
astnode_inorder_trav (root); PF("\n");
PF("PREV: ");
astnode_prevorder_trav (root); PF("\n");
PF("POST: ");
astnode_postorder_trav (root); PF("\n");
astnode_free (root);
}
int
main (int argc, char *argv[])
{
test_parse ();
return 0;
}