当前位置: 首页 > article >正文

【手搓一个脚本语言】五、用C语言抽象语法树AST解析简单的表达式字符串(括号)

【手搓一个脚本语言】五、用C语言抽象语法树AST解析简单的表达式字符串(括号)

  • 接上一篇【手搓一个脚本语言】四、用C语言抽象语法树AST解析简单的表达式字符串(乘除法)-CSDN博客代码,再进一步改进!!!
  • 目标:正确解析带括号的表达式,如:(1+2)*3,遍历AST能输出正确的结果!!!

1、括号的规则

  • 括号"()",也称小括号、圆括号,包括左括号’(‘和右括号’)'两个符号!
  • 括号的运算优先级:在表达式中,从左括号开始到右括号结束,这中间的内容,拥有有最高的运算优先级!
  • 括号是可以嵌套的,最内部的拥有最高的优先级!
  • 括号的解析,用数组来模拟栈来辅助解析带括号的表达式,栈的高度(也就是数组的长度)就是表达式括号嵌套的深度!
  • 括号中表达式的保存,将括号内的表达式,做为一个子AST树,保存到父AST的节点!

2、添加子表达式类型

  • 将AstNode的定义提到Token的定义之前!
  • 定义T_SUBA,表示子表达式类型!
  • 在union {…} V;中加入AstNode *aobj; //sub astnode,与T_SUBA对应!
/* define token type */
#define T_OPER  0x21
#define T_NUMB  0x22
#define T_SUBA  0x23

/* define astnode datatype */
typedef struct _AstNode *AstNode;

/* define token datatype */
typedef struct _Token Token;
struct _Token {
  union {
       void *nval;    //null
       char  oval[8]; //operator
       long  ival;    //integer
    AstNode  aobj;    //sub astnode
  } V;
  char type;       //value type
  char fill[7];    //unused
};

/* define astnode datastruct */
struct _AstNode {
  Token token;
  AstNode left, right;
};

2、修改释放AST节点函数

  • 当节点类型为T_SUBA时,再次调用astnode_free释放节点保存子AST节点,又是递归!
/* free the astnode */
void
astnode_free (AstNode node)
{
  if (node != NULL)
    {
      astnode_free (node->left);
      astnode_free (node->right);
      if (node->token.type == T_SUBA)
	astnode_free (node->token.V.aobj);
      free (node);
    }
}

3、修改遍历函数(注意:均为递归调用!)

3.1 直观遍历,在astnode_visual_trav_lv函数的switch中加入:

...
        case T_SUBA: PFI("\n"); astnode_visual_trav_lv (node->token.V.aobj, lv, 'N'); break;
...

3.2 前序遍历,在astnode_prevorder_trav函数中加入输出括号和处理子表达式功能

/* prevorder traverse ast */
void
astnode_prevorder_trav (AstNode node)
{
  if (node != NULL)
    {
      if (node->token.type == T_OPER) PFI(" (");
      switch (node->token.type)
	{
	case T_OPER: PFI( " %s", node->token.V.oval); break;
	case T_NUMB: PFI(" %ld", node->token.V.ival); break;
	case T_SUBA: astnode_prevorder_trav (node->token.V.aobj); break;
	}
      astnode_prevorder_trav (node->left);
      astnode_prevorder_trav (node->right);
      if (node->token.type == T_OPER) PFI(" )");
    }
}

3.3 中序遍历,在astnode_inorder_trav函数的swtich中加入:

...
        case T_SUBA: PFI(" ("); astnode_inorder_trav (node->token.V.aobj); PFI(" )"); break;
...

3.4 后序遍历,在astnode_postorder_trav函数的switch中加入:

...
        case T_SUBA: astnode_postorder_trav (node->token.V.aobj); break;
...

4、修改解析函数parse_string

4.1 定义栈高度,栈的高度就是表达式中括号嵌套的深度!!!

/* define stack height */
#define STCKSIZE 16

4.2 定义栈数组,st保存运算符,tp保存数字

4.3 定义整型变量lv,保存栈指针,当遇到左括号时指针增一,当遇到右括号时指针减一

  AstNode st[STCKSIZE] = {0};
  AstNode tp[STCKSIZE] = {0};
  int lv = 0;

4.4 将head替换为st[lv],将tp替换为tp[lv]

4.5 在switch中加入处理左右括号的功能,当为右括号时将当前栈(lv)中保存表达式保存为子表达式,再将子表达式追加到栈的下一级(lv-1)中运算符的右子节点,如栈的下一级(lv-1)为空,则直接赋值给栈的下一级(lv-1)!!!

	case '(': //left parentheses
	  {
	    lv++; PFI("LPAR: %c\n", c);
	  }
	  break;
	case ')': //right parentheses
	  {
	    AstNode node = NULL;
	    Token tk = { .type = T_SUBA, .V.aobj = st[lv] };
	    node = astnode_new (tk);
	    if (st[lv-1] == NULL)
	      {
		st[lv-1] = node; st[lv] = NULL; tp[lv] = NULL;
	      }
	    else
	      {
		if (st[lv-1]->right == NULL)
		  {
		    st[lv-1]->right = node;
		  }
		else
		  {
		    AstNode tmp = st[lv-1]->right;
		    while (tmp->right != NULL)
		      tmp = tmp->right;
		    tmp->right = node;
		  }
		st[lv] = NULL; tp[lv] = NULL;
	      }
	    lv--; PFI("RPAR: %c\n", c);
	  }
	  break;

4.6 在结束时为head赋值,将运算符栈底st[lv]赋值给head,返回head

  head = st[lv];
  return head;

5、测试函数

/* test parse string function */
void
test_parse (void)
{
  AstNode root = NULL;
  //char *st = "1*(2+3)";
  //char *st = "(1+2)*3";
  //char *st = "1*(2+3+4)*5";
  char *st = "1+(2-(3+4+5)-6)+7";
  PF("EXPR: %s\n", st);
  PF("--------------------\n");
  root = parse_string (st);
  PF("--------------------\n");
  PF(" MID: "); astnode_inorder_trav (root); PF("\n");
  PF("--------------------\n");
  PF("PREV: "); astnode_prevorder_trav (root); PF("\n");
  PF("--------------------\n");
  PF("POST: "); astnode_postorder_trav (root); PF("\n");
  PF("--------------------\n");
  astnode_visual_trav (root);
  PF("--------------------\n");
  astnode_free (root);
}

6、编译运行

6.1 测试表达式:(1+2)*3,输出结果如下:

EXPR: (1+2)*3
--------------------
LPAR: (
NUMB: 1
  OP: +
NUMB: 2
RPAR: )
  OP: *
NUMB: 3
--------------------
 MID:  ( 1 + 2 ) * 3
--------------------
PREV:  ( * ( + 1 2 ) 3 )
--------------------
POST:  1 2 + 3 *
--------------------
    <
        <[1]
    [+]
        >[2]
[*]
    >[3]
--------------------

6.2 测试表达式:1*(2+3),输出结果如下:

EXPR: 1*(2+3)
--------------------
NUMB: 1
  OP: *
LPAR: (
NUMB: 2
  OP: +
NUMB: 3
RPAR: )
--------------------
 MID:  1 * ( 2 + 3 )
--------------------
PREV:  ( * 1 ( + 2 3 ) )
--------------------
POST:  1 2 3 + *
--------------------
    <[1]
[*]
    >
        <[2]
    [+]
        >[3]
--------------------

6.3 测试表达式1+(2-(3+4+5)-6)+7输出结果如下:

EXPR: 1+(2-(3+4+5)-6)+7
--------------------
NUMB: 1
  OP: +
LPAR: (
NUMB: 2
  OP: -
LPAR: (
NUMB: 3
  OP: +
NUMB: 4
  OP: +
NUMB: 5
RPAR: )
  OP: -
NUMB: 6
RPAR: )
  OP: +
NUMB: 7
--------------------
 MID:  1 + ( 2 - ( 3 + 4 + 5 ) - 6 ) + 7
--------------------
PREV:  ( + ( + 1 ( - ( - 2 ( + ( + 3 4 ) 5 ) ) 6 ) ) 7 )
--------------------
POST:  1 2 3 4 + 5 + - 6 - + 7 +
--------------------
        <[1]
    <[+]
        >
                <[2]
            <[-]
                >
                        <[3]
                    <[+]
                        >[4]
                [+]
                    >[5]
        [-]
            >[6]
[+]
    >[7]
--------------------

7、以上测试输出结果达到预期!!!

8、完整代码

/* filename gt.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* compile: gcc gt.c -o gt */
/*     run: ./gt           */
/* memcheck: valgrind --leak-check=yes ./gt */

/* debug on|off */
#define GT_DEBUG 1
#if GT_DEBUG
#define PFI(...) fprintf (stderr, __VA_ARGS__)
#else
#define PFI(...)
#endif
#define PF(...) fprintf (stderr, __VA_ARGS__)

/* define token type */
#define T_OPER  0x21
#define T_NUMB  0x22
#define T_SUBA  0x23

/* define astnode datatype */
typedef struct _AstNode *AstNode;

/* define token datatype */
typedef struct _Token Token;
struct _Token {
  union {
       void *nval;    //null
       char  oval[8]; //operator
       long  ival;    //integer
    AstNode  aobj;    //sub astnode
  } V;
  char type;       //value type
  char fill[7];    //unused
};

/* define astnode data struct */
struct _AstNode {
  Token token;
  AstNode left, right;
};

/* create a new astnode */
AstNode
astnode_new (Token tk)
{
  AstNode node = (AstNode) malloc (sizeof(struct _AstNode));
  node->token = tk; node->left = NULL; node->right = NULL;
  return node;
}

/* free the astnode */
void
astnode_free (AstNode node)
{
  if (node != NULL)
    {
      astnode_free (node->left);
      astnode_free (node->right);
      if (node->token.type == T_SUBA)
	astnode_free (node->token.V.aobj);
      free (node);
    }
}

/* visual traverse, by level, by left or right */
static void
astnode_visual_trav_lv (AstNode node, int lv, char lr)
{
  if (node != NULL)
    {
      astnode_visual_trav_lv (node->left, lv+1, 'L');
      for (int i = 0; i < lv; i++) PFI("    ");
      if (lr == 'L') PFI("<");
      if (lr == 'R') PFI(">");
      switch (node->token.type)
	{
	case T_OPER: PFI("[%s]\n",  node->token.V.oval); break;
	case T_NUMB: PFI("[%ld]\n", node->token.V.ival); break;
	case T_SUBA: PFI("\n"); astnode_visual_trav_lv (node->token.V.aobj, lv, 'N'); break;
	}
      astnode_visual_trav_lv (node->right, lv+1, 'R');
    }
}

/* visual traverse, looks like a tree */
void
astnode_visual_trav (AstNode node)
{
  astnode_visual_trav_lv (node, 0, 'N');
}

/* prevorder traverse ast */
void
astnode_prevorder_trav (AstNode node)
{
  if (node != NULL)
    {
      if (node->token.type == T_OPER) PFI(" (");
      switch (node->token.type)
	{
	case T_OPER: PFI( " %s", node->token.V.oval); break;
	case T_NUMB: PFI(" %ld", node->token.V.ival); break;
	case T_SUBA: astnode_prevorder_trav (node->token.V.aobj); break;
	}
      astnode_prevorder_trav (node->left);
      astnode_prevorder_trav (node->right);
      if (node->token.type == T_OPER) PFI(" )");
    }
}

/* inorder traverse ast */
void
astnode_inorder_trav (AstNode node)
{
    if (node != NULL)
    {
      astnode_inorder_trav (node->left);
      switch (node->token.type)
	{
	case T_OPER: PFI( " %s", node->token.V.oval); break;
	case T_NUMB: PFI(" %ld", node->token.V.ival); break;
	case T_SUBA: PFI(" ("); astnode_inorder_trav (node->token.V.aobj); PFI(" )"); break;
	}
      astnode_inorder_trav (node->right);
    }
}

/* postorder traverse ast */
void
astnode_postorder_trav (AstNode node)
{
  if (node != NULL)
    {
      astnode_postorder_trav (node->left);
      astnode_postorder_trav (node->right);
      switch (node->token.type)
	{
	case T_OPER: PFI( " %s", node->token.V.oval); break;
	case T_NUMB: PFI(" %ld", node->token.V.ival); break;
	case T_SUBA: astnode_postorder_trav (node->token.V.aobj); break;
	}
    }
}

/* get operator priority */
static int
get_priority (char *op)
{
  if (op[0] == '+') return 1;
  if (op[0] == '-') return 1;
  if (op[0] == '*') return 2;
  if (op[0] == '/') return 2;
}

/* define stack height */
#define STCKSIZE 16

/* define number buffer length */
#define NUMBSIZE 20

/* parse expression string to ast */
AstNode
parse_string (char *estr)
{
  AstNode head = NULL;
  AstNode st[STCKSIZE] = {0};
  AstNode tp[STCKSIZE] = {0};
  int lv = 0;

  char nbuf[NUMBSIZE] = {0};
  int ndx = 0;
  int idx = 0;
  char c;

  c = estr[idx];
  while (c != '\0')
    {
      switch (c)
	{
	case '0'...'9': //number
	  {
	    char n = estr[idx + 1]; //next char
	    nbuf[ndx] = c; ndx++;
	    if (!(n >= '0' && n <= '9')) //not a number
	      {
		long  lt = strtol (nbuf, NULL, 10);
		Token tk = { .type = T_NUMB, .V.ival = lt };
		AstNode node = astnode_new (tk);
		if (st[lv] == NULL)
		  {
		    if (tp[lv] == NULL) tp[lv] = node;
		    else
		      PFI("Info: At index %d, Maybe Syntax error!\n", idx);
		  }
		else
		  {
		    AstNode tmp = st[lv]->right;
		    if (tmp == NULL)
		      {
			st[lv]->right = node;
		      }
		    else
		      {
			while (tmp->right != NULL)
			  tmp = tmp->right;
			tmp->right = node;
		      }
		  }
		PFI("NUMB: %s\n", nbuf);
		memset (nbuf, 0, NUMBSIZE); ndx = 0; //init nbuf
	      }
	  }
	  break;
	case '+': case '-': case '*': case '/': //operator
	  {
	    Token tk = { .type = T_OPER, .V.oval[0] = c };
	    AstNode node = astnode_new (tk);
	    int opr = get_priority (tk.V.oval);
	    if (st[lv] == NULL)
	      {
		if (tp[lv] == NULL)
		  {
		    st[lv] = node;
		  }
		else
		  {
		    node->left = tp[lv];
		    st[lv] = node;
		  }
	      }
	    else
	      {
		int ppr = get_priority (st[lv]->token.V.oval);
		if (opr > ppr)
		  {
		    node->left = st[lv]->right;
		    st[lv]->right = node;
		  }
		else
		  {
		    node->left = st[lv];
		    st[lv] = node;
		  }
	      }
	    PFI("  OP: %c\n", c);
	  }
	  break;
	case '(': //left parentheses
	  {
	    lv++; PFI("LPAR: %c\n", c);
	  }
	  break;
	case ')': //right parentheses
	  {
	    AstNode node = NULL;
	    Token tk = { .type = T_SUBA, .V.aobj = st[lv] };
	    node = astnode_new (tk);
	    if (st[lv-1] == NULL)
	      {
		st[lv-1] = node; st[lv] = NULL; tp[lv] = NULL;
	      }
	    else
	      {
		if (st[lv-1]->right == NULL)
		  {
		    st[lv-1]->right = node;
		  }
		else
		  {
		    AstNode tmp = st[lv-1]->right;
		    while (tmp->right != NULL)
		      tmp = tmp->right;
		    tmp->right = node;
		  }
		st[lv] = NULL; tp[lv] = NULL;
	      }
	    lv--; PFI("RPAR: %c\n", c);
	  }
	  break;
	case ' ': break; //space do nothing
	default:
	  PFI("Error: At index %d, [%c], Syntax error!\n", idx, c); exit (0);
	}
      idx++; c = estr[idx];
    }
  head = st[lv];
  return head;
}

/**************************************************/

/* test parse string function */
void
test_parse (void)
{
  AstNode root = NULL;
  //char *st = "1+2-3";
  //char *st = "1+2-3+4";
  //char *st = "1+2-3+4-5";
  //char *st = "100-200+300+400-500";
  //char *st = "1+2*3";
  //char *st = "1*2+3";
  //char *st = "1+2*3+4";
  //char *st = "1+2+3*4+5+6";
  //char *st = "1*2+3*4";
  //char *st = "1*2*3+4*5*6+7*8*9";
  //char *st = "1*(2+3)";
  //char *st = "(1+2)*3";
  //char *st = "1*(2+3+4)*5";
  //char *st = "1+(2-(3+4+5)-6)+7";
  //char *st = "(((1+2)-3)+4)-5";
  char *st = "1+(2-(3+(4-5)))";
  PF("EXPR: %s\n", st);
  PF("--------------------\n");
  root = parse_string (st);
  PF("--------------------\n");
  PF(" MID: "); astnode_inorder_trav (root); PF("\n");
  PF("--------------------\n");
  PF("PREV: "); astnode_prevorder_trav (root); PF("\n");
  PF("--------------------\n");
  PF("POST: "); astnode_postorder_trav (root); PF("\n");
  PF("--------------------\n");
  astnode_visual_trav (root);
  PF("--------------------\n");
  astnode_free (root);
}

/* test astnode function */
void
test_astnode (void)
{
  AstNode root = NULL, tpl = NULL, tpr = NULL;
  Token tka = { .type = T_NUMB, .V.ival = 1 };
  Token tkb = { .type = T_NUMB, .V.ival = 2 };
  Token tkc = { .type = T_OPER, .V.oval[0] = '+' };
  Token tkd = { .type = T_NUMB, .V.ival = 3 };
  Token tke = { .type = T_OPER, .V.oval[0] = '-' };

  tpl  = astnode_new (tka); //1
  tpr  = astnode_new (tkb); //2
  root = astnode_new (tkc); //+
  root->left  = tpl;
  root->right = tpr;

  tpl  = root;
  tpr  = astnode_new (tkd); //3
  root = astnode_new (tke); //-
  root->left  = tpl;
  root->right = tpr;

  PF(" MID: ");
  astnode_inorder_trav (root); PF("\n");

  PF("PREV: ");
  astnode_prevorder_trav (root); PF("\n");

  PF("POST: ");
  astnode_postorder_trav (root); PF("\n");

  astnode_free (root);
}

/* main function */
int
main (int argc, char *argv[])
{
  //test_astnode ();
  test_parse ();
  return 0;
}
//-----GT-----//

http://www.kler.cn/a/463080.html

相关文章:

  • Linux Red Hat 7.9 Server安装GitLab
  • Spring Boot自定义Starter
  • 解決當前IP地址僅適用於本地網路
  • 朱姆沃尔特隐身战舰:从失败到威慑
  • (六)vForm 动态表单(数据量大,下拉选卡顿问题)
  • 设计模式の状态策略责任链模式
  • Adobe Illustrator 中裁剪图像的最快方案
  • 使用策略模式时的一个生效问题
  • 4.微服务灰度发布落地实践(消息队列增强)
  • NestJS 性能优化:从应用到部署的最佳实践
  • FPGA的FIFO
  • 数据挖掘——认识数据
  • SQL-Server链接服务器访问Oracle数据
  • 【蓝桥杯】:蓝桥杯之路径之谜
  • 机器人C++开源库The Robotics Library (RL)使用手册(四)
  • 关于ElasticSearch
  • 搭建医疗产品行业知识中台的手册
  • 深度学习在文本情感分析中的应用
  • 基于Redis的分布式锁
  • easybox
  • 【YashanDB知识库】hive初始化崖山报错YAS-04209
  • 万里数据库GreatSQL监控解析
  • 永嘉县瓯北六小:庆元旦,献爱心,让新永嘉人在童装节中找到归属感!
  • Golang学习历程【第五篇 复合数据类型:数组切片】
  • ShardingSphere-Proxy分表场景测试案例
  • CPT203 Software Engineering 软件工程 Pt.4 软件设计(中英双语)