【手搓一个脚本语言】七、用C语言抽象语法树AST实现一个可交互运行的表达式计算器
- 接上一篇【手搓一个脚本语言】六、用C语言抽象语法树AST计算表达式的值-CSDN博客代码,再进一步改进!!!
- 目标:实现一个可交互运行的表达式计算器,即输入表达式,输出正确的表达式的结果值!
1、处理特殊情况
1.1 表达式只有一个数字
- 在处理右括时,判断当前运算符栈节点是否为空,如为空,将数字栈节点赋值给记号,然后再创建子节点!
...case ')': {AstNode node = NULL;Token tk = { .type = T_SUBA, .V.aobj = st[lv] };if (st[lv] == NULL) tk.V.aobj = tp[lv];node = astnode_new (tk);
...
- 在返回head前判断当前运算符栈节点是否为空,如为空,将数字栈节点赋值给head!
...if (st[lv] == NULL){if (tp[lv] == NULL) {} else head = tp[lv];}else{head = st[lv];}
...
1.2 减法单目运算,在eval_astnode函数的减法运算中加入代码:
...case '-': if (sp-2 < 0){PFI("SUB %ld", stk[sp-1].V.ival);stk[sp-1].V.ival = -(stk[sp-1].V.ival);PFI(" ==> %ld\n", stk[sp-1].V.ival);break;}
...
1.3 除数不能为零,在eval_astnode函数的除法运算中加入代码,输出错误信息:
...case '/': if (stk[sp-1].V.ival == 0){PF("Error: The divisor cannot be zero!\n");break;}
...
2、编译测试
2.1 测试表达式为单一数字或括号中只有单一数字情况,表达式((1001)+2)输出结果符合预期:
EXPR: ((1001)+2)
--------------------
LPAR: (
LPAR: (
NUMB: 1001
RPAR: )OP: +
NUMB: 2
RPAR: )
--------------------MID: ( ( 1001 ) + 2 )
--------------------
PREV: ( + 1001 2 )
--------------------<[1001]
[+]>[2]
--------------------
POST: 1001 2 +
--------------------
Node count: 3
Node index: 3
PUSH 1001, SP: 0
PUSH 2, SP: 1
ADD 1001 2 ==> 1003
--------------------
RESULT: 1003
--------------------
2.2 测试减法单目运算,表达式-(1000-900)输出结果符合预期:
EXPR: -(1000-900)
--------------------OP: -
LPAR: (
NUMB: 1000OP: -
NUMB: 900
RPAR: )
--------------------MID: - ( 1000 - 900 )
--------------------
PREV: ( - ( - 1000 900 ) )
--------------------
[-]><[1000][-]>[900]
--------------------
POST: 1000 900 - -
--------------------
Node count: 4
Node index: 4
PUSH 1000, SP: 0
PUSH 900, SP: 1
SUB 1000 900 ==> 100
SUB 100 ==> -100
--------------------
RESULT: -100
--------------------
2.3 测试除数为零情况,表达式1001/(90-90)输出结果符合预期:
EXPR: 1001/(90-90)
--------------------
NUMB: 1001OP: /
LPAR: (
NUMB: 90OP: -
NUMB: 90
RPAR: )
--------------------MID: 1001 / ( 90 - 90 )
--------------------
PREV: ( / 1001 ( - 90 90 ) )
--------------------<[1001]
[/]><[90][-]>[90]
--------------------
POST: 1001 90 90 - /
--------------------
Node count: 5
Node index: 5
PUSH 1001, SP: 0
PUSH 90, SP: 1
PUSH 90, SP: 2
SUB 90 90 ==> 0
Error: The divisor cannot be zero!
--------------------
RESULT: 1001
--------------------
3、交互运行函数
- 定义表达式字符串缓冲区长度宏EXPRSIZE,值为1024
- 定义交互运行函数expr_shell,逻辑流程如下:
- 输出提示符’EXPR:> ';
- 循环从标准输入中读入字符,如为EOF则退出;
- 如为换行符’\n’则:
- 解析缓冲区字符串到AST
- 输出中缀表达式
- 运算AST,输出结果值
- 清空缓冲区,继续循环;
- 其他字符则存入缓冲区,缓冲区索引自增!
#define EXPRSIZE 1024
void
expr_shell (void)
{char buf[EXPRSIZE] = {0};int idx = 0;char c;PF("EXPR:> ");c = fgetc (stdin);while (c != EOF){if (c == '\n'){Token tk = { .type = 0, .V.nval = 0 };AstNode root = NULL;PF("------------------------------\n");root = parse_string (buf);PF("------------------------------\n MID: ");astnode_inorder_trav (root);PF("\n------------------------------\n");tk = eval_astnode (root);PF("------------------------------\n");PF("RESULT: %ld\n", tk.V.ival);PF("------------------------------\n");astnode_free (root);memset (buf, 0, EXPRSIZE); idx = 0;PF("EXPR:> ");}else{buf[idx] = c; idx++;}c = fgetc (stdin);}PF("\nBye!\n");
}
4、测试函数,在main函数中调用expr_shell函数
int
main (int argc, char *argv[])
{expr_shell ();return 0;
}
5、编译运行
- 测试表达式:1020-30/315,结果如预期,输入CTRL-D退出!
EXPR:> 10*20-30/3*15
------------------------------
NUMB: 10OP: *
NUMB: 20OP: -
NUMB: 30OP: /
NUMB: 3OP: *
NUMB: 15
------------------------------MID: 10 * 20 - 30 / 3 * 15
------------------------------
Node count: 9
Node index: 9
PUSH 10, SP: 0
PUSH 20, SP: 1
MUL 10 20 ==> 200
PUSH 30, SP: 1
PUSH 3, SP: 2
DIV 30 3 ==> 10
PUSH 15, SP: 2
MUL 10 15 ==> 150
SUB 200 150 ==> 50
------------------------------
RESULT: 50
------------------------------
EXPR:>
Bye!
6、检查内存分配情况,无误!!!
==3556== Memcheck, a memory error detector
==3556== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==3556== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==3556== Command: ./gt
==3556==
EXPR:> (1+2)*300/9
------------------------------
LPAR: (
NUMB: 1OP: +
NUMB: 2
RPAR: )OP: *
NUMB: 300OP: /
NUMB: 9
------------------------------MID: ( 1 + 2 ) * 300 / 9
------------------------------
Node count: 7
Node index: 7
PUSH 1, SP: 0
PUSH 2, SP: 1
ADD 1 2 ==> 3
PUSH 300, SP: 1
MUL 3 300 ==> 900
PUSH 9, SP: 1
DIV 900 9 ==> 100
------------------------------
RESULT: 100
------------------------------
EXPR:>
Bye!
==3556==
==3556== HEAP SUMMARY:
==3556== in use at exit: 0 bytes in 0 blocks
==3556== total heap usage: 10 allocs, 10 frees, 1,392 bytes allocated
==3556==
==3556== All heap blocks were freed -- no leaks are possible
==3556==
==3556== For counts of detected and suppressed errors, rerun with: -v
==3556== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
7、完整代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define GT_DEBUG 1
#if GT_DEBUG
#define PFI(...) fprintf (stderr, __VA_ARGS__)
#else
#define PFI(...)
#endif
#define PF(...) fprintf (stderr, __VA_ARGS__)
#define T_OPER 0x21
#define T_NUMB 0x22
#define T_SUBA 0x23
typedef struct _AstNode *AstNode;
typedef struct _Token Token;
struct _Token {union {void *nval; char oval[8]; long ival; AstNode aobj; } V;char type; char fill[7];
};
struct _AstNode {Token token;AstNode left, right;
};
AstNode
astnode_new (Token tk)
{AstNode node = (AstNode) malloc (sizeof(struct _AstNode));node->token = tk; node->left = NULL; node->right = NULL;return node;
}
void
astnode_free (AstNode node)
{if (node != NULL){astnode_free (node->left);astnode_free (node->right);if (node->token.type == T_SUBA)astnode_free (node->token.V.aobj);free (node);}
}
static void
astnode_count_all (AstNode node, int *cnt)
{if (node != NULL){astnode_count_all (node->left, cnt);astnode_count_all (node->right, cnt);if (node->token.type == T_SUBA)astnode_count_all (node->token.V.aobj, cnt);else(*cnt)++;}
}
int
astnode_count (AstNode node)
{int count = 0;astnode_count_all (node, &count);return count;
}
static void
astnode_visual_trav_lv (AstNode node, int lv, char lr)
{if (node != NULL){astnode_visual_trav_lv (node->left, lv+1, 'L');for (int i = 0; i < lv; i++) PFI(" ");if (lr == 'L') PFI("<");if (lr == 'R') PFI(">");switch (node->token.type){case T_OPER: PFI("[%s]\n", node->token.V.oval); break;case T_NUMB: PFI("[%ld]\n", node->token.V.ival); break;case T_SUBA: PFI("\n"); astnode_visual_trav_lv (node->token.V.aobj, lv, 'N'); break;}astnode_visual_trav_lv (node->right, lv+1, 'R');}
}
void
astnode_visual_trav (AstNode node)
{astnode_visual_trav_lv (node, 0, 'N');
}
void
astnode_prevorder_trav (AstNode node)
{if (node != NULL){if (node->token.type == T_OPER) PFI(" (");switch (node->token.type){case T_OPER: PFI( " %s", node->token.V.oval); break;case T_NUMB: PFI(" %ld", node->token.V.ival); break;case T_SUBA: astnode_prevorder_trav (node->token.V.aobj); break;}astnode_prevorder_trav (node->left);astnode_prevorder_trav (node->right);if (node->token.type == T_OPER) PFI(" )");}
}
void
astnode_inorder_trav (AstNode node)
{if (node != NULL){astnode_inorder_trav (node->left);switch (node->token.type){case T_OPER: PFI( " %s", node->token.V.oval); break;case T_NUMB: PFI(" %ld", node->token.V.ival); break;case T_SUBA: PFI(" ("); astnode_inorder_trav (node->token.V.aobj); PFI(" )"); break;}astnode_inorder_trav (node->right);}
}
void
astnode_postorder_trav (AstNode node)
{if (node != NULL){astnode_postorder_trav (node->left);astnode_postorder_trav (node->right);switch (node->token.type){case T_OPER: PFI( " %s", node->token.V.oval); break;case T_NUMB: PFI(" %ld", node->token.V.ival); break;case T_SUBA: astnode_postorder_trav (node->token.V.aobj); break;}}
}
void
astnode_postorder_trav_toa (AstNode node, Token tks[], int *idx)
{if (node != NULL){astnode_postorder_trav_toa (node->left, tks, idx);astnode_postorder_trav_toa (node->right, tks, idx);switch (node->token.type){case T_OPER: tks[*idx] = node->token; (*idx)++; break;case T_NUMB: tks[*idx] = node->token; (*idx)++; break;case T_SUBA: astnode_postorder_trav_toa (node->token.V.aobj, tks, idx); break;}}
}
static int
get_priority (char *op)
{if (op[0] == '+') return 1;if (op[0] == '-') return 1;if (op[0] == '*') return 2;if (op[0] == '/') return 2;
}
#define STCKSIZE 16
#define NUMBSIZE 20
AstNode
parse_string (char *estr)
{AstNode head = NULL;AstNode st[STCKSIZE] = {0};AstNode tp[STCKSIZE] = {0};int lv = 0;char nbuf[NUMBSIZE] = {0};int ndx = 0;int idx = 0;char c;c = estr[idx];while (c != '\0'){switch (c){case '0'...'9': {char n = estr[idx + 1]; nbuf[ndx] = c; ndx++;if (!(n >= '0' && n <= '9')) {long lt = strtol (nbuf, NULL, 10);Token tk = { .type = T_NUMB, .V.ival = lt };AstNode node = astnode_new (tk);if (st[lv] == NULL){if (tp[lv] == NULL) tp[lv] = node;elsePFI("Info: At index %d, Maybe Syntax error!\n", idx);}else{AstNode tmp = st[lv]->right;if (tmp == NULL){st[lv]->right = node;}else{while (tmp->right != NULL)tmp = tmp->right;tmp->right = node;}}PFI("NUMB: %s\n", nbuf);memset (nbuf, 0, NUMBSIZE); ndx = 0; }}break;case '+': case '-': case '*': case '/': {Token tk = { .type = T_OPER, .V.oval[0] = c };AstNode node = astnode_new (tk);int opr = get_priority (tk.V.oval);if (st[lv] == NULL){if (tp[lv] == NULL){st[lv] = node;}else{node->left = tp[lv];st[lv] = node;}}else{int ppr = get_priority (st[lv]->token.V.oval);if (opr > ppr){node->left = st[lv]->right;st[lv]->right = node;}else{node->left = st[lv];st[lv] = node;}}PFI(" OP: %c\n", c);}break;case '(': {lv++; PFI("LPAR: %c\n", c);}break;case ')': {AstNode node = NULL;Token tk = { .type = T_SUBA, .V.aobj = st[lv] };if (st[lv] == NULL) tk.V.aobj = tp[lv];node = astnode_new (tk);if (st[lv-1] == NULL){st[lv-1] = node; st[lv] = NULL; tp[lv] = NULL;}else{if (st[lv-1]->right == NULL){st[lv-1]->right = node;}else{AstNode tmp = st[lv-1]->right;while (tmp->right != NULL)tmp = tmp->right;tmp->right = node;}st[lv] = NULL; tp[lv] = NULL;}lv--; PFI("RPAR: %c\n", c);}break;case ' ': break; default:PFI("Error: At index %d, [%c], Syntax error!\n", idx, c); exit (0);}idx++; c = estr[idx];}if (st[lv] == NULL){if (tp[lv] == NULL) {} else head = tp[lv];}else{head = st[lv];}return head;
}
Token
eval_astnode (AstNode node)
{int index = 0, count = 0;Token *tks, tk, stk[16] = {0};int sp = 0;count = astnode_count (node);tks = (Token*) malloc (count * sizeof(Token));astnode_postorder_trav_toa (node, tks, &index);PF("Node count: %d\n", count);PF("Node index: %d\n", index);for (int i = 0; i < count; i++){switch (tks[i].type){case T_NUMB: {PFI("PUSH %ld, SP: %d\n", tks[i].V.ival, sp);stk[sp] = tks[i]; sp = sp + 1;}break;case T_OPER: {switch (tks[i].V.oval[0]){case '+': PFI("ADD %ld %ld", stk[sp-2].V.ival, stk[sp-1].V.ival);stk[sp-2].V.ival = stk[sp-2].V.ival + stk[sp-1].V.ival;PFI(" ==> %ld\n", stk[sp-2].V.ival);stk[sp-1].type = 0; stk[sp-1].V.nval = 0; sp = sp - 1;break;case '-': if (sp-2 < 0){PFI("SUB %ld", stk[sp-1].V.ival);stk[sp-1].V.ival = -(stk[sp-1].V.ival);PFI(" ==> %ld\n", stk[sp-1].V.ival);break;}PFI("SUB %ld %ld", stk[sp-2].V.ival, stk[sp-1].V.ival);stk[sp-2].V.ival = stk[sp-2].V.ival - stk[sp-1].V.ival;PFI(" ==> %ld\n", stk[sp-2].V.ival);stk[sp-1].type = 0; stk[sp-1].V.nval = 0; sp = sp - 1;break;case '*': PFI("MUL %ld %ld", stk[sp-2].V.ival, stk[sp-1].V.ival);stk[sp-2].V.ival = stk[sp-2].V.ival * stk[sp-1].V.ival;PFI(" ==> %ld\n", stk[sp-2].V.ival);stk[sp-1].type = 0; stk[sp-1].V.nval = 0; sp = sp - 1;break;case '/': if (stk[sp-1].V.ival == 0){PF("Error: The divisor cannot be zero!\n");break;}PFI("DIV %ld %ld", stk[sp-2].V.ival, stk[sp-1].V.ival);stk[sp-2].V.ival = stk[sp-2].V.ival / stk[sp-1].V.ival;PFI(" ==> %ld\n", stk[sp-2].V.ival);stk[sp-1].type = 0; stk[sp-1].V.nval = 0; sp = sp - 1;break;}}break;}}free (tks);tk = stk[0];return tk;
}
#define EXPRSIZE 1024
void
expr_shell (void)
{char buf[EXPRSIZE] = {0};int idx = 0;char c;PF("EXPR:> ");c = fgetc (stdin);while (c != EOF){if (c == '\n'){Token tk = { .type = 0, .V.nval = 0 };AstNode root = NULL;PF("------------------------------\n");root = parse_string (buf);PF("------------------------------\n MID: ");astnode_inorder_trav (root);PF("\n------------------------------\n");tk = eval_astnode (root);PF("------------------------------\n");PF("RESULT: %ld\n", tk.V.ival);PF("------------------------------\n");astnode_free (root);memset (buf, 0, EXPRSIZE); idx = 0;PF("EXPR:> ");}else{buf[idx] = c; idx++;}c = fgetc (stdin);}PF("\nBye!\n");
}
void
test_eval (void)
{AstNode root = NULL;Token tk;char *st = "1001/(90-90)";PF("EXPR: %s\n", st);PF("--------------------\n");root = parse_string (st);PF("--------------------\n");PF(" MID: "); astnode_inorder_trav (root); PF("\n");PF("--------------------\n");PF("PREV: "); astnode_prevorder_trav (root); PF("\n");PF("--------------------\n");astnode_visual_trav (root);PF("--------------------\n");PF("POST: "); astnode_postorder_trav (root); PF("\n");PF("--------------------\n");tk = eval_astnode (root);PF("--------------------\n");PF("RESULT: %ld\n", tk.V.ival);PF("--------------------\n");astnode_free (root);
}
void
test_parse (void)
{AstNode root = NULL;char *st = "1+(2-(3+(4-5)))";PF("EXPR: %s\n", st);PF("--------------------\n");root = parse_string (st);PF("--------------------\n");PF(" MID: "); astnode_inorder_trav (root); PF("\n");PF("--------------------\n");PF("PREV: "); astnode_prevorder_trav (root); PF("\n");PF("--------------------\n");PF("POST: "); astnode_postorder_trav (root); PF("\n");PF("--------------------\n");astnode_visual_trav (root);PF("--------------------\n");astnode_free (root);
}
void
test_astnode (void)
{AstNode root = NULL, tpl = NULL, tpr = NULL;Token tka = { .type = T_NUMB, .V.ival = 1 };Token tkb = { .type = T_NUMB, .V.ival = 2 };Token tkc = { .type = T_OPER, .V.oval[0] = '+' };Token tkd = { .type = T_NUMB, .V.ival = 3 };Token tke = { .type = T_OPER, .V.oval[0] = '-' };tpl = astnode_new (tka); tpr = astnode_new (tkb); root = astnode_new (tkc); root->left = tpl;root->right = tpr;tpl = root;tpr = astnode_new (tkd); root = astnode_new (tke); root->left = tpl;root->right = tpr;PF(" MID: ");astnode_inorder_trav (root); PF("\n");PF("PREV: ");astnode_prevorder_trav (root); PF("\n");PF("POST: ");astnode_postorder_trav (root); PF("\n");astnode_free (root);
}
int
main (int argc, char *argv[])
{expr_shell ();return 0;
}