您的位置:首页 > 游戏 > 游戏 > 青岛做网站定制_万网域名停靠_一键搭建网站工具_西安seo服务公司排名

青岛做网站定制_万网域名停靠_一键搭建网站工具_西安seo服务公司排名

2025/2/23 19:48:07 来源:https://blog.csdn.net/2301_77061352/article/details/144649336  浏览:    关键词:青岛做网站定制_万网域名停靠_一键搭建网站工具_西安seo服务公司排名
青岛做网站定制_万网域名停靠_一键搭建网站工具_西安seo服务公司排名

语法分析

C4的语法分析分为声明(declaration)、表达式(expr)和语句(stmt)的解析。

常见的解析手段有通用、自顶向下和自底向上三种,C4采用的是自顶向下解析。

语法分析在C4中的框架如下:

输入
词素
中间形式
获取下一个词素
源程序
词法分析
语法分析
虚拟机
执行

还有一个重要的部分,那就是符号表:

词法分析
语法分析
符号表

解析声明

词法分析已经帮助我们捕捉到了词素的类型,现在该语法分析登场了,首先是解析全局声明,这些类型有:int、char、enum、函数

C4采用自顶向下解析,但是本身并不支持递归,所以不能使用递归下降进行解析。

自顶向下解析:

假设我们有文法如下:

E->TE'
E'->+ TE' | 空
T->FT'

自顶向下解析如下:

E

其实就是树的遍历展开:

E
T
E'
F
T'

推完左边再推右边就行了,就不多展示了。

文法

int和char的文法不需要过多讲述,重点讲一讲enum和函数的文法:

enum文法:

<EnumDecl> ::= "enum" <EnumName> "{" <EnumList> "}"
<EnumName> ::= "IDENTIFIER"
<EnumList> ::= <EnumItem> | <EnumItem> "," <EnumList>
<EnumItem> ::= "IDENTIFIER"

把C4中的文法写成一行,这样看起来就简洁多了:

enum的文法:enum_decl ::= 'enum' [identifier] '{' identifier ['=' number] {',' identifier ['=' number] '}'

函数文法:

<FuncDecl> ::= <ReturnType> <FuncName> "(" <ParamList> ")" "{" <FuncBody> "}"
<ReturnType> ::= "void" | "int" | "char" | "IDENTIFIER"
<FuncName> ::= "IDENTIFIER"
<ParamList> ::= <Param> | <Param> "," <ParamList> | ""
<FuncBody> ::= <StatementList>

以下就是C4中对应的文法解析:

  // parse declarationsline = 1;next();while (tk) {bt = INT; // basetypeif (tk == Int) next();else if (tk == Char) { next(); bt = CHAR; }else if (tk == Enum) {//enum的解析开始next();if (tk != '{') next();if (tk == '{') {next();i = 0;while (tk != '}') {if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; }next();if (tk == Assign) {next();if (tk != Num) { printf("%d: bad enum initializer\n", line); return -1; }i = ival;next();}id[Class] = Num; id[Type] = INT; id[Val] = i++;if (tk == ',') next();}next();}}while (tk != ';' && tk != '}') {ty = bt;while (tk == Mul) { next(); ty = ty + PTR; }if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; }if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; }next();id[Type] = ty;if (tk == '(') { // 函数解析从这里开始id[Class] = Fun;id[Val] = (int)(e + 1);next(); i = 0;while (tk != ')') {ty = INT;if (tk == Int) next();else if (tk == Char) { next(); ty = CHAR; }while (tk == Mul) { next(); ty = ty + PTR; }if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; }if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; }id[HClass] = id[Class]; id[Class] = Loc;id[HType]  = id[Type];  id[Type] = ty;id[HVal]   = id[Val];   id[Val] = i++;next();if (tk == ',') next();}next();if (tk != '{') { printf("%d: bad function definition\n", line); return -1; }loc = ++i;next();while (tk == Int || tk == Char) {bt = (tk == Int) ? INT : CHAR;next();while (tk != ';') {ty = bt;while (tk == Mul) { next(); ty = ty + PTR; }if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; }if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; }id[HClass] = id[Class]; id[Class] = Loc;id[HType]  = id[Type];  id[Type] = ty;id[HVal]   = id[Val];   id[Val] = ++i;next();if (tk == ',') next();}next();}*++e = ENT; *++e = i - loc;while (tk != '}') stmt();*++e = LEV;id = sym; // unwind symbol table localswhile (id[Tk]) {if (id[Class] == Loc) {id[Class] = id[HClass];id[Type] = id[HType];id[Val] = id[HVal];}id = id + Idsz;}}else {id[Class] = Glo;id[Val] = (int)data;data = data + sizeof(int);}if (tk == ',') next();}next();}

声明解析完,接下来就是解析表达式(expr)和语句(stmt),顺便一提,C4的声明解析是放在main函数中进行的,也就是说,C4的声明必须在文法解析之前,像在函数语句后面定义变量的行为,C4是不支持的。

解析表达式

我们从词法分析那里得到了词素的类型,当判断它是表达式后,我们就可以开始解析了,把它转换为虚拟机可识别的指令流,表达式的解析就是一元运算符、二元运算符和三元与算法的解析,例如加减乘除、与或非这些,这些东西的文法就不详细将了,详细地把每种运算符的解析文法都表示出来有点难受,毕竟笔者的分析只是以框架为主。

从框架中我们可以得知,语法分析就是在往内存模型中输出,之后就是虚拟机了,所以解析结果重点是虚拟机指令。

void expr(int lev)
{int t, *d;if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); }else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; }else if (tk == '"') {*++e = IMM; *++e = ival; next();while (tk == '"') next();data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR;}else if (tk == Sizeof) {next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); }ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; }while (tk == Mul) { next(); ty = ty + PTR; }if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); }*++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int);ty = INT;}else if (tk == Id) {d = id; next();if (tk == '(') {next();t = 0;while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); }next();if (d[Class] == Sys) *++e = d[Val];else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; }else { printf("%d: bad function call\n", line); exit(-1); }if (t) { *++e = ADJ; *++e = t; }ty = d[Type];}else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; }else {if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; }else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; }else { printf("%d: undefined variable\n", line); exit(-1); }*++e = ((ty = d[Type]) == CHAR) ? LC : LI;}}else if (tk == '(') {next();if (tk == Int || tk == Char) {t = (tk == Int) ? INT : CHAR; next();while (tk == Mul) { next(); t = t + PTR; }if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); }expr(Inc);ty = t;}else {expr(Assign);if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }}}else if (tk == Mul) {next(); expr(Inc);if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); }*++e = (ty == CHAR) ? LC : LI;}else if (tk == And) {next(); expr(Inc);if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); }ty = ty + PTR;}else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; }else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; }else if (tk == Add) { next(); expr(Inc); ty = INT; }else if (tk == Sub) {next(); *++e = IMM;if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; }ty = INT;}else if (tk == Inc || tk == Dec) {t = tk; next(); expr(Inc);if (*e == LC) { *e = PSH; *++e = LC; }else if (*e == LI) { *e = PSH; *++e = LI; }else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); }*++e = PSH;*++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);*++e = (t == Inc) ? ADD : SUB;*++e = (ty == CHAR) ? SC : SI;}else { printf("%d: bad expression\n", line); exit(-1); }while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" methodt = ty;if (tk == Assign) {next();if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); }expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI;}else if (tk == Cond) {next();*++e = BZ; d = ++e;expr(Assign);if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); }*d = (int)(e + 3); *++e = JMP; d = ++e;expr(Cond);*d = (int)(e + 1);}else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; }else if (tk == Lan) { next(); *++e = BZ;  d = ++e; expr(Or);  *d = (int)(e + 1); ty = INT; }else if (tk == Or)  { next(); *++e = PSH; expr(Xor); *++e = OR;  ty = INT; }else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; }else if (tk == And) { next(); *++e = PSH; expr(Eq);  *++e = AND; ty = INT; }else if (tk == Eq)  { next(); *++e = PSH; expr(Lt);  *++e = EQ;  ty = INT; }else if (tk == Ne)  { next(); *++e = PSH; expr(Lt);  *++e = NE;  ty = INT; }else if (tk == Lt)  { next(); *++e = PSH; expr(Shl); *++e = LT;  ty = INT; }else if (tk == Gt)  { next(); *++e = PSH; expr(Shl); *++e = GT;  ty = INT; }else if (tk == Le)  { next(); *++e = PSH; expr(Shl); *++e = LE;  ty = INT; }else if (tk == Ge)  { next(); *++e = PSH; expr(Shl); *++e = GE;  ty = INT; }else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; }else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; }else if (tk == Add) {next(); *++e = PSH; expr(Mul);if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL;  }*++e = ADD;}else if (tk == Sub) {next(); *++e = PSH; expr(Mul);if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; }else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; }else *++e = SUB;}else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; }else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; }else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; }else if (tk == Inc || tk == Dec) {if (*e == LC) { *e = PSH; *++e = LC; }else if (*e == LI) { *e = PSH; *++e = LI; }else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); }*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);*++e = (tk == Inc) ? ADD : SUB;*++e = (ty == CHAR) ? SC : SI;*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);*++e = (tk == Inc) ? SUB : ADD;next();}else if (tk == Brak) {next(); *++e = PSH; expr(Assign);if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); }if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL;  }else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }*++e = ADD;*++e = ((ty = t - PTR) == CHAR) ? LC : LI;}else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); }}
}

解析语句

C4解析的语句,包括 if 语句、while 语句、return 语句、块语句和表达式语句。

解析文法如下:

<stmt> ::= 'if' '(' <expr> ')' <stmt> ['else' <stmt>]| 'while' '(' <expr> ')' <stmt>| 'return' [<expr>] ';'| '{' {<stmt>} '}'| ';'| <expr> ';'<expr> ::= <identifier> '=' <expr>| <identifier> '+' <expr>| <identifier> '-' <expr>| <identifier> '*' <expr>| <identifier> '/' <expr>| <identifier>| <number><identifier> ::= [A-Za-z_][A-Za-z0-9_]*
<number> ::= [0-9]+

对应程序如下:

void stmt()
{int *a, *b;if (tk == If) {next();if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }expr(Assign);if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }*++e = BZ; b = ++e;stmt();if (tk == Else) {*b = (int)(e + 3); *++e = JMP; b = ++e;next();stmt();}*b = (int)(e + 1);}else if (tk == While) {next();a = e + 1;if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }expr(Assign);if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }*++e = BZ; b = ++e;stmt();*++e = JMP; *++e = (int)a;*b = (int)(e + 1);}else if (tk == Return) {next();if (tk != ';') expr(Assign);*++e = LEV;if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }}else if (tk == '{') {next();while (tk != '}') stmt();next();}else if (tk == ';') {next();}else {expr(Assign);if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }}
}

总结

C4的整体框架分析就结束了,其实细节还是非常多的,笔者只是粗略分析了一下源码的结构,不过源码有些粗糙,特别是这些’{’ '}'的写法和命名风格,看得有点难受,但不得不说,C4的设计确实非常厉害,特别是实现了自举这一点,如果读者想深入学习一下C4,不妨照着框架把C4重构一下。

版权声明:

本网仅为发布的内容提供存储空间,不对发表、转载的内容提供任何形式的保证。凡本网注明“来源:XXX网络”的作品,均转载自其它媒体,著作权归作者所有,商业转载请联系作者获得授权,非商业转载请注明出处。

我们尊重并感谢每一位作者,均已注明文章来源和作者。如因作品内容、版权或其它问题,请及时与我们联系,联系邮箱:809451989@qq.com,投稿邮箱:809451989@qq.com