编译原理实验有点难,但是加上ai的辅助就会很简单,下面梳理一下代码流程。
全代码在github仓库中,链接:NeiFeiTiii/CompilerOriginTest at Version2.0,感谢star一下
一、项目结构
关键内容就是里面的那几个.c和.h文件。还有那三个txt文件,用于输入输出的存储。
二、代码流程
标准就是先进入词法、然后进行语法和语义的分析,最后生成四元式完成中间代码生成。下面是main.c文件:
#include "PraseWithRecursive.h"void clearFile(const char *filename) {FILE *file = fopen(filename, "w");if (file != NULL) {fclose(file);}
}
int main() {clearFile("Lex.txt");PraseWithRecursive();return 0;
}
这里就是清空一下要输出到的文件内容,然后进到递归下降分析的语法语义的执行中,但是这里不要在意,语法语义会调用词法的函数,来进行一个取得下一个二元式。
void scanner() {token = getNextToken();word.Class = token.type;switch (word.Class) {case ID:case INT:case REAL:strcpy(word.Value.Val1, token.value);break;case PLUS:case MINUS:case MUL:case DIV:case LP:case RP:case EQ:case IS:case LT:case GT:strcpy(word.Value.Val4, token.value);break;case END:printf("End of tokens");return;default:ErrorPrint("Unknown token type");exit(1);}
}
本项目的关键在于getNextToken()这个函数,通过它将前后实验联系在一起。
最后结果将会保存在Lex和Output这两个文件里面。
三、代码细节
1.词法
词法的代码细节是最关键的,它关乎到后面两个实验的实验结果,它的输出牵一发而动全身,主要就是完成二三实验调用的获取下一个token的接口。
下面是长代码,需要滑动屏幕:
Token getNextToken() {static FILE *fp = NULL;static int initialized = 0;static long file_offset = 0;if (!initialized) {fp = fopen("input.txt", "r");if (fp == NULL) {printf("Error: Cannot open source file\n");exit(1);}initialized = 1;}fseek(fp, file_offset, SEEK_SET);char ch;int i, c;TOKEN = (char *)malloc(MAX_TOKEN_LENGTH * sizeof(char));if (TOKEN == NULL) {fprintf(stderr, "Error: Memory allocation failed\n");exit(1);}ch = fgetc(fp);while (1) {if (ch == EOF) {Token token = {END, " ", -1, -1};out(token);free(TOKEN);file_offset = ftell(fp);return token;}fseek(fp, -1, SEEK_CUR);ch = fgetc(fp);if (ch == '#') {while (ch != '\n') {ch = fgetc(fp);column_number++;}continue;}if (isalpha(ch)) { // IDTOKEN[0] = ch;column_number++;i = 1;ch = fgetc(fp);if (ch == EOF) {fseek(fp, 0, SEEK_END);file_offset = ftell(fp);}else{while (isalnum(ch)) {TOKEN[i] = ch;i++;ch = fgetc(fp);column_number++;if (ch == EOF) {fseek(fp, 0, SEEK_END);file_offset = ftell(fp);}}if (ch != EOF)fseek(fp, -1, SEEK_CUR);}TOKEN[i] = '\0';file_offset = ftell(fp);c = lookup(TOKEN);if (c == ID) {Token token = {ID, "", line_number, column_number};strcpy(token.value, TOKEN);out(token);free(TOKEN);return token;}}if (isdigit(ch)) {TOKEN[0] = ch;ch = fgetc(fp);column_number++;i = 1;int is_real = 0;int is_octal = (TOKEN[0] == '0');int is_hex = 0;if (ch == EOF) {fseek(fp, 0, SEEK_END);file_offset = ftell(fp);} else {if (is_octal && (ch == 'x' || ch == 'X')) { // 16进制is_octal = 0;is_hex = 1;TOKEN[i] = ch;i++;column_number++;ch = fgetc(fp);if (ch == EOF) {fseek(fp, 0, SEEK_END);file_offset = ftell(fp);} else {while ((ch <= 70 && ch >= 65) || (ch <= 102 && ch >= 97) || isdigit(ch)) {TOKEN[i] = ch;i++;ch = fgetc(fp);column_number++;}if (ch != EOF)fseek(fp, -1, SEEK_CUR);}}if (is_octal && ch != 'x' && ch != 'X') { // 8进制is_octal = 0;while (isdigit(ch)) {TOKEN[i] = ch;i++;ch = fgetc(fp);column_number++;is_octal = 1;}if (ch == EOF) {fseek(fp, 0, SEEK_END);file_offset = ftell(fp);} elsefseek(fp, -1, SEEK_CUR);}while (isdigit(ch)) { // 10进制TOKEN[i] = ch;i++;ch = fgetc(fp);column_number++;}if (ch == EOF) {fseek(fp, 0, SEEK_END);file_offset = ftell(fp);} else {if (ch == '.') {TOKEN[i] = ch;is_real = 1;i++;ch = fgetc(fp);while (isdigit(ch)) {TOKEN[i] = ch;i++;ch = fgetc(fp);column_number++;}if (ch == EOF) {fseek(fp, 0, SEEK_END);file_offset = ftell(fp);} elsefseek(fp, -1, SEEK_CUR);}if (ch == 'e' || ch == 'E') {is_real = 1;TOKEN[i] = ch;i++;ch = fgetc(fp);column_number++;if (ch == EOF) {fseek(fp, 0, SEEK_END);file_offset = ftell(fp);ErrorPrint("Real number not complete,In the end of file");} else {if (ch == '+' || ch == '-') {TOKEN[i] = ch;i++;ch = fgetc(fp);column_number++;if (ch == EOF) {fseek(fp, 0, SEEK_END);file_offset = ftell(fp);ErrorPrint("Real number not complete,In the end of file");while (isdigit(ch)) {TOKEN[i] = ch;i++;ch = fgetc(fp);column_number++;}if (ch == EOF) {fseek(fp, 0, SEEK_END);file_offset = ftell(fp);} elsefseek(fp, -1, SEEK_CUR);}while (isdigit(ch)) {TOKEN[i] = ch;i++;ch = fgetc(fp);column_number++;}if (ch == EOF) {fseek(fp, 0, SEEK_END);file_offset = ftell(fp);} elsefseek(fp, -1, SEEK_CUR);}}}if (ch != EOF)fseek(fp, -1, SEEK_CUR);}}TOKEN[i] = '\0';if (is_real) {Token token = {REAL, "", line_number, column_number};strcpy(token.value, TOKEN);out(token);free(TOKEN);file_offset = ftell(fp);return token;} else if (is_hex) {Token token = {HEX, "", line_number, column_number};strcpy(token.value, TOKEN);out(token);free(TOKEN);file_offset = ftell(fp);return token;} else if (is_octal) {Token token = {OCTAL, "", line_number, column_number};strcpy(token.value, TOKEN);out(token);free(TOKEN);file_offset = ftell(fp);return token;} else {Token token = {INT, "", line_number, column_number};strcpy(token.value, TOKEN);out(token);free(TOKEN);file_offset = ftell(fp);return token;}}if (ch == '"') {ch = fgetc(fp);column_number++;i = 0;while (ch != '"') {TOKEN[i] = ch;i++;ch = fgetc(fp);column_number++;if (ch == EOF) {Token token = {END, " ", -1, -1};report_error("String not closed");out(token);free(TOKEN);return token;}}TOKEN[i] = '\0';Token token = {STRING, "", line_number, column_number};strcpy(token.value, TOKEN);out(token);free(TOKEN);file_offset = ftell(fp);return token;}if (ch == '\'') { // 字符ch = fgetc(fp);column_number++;i = 0;while (ch != '\'') {TOKEN[i] = ch;i++;ch = fgetc(fp);column_number++;}TOKEN[i] = '\0';Token token = {CHAR, "", line_number, column_number};strcpy(token.value, TOKEN);out(token);free(TOKEN);file_offset = ftell(fp);return token;}if (ch == '(' || ch == ')' || ch == '{' || ch == '}' || ch == '[' || ch == ']' || ch == ';' || ch == ',') {Token token;if (ch == '(') token = (Token){LP, "(", line_number, column_number};else if (ch == ')') token = (Token){RP, ")", line_number, column_number};else if (ch == '{') token = (Token){BRACKET, "{", line_number, column_number};else if (ch == '}') token = (Token){BRACKET, "}", line_number, column_number};else if (ch == '[') token = (Token){BRACKET, "[", line_number, column_number};else if (ch == ']') token = (Token){BRACKET, "]", line_number, column_number};else if (ch == ';') token = (Token){DOT, ";", line_number, column_number};else token = (Token){DOT, ",", line_number, column_number};out(token);column_number++;free(TOKEN);file_offset = ftell(fp);return token;}if (ch == '=' || ch == '!' || ch == '<' || ch == '>' || ch == ':') {char next_ch = fgetc(fp);column_number++;Token token;if (ch == '=' && next_ch == '=') token = (Token){EQ, "", line_number, column_number};else if (ch == '!' && next_ch == '=') token = (Token){NE, "", line_number, column_number};else if (ch == '<' && next_ch == '=') token = (Token){LE, "", line_number, column_number};else if (ch == '>' && next_ch == '=') token = (Token){GE, "", line_number, column_number};else if (ch == ':' && next_ch == '=') token = (Token){IS, "", line_number, column_number};else {fseek(fp, -1, SEEK_CUR);column_number--;if (ch == '=') token = (Token){IS, " ", line_number, column_number};else if (ch == '!') token = (Token){NOT, " ", line_number, column_number};else if (ch == '<') token = (Token){LT, " ", line_number, column_number};else if (ch == '>') token = (Token){GT, " ", line_number, column_number};else {report_error("Unknown operator");out(token);free(TOKEN);exit(1);}}out(token);free(TOKEN);file_offset = ftell(fp);return token;}if (ch == '+' || ch == '-' || ch == '*' || ch == '/') {Token token;if (ch == '+') token = (Token){PLUS, " ", line_number, column_number};else if (ch == '-') token = (Token){MINUS, " ", line_number, column_number};else if (ch == '*') token = (Token){MUL, " ", line_number, column_number};else token = (Token){DIV, " ", line_number, column_number};out(token);column_number++;free(TOKEN);file_offset = ftell(fp);return token;}if (isspace(ch)) {if (ch == '\n') {line_number++;column_number = 0;} else {column_number++;}ch = fgetc(fp);continue;}else {report_error("Unknown character");Token token = {UNKNOWN, " ", line_number, column_number};out(token);free(TOKEN);exit(1);}}
}
这里的逻辑就是获取文件中的一个字母,看看是什么类型(即if), 如果符合这个类型,就继续检查下一个字母(即while),直到这个字母不符合条件,记录检测文件的断点,返回这个token。思路清晰,代码逻辑也就不攻自破。
2.语法、语义
这里几乎是糅合在一块了,使用递归下降的方法,之后打印结果。
char *E(void) {char opp[3], *E1_place, *E2_place, *Temp_place;E1_place = T();while (word.Class == PLUS || word.Class == MINUS || word.Class == EQ) {if (word.Class == PLUS) {strcpy(opp, "+");} else if (word.Class == MINUS) {strcpy(opp, "-");} else if (word.Class == EQ) {strcpy(opp, "==");}scanner();E2_place = T();Temp_place = NewTemp();GEN(opp, E1_place, E2_place, Temp_place);E1_place = Temp_place;}return E1_place;
}char *T(void) {char opp[3], *T1_place, *T2_place, *Temp_place;T1_place = F();while (word.Class == MUL || word.Class == DIV || word.Class == LT || word.Class == GT) {if (word.Class == MUL) {strcpy(opp, "*");} else if (word.Class == DIV) {strcpy(opp, "/");} else if (word.Class == LT) {strcpy(opp, "<");} else if (word.Class == GT) {strcpy(opp, ">");}scanner();T2_place = F();Temp_place = NewTemp();GEN(opp, T1_place, T2_place, Temp_place);T1_place = Temp_place;}return T1_place;
}char *F(void) {char *place;if (word.Class == ID || word.Class == INT || word.Class == REAL) { // 标识符place = strdup(token.value); // 自动分配内存,然后复制字符串scanner();return place;} else if (word.Class == LP) { // 左括号scanner();place = E();if (word.Class == RP) { // 右括号scanner();return place;} else {ErrorPrint("dont have')'");}} else {ErrorPrint("Not A Valid Prase");}return NULL; // 无效返回,其实根本不会到这里,完全是做个警告处理
}char *A(void) { // 赋值语句的处理char *E_place, *A_place;E_place = E();if (word.Class == IS) {scanner();A_place = E();GEN("=", E_place, "", A_place);return A_place;}return E_place;
}