语法分析
GUET 编译原理课设,当时找了很多资料,网上的资料乱七八糟,花了哥哥我好久时间。
致谢:代码的算法有参考书本其他博主的,表示感谢,同时也更正他们的了一些微小错误和不合理地方。
这里,我用书本的例子数据作为测试。
西北工业大学出版社《编译原理》蒋立源 第 3 版
第 157 页
代码中的约定:
- 用 0 表示空
- 用 102 表示移进 s2
- 用 53 表示规约 r3
#include <iostream>
#include<stdio.h>
#include<stdlib.h>
using namespace std;
// 0 为空白
//102 表示 S2
//51 表示 r1
int actionTable[12][6] = {
102,0,0,0,0,0,
0,0,0,0,0,-1,
0,104,0,0,0,0,
0,0,105,0,0,0,
0,0,53,0,0,0,
0,107,0,108,0,0,
0,0,0,0,109,0,
0,0,52,0,0,0,
0,0,110,0,55,0,
0,0,0,0,0,51,
0,0,0,108,0,0,
0,0,0,0,54,0
};
int gotoTable[12][3] = {
1,0,0,0,0,0,
0,3,0,0,0,0,
0,0,0,0,0,6,
0,0,0,0,0,0,
0,0,0,0,0,0,
0,0,11,0,0,0
};
// 语法表,存储读取的文法
char Grammar[20][10] = {'\0'};
char vtArray[10], vnArray[10];
//action 表横坐标数组:用于定位,查表
char actionAxis[6] = {'b', 'd', 't', 's', 'e', '#'};
//goto 表横坐标数组:用于定位,查表
char gotoAxis[3] = {'B', 'D', 'S'};
// 非终结符数量,终结符数量,状态数量
int vnNumber, vtNumber, stateNumber = 12;
int LR[10];
int grammarNum;
typedef struct {
char * base;
char * top;
}
SymbolStack;
typedef struct {
int * base;
int * top;
}
StateStack;
StateStack state;
SymbolStack symbol;
// 扫描文法文件,得到文法
// 左部符号存储在第一列,即 Grammar[i][0]
int scanGrammar() {FILE * fp = fopen("slr.txt", "r");
FILE * tp;
char ch, nextCh;
int i = 0, j = 0, k, count;
while (!feof(fp)) {fscanf(fp, "%c", &ch);
// 文件结束标志
if (ch == '?') {Grammar[i][j] = '\0';
break;
}
if (ch == '\n') {Grammar[i][j] = '\0';
// 转移到下一行第一列
i++;
j = 0;
continue;
}
if (ch == '-') {
tp = fp;
fscanf(tp, "%c", &nextCh);
if (nextCh == '>') {
fp = tp;
continue;
}
}
if (ch == '|') {Grammar[i + 1][0] = Grammar[i][0];
Grammar[i][j] = '\0';
i++;
j = 1;
continue;
}
Grammar[i][j] = ch;
if (ch >= 'A' && ch <= 'Z') {
count = 0;
while (vnArray[count] != ch && vnArray[count] != '\0') {count++;}
if (vnArray[count] == '\0') {
vnNumber = count + 1;
// 拓广文法的开始符 W
if (ch == 'W') {
j++;
continue;
}
vnArray[count] = ch;
vnNumber = count + 1;
}
}
else {
count = 0;
while (vtArray[count] != ch && vtArray[count] != '\0') {count++;}
if (vtArray[count] == '\0') {vtArray[count] = ch;
vtNumber = count + 1;
}
}
j++;
}
cout << "输入的文法:" << endl;
for (k = 0; k <= i; k++) {
j = 0;
while (Grammar[k][j] != '\0') {if (j == 1) {printf("->");
}
printf("%c", Grammar[k][j]);
j++;
}
cout << endl;
}
count = 0;
cout << endl << "vn:" << endl;
while (vtArray[count] != '\0') {printf("%3c", vtArray[count]);
count++;
}
vtArray[count] = '#';
vtNumber = count + 1;
printf("%3c", vtArray[count]);
cout << endl << "vn:" << endl;
count = 0;
while (vnArray[count] != '\0') {printf("%3c", vnArray[count]);
count++;
}
cout << endl;
fclose(fp);
grammarNum = i + 1;
return i;
}
// 计算 LR(0)项目长度,决定了:规约时符号栈和状态栈的出栈次数
int getLRLength() {
int i, j;
for (i = 0; i < grammarNum; i++) {
j = 1;
while (Grammar[i][j] != '\0') {j++;}
// 产生式 i 的长度为 j
LR[i] = j;
}
return 0;
}
// 创建栈
void createStack() {state.base = (int *)malloc(100 * sizeof(int));
if (!state.base)
exit(1);
state.top = state.base;
*state.top = 0;
symbol.base = (char *)malloc(100 * sizeof(char));
if (!symbol.base)
exit(1);
symbol.top = symbol.base;
*symbol.top = '#';
}
// 查 Action 表
int getActionValue(int stateTop, char ch) {
int i, j;
// 查找
// 更改 i 为 stateTop
/*for (i = 0; i < stateNumber; i++) {if (stateTop == i) {break;}
}*/
// 查找横坐标数组 actionAxis,定位
for (j = 0; j < vtNumber; j++) {if (ch == actionAxis[j])
break;
}
return actionTable[stateTop][j];
}
// 查 Goto 表
int getGotoValue(int stateTop, char inputChar) {
int i, j;
for (j = 0; j < vnNumber; j++) {if (inputChar == gotoAxis[j])
break;
}
if (gotoTable[stateTop][j] == 0) {//cout << "Error" << endl;}
return gotoTable[stateTop][j];
}
// 输出信息
int printInfo(int count, int i, char chars[], int actionValue) {
int * p = state.base, stateNum;
int j, jj;
char * q = symbol.base, symbolNum;
printf("%d\t", count);
while (p != state.top + 1) {
stateNum = *p;
printf("%d", stateNum);
p++;
}
printf("\t");
while (q != symbol.top + 1) {
symbolNum = *q;
printf("%c", symbolNum);
q++;
}
printf("\t");
j = i;
jj = 0;
while (jj < j) {printf(" ");
jj++;
}
while (chars[j] != '\0') {printf("%c", chars[j]);
j++;
}
if (actionValue > 100) {cout << "\t\ts" << actionValue - 100 << "\t" << actionValue - 100 << endl;}
// 如果是规约,把预留下一状态给规约程序输出,在此不清除下一状态为多少
else if (actionValue > 50) {cout << "\t\tr" << actionValue - 50 << "\t";}
else if (actionValue == -1) {cout << "\t\tacc" << "\n\n 可以识别的输入串 \n" << endl;}
else {return 0;}
return 1;
}
// 规约 -- 根据编号为 i 的产生式进行规约
// 规约的结果是一个非终结符号,即 Grammar[i][0]
int redution(int i) {
int * p, stateNum, gotoValue, LRLength;
LRLength = LR[i] - 1;
while (LRLength != 0) {
state.top--;
symbol.top--;
LRLength--;
}
int temp = *state.top;
state.top++;
*state.top = getGotoValue(temp, Grammar[i][0]);
symbol.top++;
*symbol.top = Grammar[i][0];
stateNum = *state.top;
if (*state.top == 0) {return *state.top;}
return *state.top;
}
// 扫描输入字符串
int scanChars() {char chars[20] = {'b', 'd', 't', 's', 'e','#'};
int i = 0, stepCount = 1;
int actionValue, nextActionValue;
int stateTop, gotoValue;
//scanf("%s", &chars);
cout << "步骤 \t 状态 \t 栈中 \t 余留符号 分析动作 下一状态" << endl;
while (chars[i] != '\0') {
stateTop = *state.top;
// 向前查看一个符号,查 action 表
actionValue = getActionValue(stateTop, chars[i]);
// 如果是移进,就可以输出下一状态
// 如果是规约,则等到规约完毕再输出下一状态,因为现在不可知
printInfo(stepCount, i, chars, actionValue);
// 数组(表格)的值
if (actionValue == 0) {
cout <<endl<< "不合法的字符串!" << endl;
break;
}
//acc 接受
if (actionValue == -1) {
stepCount++;
return 1;
}
//s 移进
if (actionValue >= 100) {
actionValue = actionValue - 100;
state.top++;
*state.top = actionValue;
symbol.top++;
*symbol.top = chars[i];
i++;
stepCount++;
}
//r 规约
if (actionValue >= 50 && actionValue < 100) {
actionValue = actionValue - 50;
gotoValue = redution(actionValue);
// 规约完毕后,输出下一状态
cout << gotoValue << endl;
stepCount++;
}
}
return 0;
}
int main() {scanGrammar();
getLRLength();
createStack();
scanChars();
system("pause");
return 0;
}
文法 slr.txt
问号结尾
W->B
B->bDtSe
D->Dtd
D->d
S->stS
S->s?