前景提示:
个人觉得单纯是用来完成实验报告的话还行,但仅做参考,因为本人的编程水平有限,怕误人子弟。
本次代码支持以下操作:
单行注释
多行注释
文件形式输入
种别码可以在文件中自由修改
单词字符串识别支持:
部分关键字(可在程序外部的 reference.txt 文件中手动添加),
标识符,
无符号整型数字(仅可识别整型,其他类型需要自行添加别的函数支持),
界符(; , ( ) { } [ ]),
运算符(=、+、-、*、/、:、<、>,以及其中任意两个字符的组合,例如 :=、+=、<<)
代码程序:
// Compiler principles: lexical analyzer
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <string>
#include <vector>
using namespace std;

// Forward declarations for every function defined below.
string getFormFile(string filePath);
bool isLetter(char ch);
bool isNumber(char ch);
bool isOperator(char ch);
bool isDelimiter(char ch);
string rmExegesis(const string);
string rmExegesis_1(const string str);
string rmExegesis_2(const string str);
vector<string> compile();
vector<string> compile(const string originpath, const string targetpath);
map<string, string> getMap();
void showTime(map<string, string> refer, vector<string> key);

/// Program entry point: loads the category-code table, waits for the user to
/// prepare input.txt, runs the lexer and prints the resulting token table.
int main()
{
    // Key/value pairs (lexeme -> category code) read from the configuration file.
    map<string, string> refer = getMap();

    cout << "请在文件 input.txt 中输入待编译程序" << endl;
    // "pause" is a Windows shell command; NOTE(review): on other platforms the
    // command is expected to fail and execution simply continues.
    system("pause");

    // Tokens recognised in input.txt.
    vector<string> key = compile();
    showTime(refer, key);

    cout << "编译成功!" << endl;
    return 0;
}

/// Reads the whole file at `filePath` into a string.
///
/// @param filePath  path of the file to read
/// @return the file contents, or an empty string when the file cannot be
///         opened (a message is printed to stdout in that case)
string getFormFile(string filePath)
{
    ifstream ifs(filePath);
    if (!ifs.is_open())
    {
        cout << filePath << "打开失败!" << endl;
        // Stop here: querying the size of a failed stream yields -1, which
        // previously turned into an enormous resize request.
        return string();
    }

    // Read through stream iterators so the string holds exactly the characters
    // delivered by the stream; the stream closes itself when it goes out of scope.
    return string(istreambuf_iterator<char>(ifs), istreambuf_iterator<char>());
}
// Letters (characters that may form a word)
/// Returns true when `ch` is an ASCII letter (a-z, A-Z) or an underscore.
bool isLetter(char ch)
{
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
}
// Digits
/// Returns true when `ch` is an ASCII decimal digit ('0'..'9').
bool isNumber(char ch)
{
    return ch >= '0' && ch <= '9';
}
// Arithmetic operators
/// Returns true when `ch` is one of the operator characters
/// `=`, `+`, `-`, `*`, `/`, `:`, `<`, `>`.
bool isOperator(char ch)
{
    switch (ch)
    {
    case '=':
    case '+':
    case '-':
    case '*':
    case '/':
    case ':':
    case '<':
    case '>':
        return true;
    default:
        return false;
    }
}
// Delimiters
/// Returns true when `ch` is one of the delimiter characters
/// `;`, `,`, `(`, `)`, `{`, `}`, `[`, `]`.
bool isDelimiter(char ch)
{
    switch (ch)
    {
    case ';':
    case ',':
    case '(':
    case ')':
    case '{':
    case '}':
    case '[':
    case ']':
        return true;
    default:
        return false;
    }
}
// Single-line comment removal
/// Removes every single-line (`//`) comment from `str`.
///
/// Each comment is cut from its `//` up to, but not including, the line break
/// that ends it. The line break itself is kept so the code before and after
/// the comment is not glued into one token. A comment on the last line, with
/// no line break after it, is cut up to the end of the text.
///
/// The scan is purely textual: a `//` inside a string literal is treated as
/// the start of a comment as well.
///
/// @param str  source text
/// @return a copy of `str` without single-line comments
std::string rmExegesis_1(const std::string str)
{
    std::string result;
    result.reserve(str.size());

    std::string::size_type cursor = 0; // first character not yet copied
    while (cursor < str.size())
    {
        const std::string::size_type begin = str.find("//", cursor);
        if (begin == std::string::npos)
            break; // no further comments

        result.append(str, cursor, begin - cursor);

        const std::string::size_type lineEnd = str.find('\n', begin);
        // An unterminated final comment swallows the rest of the text;
        // otherwise resume at the line break so it is copied on the next pass.
        cursor = (lineEnd == std::string::npos) ? str.size() : lineEnd;
    }

    result.append(str, cursor, std::string::npos);
    return result;
}
// Multi-line comment removal
/// Removes every multi-line (`/* ... */`) comment from `str`.
///
/// The closing `*/` is searched for after the opening `/*`, so `/*/` does not
/// count as a complete comment and a stray `*/` ahead of the first `/*` is left
/// alone. An opening `/*` that is never closed is left in the text unchanged,
/// together with everything after it.
///
/// The scan is purely textual: a `/*` inside a string literal is treated as
/// the start of a comment as well.
///
/// @param str  source text
/// @return a copy of `str` without multi-line comments
std::string rmExegesis_2(const std::string str)
{
    std::string result;
    result.reserve(str.size());

    std::string::size_type cursor = 0; // first character not yet copied
    for (;;)
    {
        const std::string::size_type begin = str.find("/*", cursor);
        if (begin == std::string::npos)
            break; // no further comments

        const std::string::size_type end = str.find("*/", begin + 2);
        if (end == std::string::npos)
            break; // unterminated comment: keep the remaining text as it is

        result.append(str, cursor, begin - cursor);
        cursor = end + 2; // continue right after the closing "*/"
    }

    result.append(str, cursor, std::string::npos);
    return result;
}
// Removal of both single-line and multi-line comments
// Helpers defined earlier in this file. They are declared again here (repeating
// a declaration is harmless) so this section does not depend on where the
// earlier declarations sit.
std::string getFormFile(std::string filePath);
bool isLetter(char ch);
bool isNumber(char ch);
bool isOperator(char ch);
std::string rmExegesis_1(const std::string str);
std::string rmExegesis_2(const std::string str);
std::vector<std::string> compile(const std::string originpath, const std::string targetpath);

/// Removes single-line comments first and multi-line comments second.
///
/// @param str  source text
/// @return `str` with both kinds of comments removed
std::string rmExegesis(const std::string str)
{
    return rmExegesis_2(rmExegesis_1(str));
}

/// Runs the lexer with the default file names "input.txt" and "output.txt".
std::vector<std::string> compile()
{
    return compile("input.txt", "output.txt");
}

/// Splits the program stored in `originpath` into tokens.
///
/// The file is read, its comments are removed, the remaining text is echoed to
/// stdout, and the text in front of the "#~" end marker is scanned. When the
/// marker is missing the whole text is scanned.
///
/// Token forms:
///   - a letter followed by letters/digits -> "<lexeme>$ident"
///   - a run of digits                     -> "<lexeme>$idconst"
///   - one operator character, or two consecutive ones -> the lexeme itself
///   - any other non-blank character (delimiters included) -> that character
/// Blanks (space, tab, carriage return, line feed) only separate tokens.
///
/// @param originpath  path of the source file to analyse
/// @param targetpath  accepted for interface compatibility; nothing is written to it
/// @return the tokens in source order, without empty entries
std::vector<std::string> compile(const std::string originpath, const std::string targetpath)
{
    (void)targetpath;

    const std::string text = rmExegesis(getFormFile(originpath));
    std::cout << text << std::endl; // show the text that is actually scanned

    const std::string::size_type marker = text.find("#~");
    const std::string::size_type len = (marker == std::string::npos) ? text.size() : marker;

    std::vector<std::string> tokens;
    std::string::size_type i = 0;
    while (i < len)
    {
        const char ch = text[i];
        if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')
        {
            ++i;
            continue;
        }

        const std::string::size_type start = i;
        if (isLetter(ch))
        {
            // Digits may follow the first letter, so "x1" stays one identifier.
            while (i < len && (isLetter(text[i]) || isNumber(text[i])))
                ++i;
            tokens.push_back(text.substr(start, i - start) + "$ident");
        }
        else if (isNumber(ch))
        {
            while (i < len && isNumber(text[i]))
                ++i;
            tokens.push_back(text.substr(start, i - start) + "$idconst");
        }
        else if (isOperator(ch))
        {
            ++i;
            if (i < len && isOperator(text[i]))
                ++i; // two operator characters in a row form one operator
            tokens.push_back(text.substr(start, i - start));
        }
        else
        {
            // Delimiters and unrecognised characters are single-character tokens.
            ++i;
            tokens.push_back(std::string(1, ch));
        }
    }
    return tokens;
}

/// Loads the lexeme -> category-code table from "reference.txt".
///
/// Only the text between "<reference>" and "</reference>" is read (up to the
/// end of the file when the closing tag is missing). Every non-blank line has
/// the form `<key><space or tab>$<code>`; lines without a separator or without
/// a '$' are skipped. When a key occurs more than once the first entry wins.
///
/// @return the table; empty when the file or the opening tag is missing
std::map<std::string, std::string> getMap()
{
    const std::string text = getFormFile("reference.txt");
    std::map<std::string, std::string> refer;

    const std::string openTag = "<reference>";
    const std::string closeTag = "</reference>";

    const std::string::size_type open = text.find(openTag);
    if (open == std::string::npos)
        return refer;

    std::string::size_type cursor = open + openTag.size();
    const std::string::size_type close = text.find(closeTag, cursor);
    const std::string::size_type stop = (close == std::string::npos) ? text.size() : close;

    const char* const blanks = " \t\r";
    while (cursor < stop)
    {
        std::string::size_type lineEnd = text.find('\n', cursor);
        if (lineEnd == std::string::npos || lineEnd > stop)
            lineEnd = stop; // last entry is not followed by a line break
        const std::string line = text.substr(cursor, lineEnd - cursor);
        cursor = lineEnd + 1;

        const std::string::size_type keyBegin = line.find_first_not_of(blanks);
        if (keyBegin == std::string::npos)
            continue; // blank line
        const std::string::size_type keyEnd = line.find_first_of(" \t", keyBegin);
        if (keyEnd == std::string::npos)
            continue; // key without a code
        const std::string::size_type dollar = line.find('$', keyEnd);
        if (dollar == std::string::npos)
            continue;

        // '$' is not a blank, so the last non-blank position is at or after it.
        const std::string::size_type last = line.find_last_not_of(blanks);
        refer.emplace(line.substr(keyBegin, keyEnd - keyBegin),
                      line.substr(dollar + 1, last - dollar));
    }
    return refer;
}

/// Prints the token table: symbol, category code and description.
///
/// For a tagged token ("<lexeme>$<tag>") the lexeme is looked up first; when it
/// is in `refer` it is reported as a keyword, otherwise the code of the tag
/// is printed together with the lexeme. Untagged tokens are looked up as they
/// are. Empty tokens are skipped, and a missing code prints as an empty column.
///
/// @param refer  lexeme -> category-code table
/// @param key    tokens to print
void showTime(std::map<std::string, std::string> refer, std::vector<std::string> key)
{
    std::cout << "符号\t\t\t" << "种别码\t\t\t" << "说明" << std::endl;

    const std::string sep = "\t\t\t";
    // Plain lookup that does not insert missing keys into the table.
    const auto codeOf = [&refer](const std::string& name) {
        const auto it = refer.find(name);
        return it == refer.end() ? std::string() : it->second;
    };

    for (const std::string& token : key)
    {
        if (token.empty())
            continue;

        const std::string::size_type tagPos = token.find('$');
        if (tagPos == std::string::npos || tagPos == 0)
        {
            // No tag (a leading '$' is the symbol itself, not a tag separator).
            std::cout << token << sep << codeOf(token) << sep << token << std::endl;
            continue;
        }

        const std::string lexeme = token.substr(0, tagPos);
        const std::string tag = token.substr(tagPos + 1);
        const auto hit = refer.find(lexeme);
        if (hit != refer.end())
        {
            std::cout << lexeme << sep << hit->second << sep << "关键字" << std::endl;
        }
        else
        {
            std::cout << tag << sep << codeOf(tag) << sep << lexeme << std::endl;
        }
    }
}
运行示例:
文件结构:

示例1:
input.txt文件:
begin
begin
begin a:=1
end;
begin b:=1
end;
q:=10
end;
x:=9;
y:=11;
z:=12
end
#~
reference.txt文件
<reference>
if $0
then $1
else $2
while $3
begin $4
do $5
end $6
a $7
:= $8
+ $9
- $10
* $11
/ $12
+= $13
-= $14
*= $15
/= $16
, $17
; $22
( $18
) $19
{ $20
} $21
ident $100
idconst $101
</reference>
输出

示例2:
input.txt文件:
#include <iostream>
using namespace std;void main()
{cout<<"hello world!!!"<<endl; //单行注释在这里!!! /*eryedhds
}
#~ //#~为结束标志符/*
多行注释在这里!!
asbhfafs
afs
adfsdfcdccs
csCSZ
C
ZXC
Scxcds sfasf saf sd f afd sfd a14 1 411:36 2022/11/711:36 2022/11/7wr#~
*/
reference.txt文件
<reference>
if $0
then $1
else $2
while $3
begin $4
do $5
end $6
:= $8
+ $9
- $10
* $11
/ $12
+= $13
-= $14
*= $15
/= $16
> $30
< $31
, $17
; $22
( $18
) $19
{ $20
} $21
# $1111
include $1112
iostream $1113
using $1114
namespace $1115
std $1116
int $1117
main $1118
cout $1119
endl $1120
void $1121
" $1122
ident $100
idconst $101
! $1123
<< $1124
>> $1125
</reference>
输出









