1 概述
设计、编制并调试一个简单的C语言词法分析程序,掌握利用状态转换图设计词法分析器的基本方法,利用该词法分析器完成对源程序字符串的词法分析。通过对该词法分析器的设计,加深对词法分析原理、状态转换图等编译原理知识的理解。
2 使用的基本概念和原理
词法分析器的主要功能是读入源程序并输出单词符号:对源程序从左到右、从上到下逐个字符进行扫描,把构成源程序的字符串转换成单词符号的序列,输出的单词序列用于语法分析。同时,它还可以完成滤掉空格、换行符和注释,进行行列计数,发现错误等工作。词法分析中单词符号主要分为关键字、标识符、常数、运算符、界符5类。词法分析器可作为独立的一遍,也可作为语法分析器的子程序。
NFA与DFA转换
可以用子集法将NFA转换为DFA,再利用分割法将DFA最小化,正规式的整体变换如图所示:

3 总体设计
词法分析器首先将输入的文件进行读取,读取后对源程序的字符调用词法分析程序分析,根据字符种类来确定调用函数,将结果进行保存,随后输出,主程序示意图如图所示:

主程序调用词法分析器,根据类码表对源程序进行词法分析,词法分析程序框图如图所示:

词法分析器的功能是输入源程序,按照构词规则将其分解成一系列单词符号。单词是语言中具有独立意义的最小单位,包括关键字、标识符、运算符、界符和常数等,本课程设计的数据定义表如表所示:
表 部分单词符号及其分类码表
| 单词符号 | 类别编码 | 类别码的助记符号 | 单词值 |
| begin | 1 | BEGIN | 关键字 |
| end | 2 | END | |
| if | 3 | IF | |
| else | 4 | ELSE | |
| return | 5 | RETURN | |
| while | 6 | WHILE | |
| then | 7 | THEN | |
| for | 8 | FOR | |
| int | 9 | INT | |
| char | 10 | CHAR | |
| float | 11 | FLOAT | |
| main | 12 | MAIN | |
| cin | 13 | CIN | |
| cout | 14 | COUT | |
| 标识符 | 15 | BA | 字母数字串 |
| 常数(0-9) | 16 | CA | 数字串 |
| 运算符 (+、-、*、/、%、<、>、=、!) | 17 | DA | 数学运算符号 |
| 界符 (; , { } [ ] ( ) :) | 18 | EA | 分隔符号 |
4 代码实现
/*************Written By WFP************//*******Date : 12,16,2022********/
#define _CRT_SECURE_NO_DEPRECATE
#include <stdio.h>
#include <stdlib.h>
#include <iomanip>
#include <iostream>
#include <string>
using namespace std;
// Keyword table. A keyword's category code is its 1-based position in this
// array (analyse() obtains it through value(KEYWORD, 100, word)); the unused
// tail of the array stays as empty strings.
// The inline comment that used to sit inside the initializer has been moved
// out: on a single physical line it commented out every keyword from "int"
// onward together with the closing brace.
std::string KEYWORD[100] = {
    "begin", "end", "if", "else", "return", "while", "then", "for",
    "int", "char", "float", "main", "cin", "cout"
};

// Separator characters; the unused tail of the array is zero-filled.
char SEPARATER[50] = { ';', ',', '{', '}', '[', ']', '(', ')', ':' };

// Operator characters; the unused tail of the array is zero-filled.
char OPERATOR[50] = { '+', '-', '*', '/', '>', '<', '=', '!', '%' };

// Characters that are skipped between tokens; the unused tail is zero-filled.
char FILTER[10] = { ' ', '\n', '\t' };

// Numeric tags for identifiers, constants and filter characters.
// NOTE(review): none of these three constants is referenced in the visible
// part of the file (the analyser prints the literal codes 15 and 16 instead);
// confirm whether they are still needed.
const int IDENTIFIER = 100;
const int CONSTANT = 101;
const int FILTER_VALUE = 102;
/**判断是否为关键字**/
int l = 1, a = 0;
bool IsKeyword(string word) {for (int i = 0; i < 18; i++) {if (KEYWORD[i] == word) {if (KEYWORD[i] == "begin")printf("助记符号:BEGIN\t");if (KEYWORD[i] == "end")printf("助记符号:END\t");if (KEYWORD[i] == "if")printf("助记符号:IF\t");if (KEYWORD[i] == "else")printf("助记符号:ELSE\t");if (KEYWORD[i] == "while")printf("助记符号:WHILE\t");if (KEYWORD[i] == "int")printf("助记符号:INT\t");if (KEYWORD[i] == "char")printf("助记符号:CHAR\t");if (KEYWORD[i] == "for")printf("助记符号:FOR\t");if (KEYWORD[i] == "return")printf("助记符号:RETURN");if (KEYWORD[i] == "cout")printf("助记符号:COUT\t");if (KEYWORD[i] == "float")printf("助记符号:FLOAT\t");if (KEYWORD[i] == "main")printf("助记符号:MAIN\t");if (KEYWORD[i] == "cin")printf("助记符号:CIN\t");if (KEYWORD[i] == "then")printf("助记符号:THEN\t");a++;return true;}}return false;
}
/** Number of separator tokens reported so far; analyse() increments it. **/
int b = 0;

/**
 * Separator test: returns true when `ch` is one of the characters stored in
 * SEPARATER.
 *
 * Only the filled part of the table is searched. The old fixed bound of 10
 * reached into the zero padding behind the 9 real entries, so '\0' was
 * reported as a separator.
 */
bool IsSeparater(char ch) {
    if (ch == '\0') {
        return false;  // NUL is table padding, never a separator
    }
    for (size_t k = 0; k < sizeof(SEPARATER) && SEPARATER[k] != '\0'; ++k) {
        if (SEPARATER[k] == ch) {
            return true;
        }
    }
    return false;
}
/** Operator test **/
/** Number of operator tokens reported so far; analyse() increments it. **/
int c = 0;

/**
 * Returns true when `ch` is one of the characters stored in OPERATOR.
 *
 * Only the filled part of the table is searched. The old fixed bound of 10
 * reached into the zero padding behind the 9 real entries, so '\0' was
 * reported as an operator.
 */
bool IsOperator(char ch) {
    if (ch == '\0') {
        return false;  // NUL is table padding, never an operator
    }
    for (size_t k = 0; k < sizeof(OPERATOR) && OPERATOR[k] != '\0'; ++k) {
        if (OPERATOR[k] == ch) {
            return true;
        }
    }
    return false;
}
/**
 * Filter-character test: returns true when `ch` is one of the characters
 * stored in FILTER (space, newline, tab).
 *
 * Side effect: every '\n' passed in advances the global line counter `l`,
 * so call this exactly once per input character.
 *
 * Only the filled part of the table is searched. The old fixed bound of 4
 * reached into the zero padding behind the 3 real entries, so '\0' was
 * silently treated as whitespace.
 */
bool IsFilter(char ch) {
    if (ch == '\0') {
        return false;  // NUL is table padding, not a filter character
    }
    for (size_t k = 0; k < sizeof(FILTER) && FILTER[k] != '\0'; ++k) {
        if (FILTER[k] == ch) {
            if (ch == '\n') {
                l++;
            }
            return true;
        }
    }
    return false;
}
/** Number of identifier tokens reported so far; analyse() increments it. **/
int e = 0;

/** Upper-case test: true exactly when ch lies in the range 'A'..'Z'. **/
bool IsUpLetter(char ch) {
    return 'A' <= ch && ch <= 'Z';
}
/** Lower-case test: true exactly when ch lies in the range 'a'..'z'. **/
bool IsLowLetter(char ch) {
    return 'a' <= ch && ch <= 'z';
}
/** Number of numeric-constant tokens reported so far; analyse() increments it. **/
int f = 0;

/** Digit test: true exactly when ch lies in the range '0'..'9'. **/
bool IsDigit(char ch) {
    return '0' <= ch && ch <= '9';
}

/**
 * Linear search over the first n elements of a.
 *
 * @param a    array to search
 * @param n    number of elements to inspect
 * @param str  value to look for (compared with operator==)
 * @return the 1-based position of the first element equal to str,
 *         or -1 when none of the n elements matches
 */
template <class T>
int value(T* a, int n, T str) {
    int position = 0;
    while (position < n) {
        const bool hit = (a[position] == str);
        ++position;  // positions are reported 1-based
        if (hit) {
            return position;
        }
    }
    return -1;
}
int g = 0;
int h = 0;
int i = 0;
/**词法分析**/void analyse(FILE* fpin) {char ch = ' ';string arr = "";while ((ch = fgetc(fpin)) != EOF) {arr = "";if (IsFilter(ch)) {/*while (IsFilter(ch)) {arr += ch;if (ch == '\n') {l++;}ch = fgetc(fpin);}*/if(ch!=' ')printf("*********************第%d行*********************\n", l);} //判断是否为过滤符else if (IsLowLetter(ch)) { //判断是否为关键字while (IsLowLetter(ch)) {arr += ch;ch = fgetc(fpin);}//fseek(fpin,-1L,SEEK_CUR);if (IsKeyword(arr)) {printf("类别编码:%d\t", value(KEYWORD, 100, arr));//cout << arr << " 关键字" << endl;//if(IaKeyword(arr))cout << "文件中符号:" << arr << " 单词类型:" << "关键字" << endl;//printf("第%d行\t", l);}else{printf("助记符号:BA\t");printf("类别编码:15\t");//cout << arr << " 标识符" << endl;cout << "文件中符号:" << arr << " 单词类型:" << "标识符" << endl;//printf("第%d行\t", l);e++;}}else if (IsDigit(ch)) { //判断是否为常数while (IsDigit(ch) || (ch == '.' && IsDigit(fgetc(fpin)))) {arr += ch;printf("助记符号:CA\t");ch = fgetc(fpin);}fseek(fpin, -1L, SEEK_CUR);printf("类别编码:16\t");//cout << arr << " 整形数" <<endl;cout << "文件中符号:" << arr << " 单词类型:" << "常数" << endl;//printf("第%d行\t", l);f++;}else if (IsUpLetter(ch) || IsLowLetter(ch) || ch == '_') {while (IsUpLetter(ch) || IsLowLetter(ch) || ch == '_' || IsDigit(ch)) {arr += ch;ch = fgetc(fpin);}fseek(fpin, -1L, SEEK_CUR);printf("类别编码:15\t");//cout << arr << " 标识符" << endl;cout << "文件中符号:" << arr <<" 单词类型:" << "标识符" << endl;//printf("第%d行\t", l);e++;}else if (IsOperator(ch)||ch== '/') {arr += ch;if (ch == '*'){arr += ch;ch = fgetc(fpin);while (ch != '*') {arr += ch;ch = fgetc(fpin);if (ch == '*') {arr += ch;ch = fgetc(fpin);if (ch == '/') {i++;break;}}}}else if (ch == '/') {arr += ch;ch = fgetc(fpin);while (ch != '\n') {arr += ch;ch = fgetc(fpin);if (ch == ' ') {i++;break;}}}else {printf("助记符号:DA\t");printf("类别编码:17\t");//cout << arr << " 运算符" << endl;cout << "文件中符号:" << arr << " 单词类型:" << "运算符" << endl;//printf("第%d行\t", l);c++;}}//else if()else switch (ch) {case '+':case '-':case '*':case '/':case '>':case '<':case '=':case '!':{arr += 
ch;printf("助记符号:DA\t");printf("类别编码:17\t");//cout << arr << " 运算符" << endl;cout << "文件中符号:" << arr << " 单词类型:" << "运算符" << endl;//printf("第%d行\t", l);c++;break;}case ';':case ',':case '(':case ')':case '[':case ']':case '{':case '}':case ':':{arr += ch;printf("助记符号:EA\t");printf("类别编码:18\t");cout << "文件中符号:" << arr << " 单词类型:"<< "界符" << endl;//printf("第%d行\t", l);b++;break;}default:cout << "\"" << ch << "\":无法识别的字符!" << endl;g++;}}h = a + b + c + e + f + g;printf("文件内容一共有 %d 行\n", l);printf("一共有 %d 个字符\n", h);printf("基本保留字一共有 %d 个\n", a);printf("界符一共有 %d 个\n", b);printf("运算符一共有 %d 个\n", c);printf("标识符一共有 %d 个\n", e);printf("常数一共有 %d 个\n", f);printf("注释共有 %d 处\n", i);printf("错误共有 %d 个\n", g);}int main()
{char inFile[40];FILE* fpin;cout << "请输入源文件名(包括路径和后缀):";while (true) {cin >> inFile;if ((fpin = fopen(inFile, "r")) != NULL)break;else {cout << "文件名错误!" << endl;cout << "请输入源文件名(包括路径和后缀):";}}/*char ch = ' ';while ((ch = fgetc(fpin)) != EOF) {printf("%c", ch);}*/cout << "------词法分析如下------" << endl;cout << "*********************第1行*********************" << endl;//read(fpin);analyse(fpin);system("pause");return 0;}
6 测试和试运行
程序测试
进行测试文件编写,采用txt文件,程序内容如下所示:
int main (){
int a=1,b=2;
b / a;
b >a;
c=a +b;
cout <<c;return 0;int
}
通过对程序进行调试,调试控制台如图所示:

输入文件路径并运行,如图所示:

源码地址: https://github.com/wen3017/word-analyse/blob/analyse/word.cpp
课设文件需要留言







