编译原理 ~ 解析四则运算表达式~词法分析
从编译原理的角度去分析一个四则运算符表达式一个四则运算表达式有以下几种词素:
[*]数字(包括整数和小数)
[*]运算符(+-*/)
[*]括号(左括号和右括号)
状态图简单,这里就不画了。
下面直接上代码和结果吧(lua实现)
-- 四则运运算表达式的一些基本元素
local TokenType = {
Number = 0, -- 数字(包括整数和浮点数)
OpAdd = 1, -- +
OpSub = 2, -- -
OpMul = 3, -- *
OpDiv = 4, -- /
LeftParen = 5, -- (
RightParen = 6, -- )
EOF = 7 -- 结束符
}
local tbfmt = {__tostring = function(token)
return "{"..({ = "数字",
= "加号",
= "减号",
= "乘号",
= "除号",
= "左括号",
= "右括号",
= "结束符"})..' : <"'..token.lexeme..'">}'
end}
-- 创建Token
local function GenToken(_tokenType, _lexeme)
local token = {tokenType = _tokenType,
lexeme = _lexeme}
return setmetatable(token, tbfmt)
end
-- 创建一个源码字符串字符迭代器
local function GenSrcIter(sourceCode)
local idx = 0
local len = string.len(sourceCode)
return function()
idx = idx + 1
if idx <= len then
return sourceCode:byte(idx),
sourceCode:sub(idx, idx)
end
end
end
-- 判断数字
local function IsNumber(ch)
-- '0' = 0x30
-- '9' = 0x39
return 0x30 <= ch and ch <= 0x39
end
-- 判断运算符
local function IsOperator(ch)
return ch == 0x2A or -- *
ch == 0x2B or -- +
ch == 0x2D or -- -
ch == 0x2F -- /
end
-- 判断小数点
local function IsPoint(ch)
-- '.' = 0x2E
return ch == 0x2E
end
-- 判断括号
local function IsParen(ch)
-- '(' = 0x28
-- ')' = 0x29
return ch == 0x28 or ch == 0x29
end
-- 判断空白符
local function IsWhiteChar(ch)
-- '<空格>' = 0x20
-- '\t' = 0x09
-- '\n' = 0x0A
-- '\v' = 0x0B
-- '\f' = 0x0C
-- '\r' = 0x0D
return ch == 0x20 or
ch == 0x09 or
ch == 0x0A or
ch == 0x0B or
ch == 0x0C or
ch == 0x0D
end
-- 词法分析
-- 会返回一个Token序列
local function Tokenizer(sourceCode)
local tokens = {}
local idx = 1
local nextCh = GenSrcIter(sourceCode)
local ascii, ch = nextCh()
while true do
-- 跳过所有空白符
while ascii and IsWhiteChar(ascii) do
ascii, ch = nextCh()
end
if not ascii then
tokens = GenToken(TokenType.EOF, "EOF")
break
end
-- 数字
if IsNumber(ascii) then
local str = ""
while ascii and IsNumber(ascii) do
str = str..ch
ascii, ch = nextCh()
end
-- 遇到小数点
if ascii and IsPoint(ascii) then
str = str..ch
ascii, ch = nextCh()
while ascii and IsNumber(ascii) do
str = str..ch
ascii, ch = nextCh()
end
end
tokens = GenToken(TokenType.Number, str)
idx = idx + 1
-- 遇到运算符
elseif IsOperator(ascii) then
tokens = GenToken(({ = TokenType.OpAdd,
= TokenType.OpSub,
= TokenType.OpMul,
= TokenType.OpDiv}), ch)
idx = idx + 1
ascii, ch = nextCh()
-- 遇到括号
elseif IsParen(ascii) then
tokens = GenToken(({ = TokenType.LeftParen,
= TokenType.RightParen}), ch)
idx = idx + 1
ascii, ch = nextCh()
-- 遇到以小数点开头的小数
elseif IsPoint(ascii) then
local str = ch
ascii, ch = nextCh()
while ascii and IsNumber(ascii) do
str = str..ch
ascii, ch = nextCh()
end
tokens = GenToken(TokenType.Number, str)
idx = idx + 1
-- 遇到未定义的符号
else
error("遇到未定义符号: "..ch)
end
end
return tokens
end
local exp = "3.1415926\t+ .233*369111 / ( 4.0- 1)"
local tokens = Tokenizer(exp)
print(exp)
for _, token in ipairs(tokens) do
print(tostring(token))
end
未完待续。。。
支持一下
页:
[1]