从编译原理的角度去分析一个四则运算符表达式
一个四则运算表达式有以下几种词素:
- 数字(包括整数和小数)
- 运算符(+-*/)
- 括号(左括号和右括号)
状态图简单,这里就不画了。
下面直接上代码和结果吧(lua实现)
[Lua] 纯文本查看 复制代码
-- 四则运运算表达式的一些基本元素
local TokenType = {
Number = 0, -- 数字(包括整数和浮点数)
OpAdd = 1, -- +
OpSub = 2, -- -
OpMul = 3, -- *
OpDiv = 4, -- /
LeftParen = 5, -- (
RightParen = 6, -- )
EOF = 7 -- 结束符
}
local tbfmt = {__tostring = function(token)
return "{"..({[TokenType.Number] = "数字",
[TokenType.OpAdd] = "加号",
[TokenType.OpSub] = "减号",
[TokenType.OpMul] = "乘号",
[TokenType.OpDiv] = "除号",
[TokenType.LeftParen] = "左括号",
[TokenType.RightParen] = "右括号",
[TokenType.EOF] = "结束符"})[token.tokenType]..' : <"'..token.lexeme..'">}'
end}
-- 创建Token
local function GenToken(_tokenType, _lexeme)
local token = {tokenType = _tokenType,
lexeme = _lexeme}
return setmetatable(token, tbfmt)
end
-- 创建一个源码字符串字符迭代器
local function GenSrcIter(sourceCode)
local idx = 0
local len = string.len(sourceCode)
return function()
idx = idx + 1
if idx <= len then
return sourceCode:byte(idx),
sourceCode:sub(idx, idx)
end
end
end
-- 判断数字
local function IsNumber(ch)
-- '0' = 0x30
-- '9' = 0x39
return 0x30 <= ch and ch <= 0x39
end
-- 判断运算符
local function IsOperator(ch)
return ch == 0x2A or -- *
ch == 0x2B or -- +
ch == 0x2D or -- -
ch == 0x2F -- /
end
-- 判断小数点
local function IsPoint(ch)
-- '.' = 0x2E
return ch == 0x2E
end
-- 判断括号
local function IsParen(ch)
-- '(' = 0x28
-- ')' = 0x29
return ch == 0x28 or ch == 0x29
end
-- 判断空白符
local function IsWhiteChar(ch)
-- '<空格>' = 0x20
-- '\t' = 0x09
-- '\n' = 0x0A
-- '\v' = 0x0B
-- '\f' = 0x0C
-- '\r' = 0x0D
return ch == 0x20 or
ch == 0x09 or
ch == 0x0A or
ch == 0x0B or
ch == 0x0C or
ch == 0x0D
end
-- 词法分析
-- 会返回一个Token序列
local function Tokenizer(sourceCode)
local tokens = {}
local idx = 1
local nextCh = GenSrcIter(sourceCode)
local ascii, ch = nextCh()
while true do
-- 跳过所有空白符
while ascii and IsWhiteChar(ascii) do
ascii, ch = nextCh()
end
if not ascii then
tokens[idx] = GenToken(TokenType.EOF, "EOF")
break
end
-- 数字
if IsNumber(ascii) then
local str = ""
while ascii and IsNumber(ascii) do
str = str..ch
ascii, ch = nextCh()
end
-- 遇到小数点
if ascii and IsPoint(ascii) then
str = str..ch
ascii, ch = nextCh()
while ascii and IsNumber(ascii) do
str = str..ch
ascii, ch = nextCh()
end
end
tokens[idx] = GenToken(TokenType.Number, str)
idx = idx + 1
-- 遇到运算符
elseif IsOperator(ascii) then
tokens[idx] = GenToken(({[0x2B] = TokenType.OpAdd,
[0x2D] = TokenType.OpSub,
[0x2A] = TokenType.OpMul,
[0x2F] = TokenType.OpDiv})[ascii], ch)
idx = idx + 1
ascii, ch = nextCh()
-- 遇到括号
elseif IsParen(ascii) then
tokens[idx] = GenToken(({[0x28] = TokenType.LeftParen,
[0x29] = TokenType.RightParen})[ascii], ch)
idx = idx + 1
ascii, ch = nextCh()
-- 遇到以小数点开头的小数
elseif IsPoint(ascii) then
local str = ch
ascii, ch = nextCh()
while ascii and IsNumber(ascii) do
str = str..ch
ascii, ch = nextCh()
end
tokens[idx] = GenToken(TokenType.Number, str)
idx = idx + 1
-- 遇到未定义的符号
else
error("遇到未定义符号: "..ch)
end
end
return tokens
end
local exp = "3.1415926\t+ .233*369111 / ( 4.0- 1)"
local tokens = Tokenizer(exp)
print(exp)
for _, token in ipairs(tokens) do
print(tostring(token))
end
未完待续。。。
|