Yacc优先级处理
本文基于龙书第4.9.2节示例(图4-59)实现。
创建F# xUnit项目,命名为C4F59,安装以下NuGet包:
Install-Package FSharpCompiler.Yacc
Install-Package FSharpCompiler.Parsing
Install-Package FSharp.xUnit
Install-Package FSharp.Literals
Install-Package FSharp.Idioms
创建语法文件C4F59.yacc:
lines : lines expr "\n"
| lines "\n"
| /* empty */
;
expr : expr "+" expr
| expr "-" expr
| expr "*" expr
| expr "/" expr
| "(" expr ")"
| "-" expr %prec UMINUS
| NUMBER
;
%%
%left "+" "-"
%left "*" "/"
%right UMINUS
语法文件结构遵循以下格式:
规则列表
%%
优先级声明
注释采用C语言风格/* ... */,不可嵌套。其中%prec UMINUS用于为特定规则命名,供优先级声明引用。
完成语法文件后,可通过以下代码解析:
let path = Path.Combine(__SOURCE_DIRECTORY__, @"C4F59.yacc")
let text = File.ReadAllText(path)
let yaccFile = YaccFile.parse text
解析结果存储在yaccFile变量中,其结构如下:
let y = {
mainRules=[
["lines";"lines";"expr";"\n"];
["lines";"lines";"\n"];
["lines"];
["expr";"expr";"+";"expr"];
["expr";"expr";"-";"expr"];
["expr";"expr";"*";"expr"];
["expr";"expr";"/";"expr"];
["expr";"(";"expr";")"];
["expr";"-";"expr"];
["expr";"NUMBER"]
];
precedences=[
LeftAssoc,[TerminalKey "+";TerminalKey "-"];
LeftAssoc,[TerminalKey "*";TerminalKey "/"];
RightAssoc,[ProductionKey ["expr";"-";"expr"]]
]
}
生成解析表数据:
let yacc = ParseTable.create(yaccFile.mainRules, yaccFile.precedences)
[<Fact>]
member this.``generate parse table``() =
let result =
[
"let rules = " + Render.stringify yacc.rules
"let kernelSymbols = " + Render.stringify yacc.kernelSymbols
"let parsingTable = " + Render.stringify yacc.parsingTable
] |> String.concat System.Environment.NewLine
output.WriteLine(result)
创建新模块保存解析表数据:
module C4F59ParseTable
let rules = set [["";"lines"];["expr";"(";"expr";")"];....]
let kernelSymbols = Map.ofList [1,"lines";2,"(";3,"expr";....]
let parsingTable = set [0,"",-8;0,"\n",-8;0,"(",-8;....]
验证解析表:
[<Fact>]
member this.``validate parse table``() =
Should.equal yacc.rules C4F59ParseTable.rules
Should.equal yacc.kernelSymbols C4F59ParseTable.kernelSymbols
Should.equal yacc.parsingTable C4F59ParseTable.parsingTable
定义词法标记类型:
type C4F59Token =
| EOL
| LPAREN
| RPAREN
| STAR
| DIV
| PLUS
| MINUS
| NUMBER of int
member this.getTag() =
match this with
| EOL -> "\n"
| LPAREN -> "("
| RPAREN -> ")"
| STAR -> "*"
| DIV -> "/"
| PLUS -> "+"
| MINUS -> "-"
| NUMBER _ -> "NUMBER"
词法分析器实现:
open FSharp.Literals.StringUtils
open System
type ....
static member from (text:string) =
let rec loop (inp:string) =
seq {
match inp with
| "" -> ()
| Prefix @"[\s-[\n]]+" (_,rest) -> yield! loop rest
| Prefix @"\n" (_,rest) ->
yield EOL
yield! loop rest
| PrefixChar '(' rest ->
yield LPAREN
yield! loop rest
| PrefixChar ')' rest ->
yield RPAREN
yield! loop rest
| PrefixChar '*' rest ->
yield STAR
yield! loop rest
| PrefixChar '/' rest ->
yield DIV
yield! loop rest
| PrefixChar '+' rest ->
yield PLUS
yield! loop rest
| PrefixChar '-' rest ->
yield MINUS
yield! loop rest
| Prefix @"\d+" (n,rest) ->
yield NUMBER(Int32.Parse n)
yield! loop rest
| never -> failwith never
}
loop text
测试词法分析器:
[<Fact>]
member this.``tokenize``() =
let inp = "-1/2+3*(4-5)" + System.Environment.NewLine
let tokens = C4F59Token.from inp
let result = Render.stringify (List.ofSeq tokens)
output.WriteLine(result)
输出结果:
[MINUS;NUMBER 1;DIV;NUMBER 2;PLUS;NUMBER 3;STAR;LPAREN;NUMBER 4;MINUS;NUMBER 5;RPAREN;EOL]
构建语法解析器:
module C4F59.C4F59Parser
open FSharpCompiler.Parsing
let parser =
SyntacticParser(
C4F59ParseTable.rules,
C4F59ParseTable.kernelSymbols,
C4F59ParseTable.parsingTable
)
let parseTokens tokens =
parser.parse(tokens,fun (tok:C4F59Token) -> tok.getTag())
测试语法解析:
[<Fact>]
member this.``parse tokens``() =
let tokens = [MINUS;NUMBER 1;DIV;NUMBER 2;PLUS;NUMBER 3;STAR;LPAREN;NUMBER 4;MINUS;NUMBER 5;RPAREN;EOL]
let tree = C4F59Parser.parseTokens tokens
let result = Render.stringify tree
output.WriteLine(result)
定义表达式数据类型:
type C4F59Expr =
| Add of C4F59Expr * C4F59Expr
| Sub of C4F59Expr * C4F59Expr
| Mul of C4F59Expr * C4F59Expr
| Div of C4F59Expr * C4F59Expr
| Negative of C4F59Expr
| Number of int
转换模块实现:
module C4F59.C4F59Translation
open FSharpCompiler.Parsing
let rec translateExpr = function
| Interior("expr",[e1;Terminal PLUS;e2;]) ->
C4F59Expr.Add(translateExpr e1, translateExpr e2)
| Interior("expr",[e1;Terminal MINUS;e2;]) ->
C4F59Expr.Sub(translateExpr e1, translateExpr e2)
| Interior("expr",[e1;Terminal STAR;e2;]) ->
C4F59Expr.Mul(translateExpr e1, translateExpr e2)
| Interior("expr",[e1;Terminal DIV;e2;]) ->
C4F59Expr.Div(translateExpr e1, translateExpr e2)
| Interior("expr",[Terminal LPAREN;e;Terminal RPAREN;]) ->
translateExpr e
| Interior("expr",[Terminal MINUS;e;]) ->
C4F59Expr.Negative(translateExpr e)
| Interior("expr",[Terminal (NUMBER n);]) ->
C4F59Expr.Number n
| never -> failwithf "%A" never.firstLevel
let rec translateLines tree =
[
match tree with
| Interior("lines",[lines;expr;Terminal EOL]) ->
yield! translateLines lines
yield translateExpr expr
| Interior("lines",[lines;Terminal EOL]) ->
yield! (translateLines lines)
| Interior("lines",[]) ->
()
| _ -> failwithf "%A" tree.firstLevel
]
最终测试:
[<Fact>]
member this.``translate to expr``() =
let tokens = [MINUS;NUMBER 1;DIV;NUMBER 2;PLUS;NUMBER 3;STAR;LPAREN;NUMBER 4;MINUS;NUMBER 5;RPAREN;EOL]
let tree = C4F59Parser.parseTokens tokens
let exprs = C4F59Translation.translateLines tree
let result = Render.stringify exprs
output.WriteLine(result)
输出结果:
[Add(Div(Negative(Number 1),Number 2),Mul(Number 3,Sub(Number 4,Number 5)))]