Indentation, expressions, statements and StackOver

2019-07-30 21:00发布

I test indentation with FParsec, according to this implementation, but when I make it a little more complex by adding expressions (literals, lists, tuples and arithmetic operations), allowing expressions to top-level, and adding a variable creation statement; I first get a StackOverflowException error . In my opinion, this is because the expression parser is solicited in such a way as to make an infinite loop in the program. I see no other reason, however, I don't know how to fix this problem.

If I remove the attempt pexpression from my parser data statement, there is no more StackOverflowException, nevertheless the module IndentationParserWithoutBacktracking (therefore managing indentation) tells me that the code to be parsed is missing a "newline":

Failure: Error in Ln: 2 Col: 1
loop i 0 10
^
Expecting: let or print

The parser backtracked after:
  Error in Ln: 3 Col: 5
      let myVar = 2 + 1
      ^
  Expecting: loop or print

  The parser backtracked after:
    Error in Ln: 3 Col: 17
        let myVar = 2 + 1
                    ^
    Expecting: newline

All this according to the following text to be parsed:

loop i 0 10
    let myVar = 2 + 1
    print myVar

Here is my code:

open FParsec

// module IndentationParserWithoutBacktracking // see the link

// Utils

open IndentationParserWithoutBacktracking

let isBlank = fun c -> c = ' ' || c = '\t'
let ws  = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws

let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str

// AST

type Identifier = Identifier of string

type InfixOp = 
    | Sum | Sub | Mul | Div | Pow | Mod
    | And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual

type Value =
    | Int of int
    | Float of float
    | Bool of bool
    | String of string
    | Char of char
    | Variable of Identifier

type Expr =
    | Literal of Value
    | Infix of Expr * InfixOp * Expr
    | List of Expr list
    | Tuple of Expr list

type Statement =
    | Expression of Expr
    | Let of Identifier * Statement list
    | Loop of Identifier * Expr * Expr * Statement list
    | Print of Identifier

// Literals

let numberFormat = NumberLiteralOptions.AllowMinusSign   ||| NumberLiteralOptions.AllowFraction |||
                   NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal    |||
                   NumberLiteralOptions.AllowBinary      ||| NumberLiteralOptions.AllowPlusSign

let literal_numeric =
    numberLiteral numberFormat "number" |>> fun nl ->
        if nl.IsInteger then Literal (Int(int nl.String))
        else Literal (Float(float nl.String))

let literal_bool = 
    (choice [
        (stringReturn "true" (Literal (Bool true)))
        (stringReturn "false" (Literal (Bool false)))
    ]
    .>> ws) <?> "boolean"

let literal_string = 
    (between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
    |>> fun s -> Literal (String s)) <?> "string"

let literal_char = 
    (between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
    |>> fun c -> Literal (Char c)) <?> "character"

let identifier =
    (many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
    |>> fun i -> Identifier i) <?> "valid identifier"

let betweenParentheses p =
    (between (str "(") (str ")") p)

let variable = identifier |>> fun id -> Literal (Variable id)

let literal = (attempt literal_numeric  <|>
               attempt literal_bool     <|>
               attempt literal_char     <|>
               attempt literal_string   <|>
               attempt variable)        <?> "literal"

// Expressions

let pexpr, pexprimpl = createParserForwardedToRef()

let term =
    (ws >>. literal .>> ws) <|>
    (betweenParentheses (ws >>. pexpr)) <|>
    (ws >>. pexpr .>> ws)

let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
    p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))

let ops =
    // Arithmetic
    [ "+"; "-"; "*"; "/"; "%" ] @
    // Logical
    [ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]

let opCorrespondance op =
    match op with
    // Arithmetic operators
    | "+"  -> Sum
    | "-"  -> Sub
    | "*"  -> Mul
    | "/"  -> Div
    | "%"  -> Mod
    // Logical operators
    | "&&" -> And
    | "||" -> Or
    | "==" -> Equal
    | "!=" -> NotEqual
    | ">"  -> Greater
    | "<"  -> Smaller
    | ">=" -> GreaterEqual
    | "<=" -> SmallerEqual

let opParser = new OperatorPrecedenceParser<_, _, _>()

for op in ops do
    infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))

opParser.TermParser <- term

let list = between (str "[") (str "]") (sepBy pexpr (str ",")) |>> List

let tuple = between (str "(") (str ")") (sepBy pexpr (str ",")) |>> Tuple

let expression =
    opParser.ExpressionParser   <|> // I removed this line to don't have the mistake again.
    list                        <|>
    tuple                       <|>
    literal

pexprimpl := attempt expression

// Statements

let statements, statementsRef = createParserForwardedToRef()

let pexpression = expression |>> Expression

let plet =
    pipe2
        (keyword "let" >>. ws1 >>. identifier)
        (ws >>. str "=" >>. ws >>. statements)
        (fun id gtt exp -> Let(id, gtt, exp))

// From the link, but "revisited"
let ploop =
    pipe4
        (keyword "loop" >>. ws1 >>. identifier)
        (ws1 >>. literal) // If I put 'pexpr', it doesn't work too...
        (ws1 >>. literal)
        (statements)
        (fun id min max stmts -> Loop(id, min, max, stmts))

let print = keyword "print" >>. (ws1 >>. identifier |>> Print)

let statement =
    attempt plet        <|>
    attempt print       <|>
    attempt ploop       <|>
    attempt pexpression

statementsRef := indentedMany1 statement "statement"

let document = statements .>> spaces .>> eof

let test str =
    match runParserOnString document (UserState.Create()) "" str with
        | Success(result, _, _)   -> printfn "Success: %A" result
        | Failure(errorMsg, _, _) -> printfn "Failure: %s" errorMsg

System.Console.Clear()

test @"
loop i 0 10
    let myVar = 2 + 1
    print myVar
"

I know I ask several questions at the same time, and the site doesn't really allow it, but they're all a little linked together, so I might as well kill two birds with one stone...

I would really like to understand my mistakes, in order to design a parser for a very small ML-like language.

Thank you.

Edit

Here is my current code, which has been modified to respond to the first problems encountered with indentation:

open IndentationParserWithoutBacktracking // So from the link

let isBlank = fun c -> c = ' ' || c = '\t'
let ws  = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws

let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str

// AST

type Identifier = Identifier of string

type Value =
    | Int of int
    | Float of float
    | Bool of bool
    | String of string
    | Char of char
    | Variable of Identifier

// In FP, "all" is an expression, so:

type Expr =
    // Arithmetic + lists and tuple
    | Literal of Value
    | Infix of Expr * InfixOp * Expr
    | List of Expr list
    | Tuple of Expr list
    // Statements
    | Return of Expr
    | Loop of Identifier * Expr * Expr * Expr list
    | Print of Identifier
and InfixOp = 
    | Sum | Sub | Mul | Div | Pow | Mod
    | And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual

// Literals

let numberFormat = NumberLiteralOptions.AllowMinusSign   ||| NumberLiteralOptions.AllowFraction |||
                   NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal    |||
                   NumberLiteralOptions.AllowBinary

let literal_numeric =
    numberLiteral numberFormat "number" |>> fun nl ->
        if nl.IsInteger then Literal (Int(int nl.String))
        else Literal (Float(float nl.String))

let literal_bool = 
    (choice [
        (stringReturn "true" (Literal (Bool true)))
        (stringReturn "false" (Literal (Bool false)))
    ]
    .>> ws) <?> "boolean"

let literal_string = 
    (between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
    |>> fun s -> Literal (String s)) <?> "string"

let literal_char = 
    (between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
    |>> fun c -> Literal (Char c)) <?> "character"

let identifier =
    (many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
    |>> fun i -> Identifier i) <?> "identifier"

let betweenParentheses p =
    (between (str "(") (str ")") p) <?> ""

let variable = identifier |>> fun id -> Literal (Variable id)

let literal = (attempt literal_numeric  <|>
               attempt literal_bool     <|>
               attempt literal_char     <|>
               attempt literal_string   <|>
               attempt variable)        <?> "literal"

// Expressions and statements

let pexprs, pexprimpl = createParserForwardedToRef()

// `ploop` is located here to force `pexprs` to be of the type `Expr list`, `ploop` requesting an expression list.
let ploop =
    pipe4
        (keyword "loop" >>. ws1 >>. identifier)
        (ws1 >>. literal)
        (ws1 >>. literal)
        (pexprs)
        (fun id min max stmts -> Loop(id, min, max, stmts))

// `singlepexpr` allows to use only one expression.
let singlepexpr =
    pexprs |>> fun ex -> ex.Head

let term =
    (ws >>. singlepexpr .>> ws) <|>
    (betweenParentheses (ws >>. singlepexpr)) <|>
    (ws >>. literal .>> ws) <|>
    (betweenParentheses (ws >>. literal))

let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
    p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))

let ops =
    // Arithmetic
    [ "+"; "-"; "*"; "/"; "%" ] @
    // Logical
    [ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]

let opCorrespondance op =
    match op with
    // Arithmetic operators
    | "+"  -> Sum
    | "-"  -> Sub
    | "*"  -> Mul
    | "/"  -> Div
    | "%"  -> Mod
    // Logical operators
    | "&&" -> And
    | "||" -> Or
    | "==" -> Equal
    | "!=" -> NotEqual
    | ">"  -> Greater
    | "<"  -> Smaller
    | ">=" -> GreaterEqual
    | "<=" -> SmallerEqual

let opParser = new OperatorPrecedenceParser<Expr, unit, UserState>()

for op in ops do
    infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))

opParser.TermParser <- term

let list = (between (str "[") (str "]") (sepBy singlepexpr (str ",")) |>> List) <?> "list"

let tuple = (between (str "(") (str ")") (sepBy singlepexpr (str ",")) |>> Tuple) <?> "tuple"

// Statements

// A commented `let` expression, commented for tests with the `return` instruction.

//let plet =
//    pipe3
//        (keyword "let" >>. ws1 >>. identifier)
//        (ws >>. gtt ":")
//        (ws >>. str "=" >>. ws >>. pexprs)
//        (fun id gtt exp -> Let(id, gtt, exp))

let preturn = 
    keyword "return" >>. ws >>. singlepexpr
    |>> fun ex -> Return ex

let print = keyword "print" >>. (ws1 >>. identifier |>> Print)

let instruction =
    print <|>
    ploop <|>
    preturn <|>

    opParser.ExpressionParser <|> // So we add the arithmetic, like x + y or 21 * 32 - 12 for example
    list <|>
    tuple <|>
    literal

pexprimpl := indentedMany1 instruction "instruction"

let document = pexprs .>> spaces .>> eof

let test str =
    match runParserOnString document (UserState.Create()) "" str with
        | Success(result, _, _)   -> printfn "%A" result
        | Failure(errorMsg, _, _) -> printfn "%s" errorMsg

System.Console.Clear()

// The test code that give an error of "newline" expecting
let code = test @"
return 2 + 1
"

And here some screenshots about error:

enter image description here enter image description here enter image description here

标签: f# fparsec
1条回答
看我几分像从前
2楼-- · 2019-07-30 21:35

The reason why indentedMany1 tells you it's expecting a newline in your example code is because that's what it's looking for: an indented block. Not an expression on one line. So your let myVar = 2 + 1 line is confusing it. If you wrote it as:

let myVar =
    2 + 1

then I bet it would work.

What you need, I believe, is to change your let parser to allow one of two things: either an expression on a single line, or a block of statements (your statements parser). I.e., something like:

let pLetValue = expression <|> statements
let plet =
    pipe2
        (keyword "let" >>. ws1 >>. identifier)
        (ws >>. str "=" >>. ws >>. pLetValue)
        (fun id gtt exp -> Let(id, gtt, exp))

Note that I haven't tested this, as I don't have much time today. It's possible that instead of expression above, you'd want attempt expression (or pexpr, which is the same thing). Experiment a little and see what happens; and if you're completely lost as you try to figure out how FParsec is handling a given expression, remember the advice given in http://www.quanttec.com/fparsec/users-guide/debugging-a-parser.html.

查看更多
登录 后发表回答