Parse expression right-to-left

2020-03-02 05:04发布

问题:

I'm building a parser for expression.

Here's my grammar rule:

expr   ::= term   (+ expr | - expr | null)
term   ::= expo   (* term | / term | null)
expo   ::= factor (^ expo | null)
factor ::= (expr) | int

and corresponding code:

expr :: Parser Int
expr = do t <- term
          do _ <- symbol "+"
             e <- expr
             return (t + e)
             <|> do _ <- symbol "-"
                    e <- expr
                    return (t - e)
                    <|> return t

term :: Parser Int
term = do ep <- expo
          do _ <- symbol "*"
             t <- term
             return (ep * t)
             <|> do _ <- symbol "/"
                    t <- term
                    return (ep `div` t)
                    <|> return ep

expo :: Parser Int
expo = do f <- factor
          do _ <- symbol "^"
             e <- expo
             return (f ^ e)
             <|> return f

factor :: Parser Int
factor = do _ <- symbol "("
            e <- expr
            _ <- symbol ")"
            return e
            <|> integer

It works well for most case but fail for certain:

$ eval "3*1/3"

0

since 3 * 1 / 3 is parsed to 3 * (1 / 3)

 (*)
 / \
3  (/)
   / \
  1   3

and

$ eval "4-3-2"

3

since 4 - 3 - 2 is parsed to 4 - (3 - 2)

 (-)
 / \
4  (-)
   / \
  3   2

I realize it's all about parsing direction (right associativity).

What I expect is (4 - 3) - 2

   (-)
   / \
 (-)  2
 / \
4   3

which means I would parse right-left and interpret it left-right (or parse it recursively).

I have no idea how to achieve so. Nothing but foldl comes to my mind so far.

Could someone suggest what should I do to fix it?

total program:

{-# OPTIONS_GHC -Wall #-}

import Control.Applicative
import Data.Char

newtype Parser a = P (String -> [(a, String)])

parse :: Parser a -> String -> [(a, String)]
parse (P p) inp = p inp

instance Functor Parser where
    fmap g p = P (\inp -> case parse p inp of
                               []         -> []
                               [(v, out)] -> [(g v, out)]
                               _          -> undefined)

instance Applicative Parser where
    pure v = P (\inp -> [(v, inp)])
    pg <*> px = P (\inp -> case parse pg inp of
                                []         -> []
                                [(g, out)] -> parse (fmap g px) out
                                _          -> undefined)

instance Monad Parser where
    p >>= f = P (\inp -> case parse p inp of
                              []         -> []
                              [(v, out)] -> parse (f v) out
                              _          -> undefined)

instance Alternative Parser where
    empty   = P (\_ -> [])
    p <|> q = P (\inp -> case parse p inp of
                              []         -> parse q inp
                              [(v, out)] -> [(v, out)]
                              _          -> undefined)
    many x = some x <|> pure []
    some x = pure (:) <*> x <*> many x

item :: Parser Char
item = P (\inp -> case inp of
                       []        -> []
                       (x : xs)  -> [(x, xs)])

sat :: (Char -> Bool) -> Parser Char
sat p = do x <- item
           if p x
               then return x
               else empty

digit :: Parser Char
digit = sat isDigit

char :: Char -> Parser Char
char x = sat (== x)

string :: String -> Parser String
string []       = return []
string (x : xs) = do _ <- char x
                     _ <- string xs
                     return (x : xs)

space :: Parser ()
space = do _ <- many (sat isSpace)
           return ()

nat :: Parser Int
nat = do xs <- some digit
         return (read xs)

int :: Parser Int
int = do _ <- char '-'
         n <- nat
         return (-n)
         <|> nat

token :: Parser a -> Parser a
token p = do _ <- space
             v <- p
             _ <- space
             return v

integer :: Parser Int
integer = token int

symbol :: String -> Parser String
symbol = token . string

expr :: Parser Int
expr = do t <- term
          do _ <- symbol "+"
             e <- expr
             return (t + e)
             <|> do _ <- symbol "-"
                    e <- expr
                    return (t - e)
                    <|> return t

term :: Parser Int
term = do ep <- expo
          do _ <- symbol "*"
             t <- term
             return (ep * t)
             <|> do _ <- symbol "/"
                    t <- term
                    return (ep `div` t)
                    <|> return ep

expo :: Parser Int
expo = do f <- factor
          do _ <- symbol "^"
             e <- expo
             return (f ^ e)
             <|> return f

factor :: Parser Int
factor = do _ <- symbol "("
            e <- expr
            _ <- symbol ")"
            return e
            <|> integer

eval :: String -> Int
eval xs = case (parse expr xs) of
               [(n, [])]  -> n
               [(_, out)] -> error ("Unused input " ++ out)
               []         -> error "Invalid input"
               _          -> undefined

回答1:

You can implement parser combinators like these:

chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a
chainl1 p op = p >>= rest
  where
    rest x = do{ f <- op
               ; y <- p
               ; rest (f x y)
               }
         <|> pure x

chainr1 :: Parsec a -> Parsec (a -> a -> a) -> Parsec a
chainr1 p op = scan
  where
    scan = p >>= rest
    rest x = (\f y -> f x y) <$> op <*> scan <|> pure x

Then you can implement your grammar rules like these:

expr   = term `chainl1` addop
term   = expo `chainl1` mulop
expo   = factor `chainr1` expop
factor = parens expr <|> integer

addop = (+) <$ symbol "+" <|> (-) <$ symbol "-"
mulop = (*) <$ symbol "*" <|> (div) <$ symbol "/"
expop = (^) <$ symbol "^"

parens p = symbol "(" *> p <* symbol ")"

But I recommend you to use parser library like package parsec.