Lecture 12.0 — 2017-02-27

More parsing with Applicative

This lecture is written in literate Haskell; you can download the raw source.

From our basis parser definitions, we worked on defining a left associative grammar for arithmetic expressions.

import Data.Char
import Control.Applicative

newtype Parser a = Parser { parse :: String -> Maybe (a,String) }

instance Functor Parser where
  fmap f p = Parser $ \s ->
    case parse p s of
      Nothing -> Nothing
      Just (v,s') -> Just (f v,s')

instance Applicative Parser where
  pure a = Parser $ \s -> Just (a,s)
  f <*> a = Parser $ \s ->
    case parse f s of
      Nothing -> Nothing
      Just (g,s') -> parse (fmap g a) s'

eof :: Parser ()
eof = Parser $ \s -> if null s then Just ((),"") else Nothing

ensure :: (a -> Bool) -> Parser a -> Parser a
ensure pred p = Parser $ \s ->
  case parse p s of
    Nothing -> Nothing
    Just (a,s') -> if pred a then Just (a,s') else Nothing

satisfy :: (Char -> Bool) -> Parser Char
satisfy p = Parser f
  where f [] = Nothing
        f (x:xs) = if p x then Just (x,xs) else Nothing

lookahead :: Parser (Maybe Char)
lookahead = Parser f
  where f [] = Just (Nothing,[])
        f (c:s) = Just (Just c,c:s)

instance Alternative Parser where
  empty = Parser $ \s -> Nothing
  p1 <|> p2 = Parser $ \s ->
    case parse p1 s of
      Just (a,s') -> Just (a,s')
      Nothing -> parse p2 s

int :: Parser Int
int = read <$> some (satisfy isDigit)

spaces :: Parser ()
spaces = many (satisfy isSpace) *> pure ()

char :: Char -> Parser Char
char c = spaces *> satisfy (==c)

plus, minus, times :: Parser Char
plus = char '+'
minus = char '-'
times = char '*'

num :: Parser Int
num = spaces *> int

Here’s the standard arithmetic parser we started with, extended with negation:

data Arith =
   Num Int
 | Plus Arith Arith
 | Times Arith Arith 
 | Neg Arith
 deriving Show

sub :: Arith -> Arith -> Arith
sub a b = Plus a (Neg b)

term, factor, neg, atom :: Parser Arith
term =       Plus <$> factor <* plus <*> term
         <|> sub  <$> factor <* minus <*> term
         <|> factor
factor =     Times <$> neg <* times <*> factor 
         <|> neg
neg =        Neg <$> (minus *> atom)
         <|> atom
atom = Num <$> num <|> (char '(' *> term <* char ')')

But this parser has a problem: it’s wrong! Consider parse term "5 - 3 - 2". The conventional rules of arithmetic indicate we should have left association, i.e., 5 minus 3 minus 2. But instead we get right associativity: 5 minus (3 minus 2).

How do we fix it? The idea is to understand a “chain” of left associative operations as a series of sub-operations separated by operators.

To warm up, here’s a way to parse one or more ps separated by a sep.

sepBy1 :: Parser a -> Parser sep -> Parser [a]
sepBy1 p sep = (:) <$> p <*> (many (sep *> p))

To define left chaining, we need our separator to not return some arbitrary, thrown away value—we want it to tell us which operator to use! Once we’ve collected all the operators, we can apply them left-associatively by using a fold:

chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a
chainl1 p sep = foldl (\acc (op,v) -> op acc v) <$> 
                p <*> many ((\op v -> (op,v)) <$> sep <*> p)

Refactored, we can verify that our grammar still works… try it out!

term', factor', neg', atom' :: Parser Arith
term' = factor' `chainl1` addop
  where addop =     (char '+' *> pure Plus)
                <|> (char '-' *> pure sub)
factor' = neg' `chainl1` mulop
  where mulop =     (char '*' *> pure Times)
neg' = Neg <$> (minus *> atom') <|> atom'
atom' = Num <$> num <|> (char '(' *> term' <* char ')')

Parsing WhileNZ

type VarName = String

keywords :: [String]
keywords = ["WHILE", "DO", "END", "SKIP"]

kw :: String -> Parser ()
kw s = pure () <* spaces <* (sequenceA $ map (satisfy . (==)) s) -- subtly incorrect on, e.g., DOINK, which isn't a kw

var :: Parser VarName
var = ensure (not . (`elem` keywords)) (spaces *> id)
  where id = (:) <$> satisfy isAlpha <*> many (satisfy isAlphaNum)

data WhileNZ =
    Skip
  | Assign VarName Arith
  | Seq WhileNZ WhileNZ
  | WhileNZ Arith WhileNZ
  deriving Show

statement, statements :: Parser WhileNZ
statements = foldl1 Seq <$> (statement `sepBy1` char ';')
statement =     const Skip <$> kw "SKIP"
            <|> Assign <$> (var <* kw ":=") <*> term
            <|> WhileNZ <$>
                  (kw "WHILE" *> (term <* kw "DO")) <*>
                  (statements <* kw "END")
            <|> (\c b -> Seq c (WhileNZ b c)) <$>
                  (kw "DO" *> (statements <* kw "WHILE")) <*>
                  (term <* kw "END")

prog1 = fst <$> parse statements "x := 0"
prog2 = fst <$> parse statements "x := 0;\nWHILE x - 5 DO x := x + 1 END; SKIP"