F#, FParsec, and Updating UserState

2020-04-10 02:08发布

Okay, since my last question elicited no responses, I'm forging ahead in a different direction. Lol!

I can't find any examples beyond the official documentation on managing user state, or accessing the results of a prior parser.

N.b. This code does not compile.

namespace MultipartMIMEParser

open FParsec
open System.IO


type Header = { name  : string
              ; value : string
              ; addl  : (string * string) list option }

type Content = Content of string
             | Post of Post list
and Post = { headers : Header list
           ; content : Content }

type private UserState = { Boundary : string }
  with static member Default = { Boundary="" }


module internal P =
  let ($) f x = f x
  let undefined = failwith "Undefined."
  let ascii = System.Text.Encoding.ASCII
  let str cs = System.String.Concat (cs:char list)

  let makeHeader ((n,v),nvps) = { name=n; value=v; addl=nvps}

  let runP p s = match runParserOnStream p UserState.Default "" s ascii with
                 | Success (r,_,_) -> r
                 | Failure (e,_,_) -> failwith (sprintf "%A" e)

  let blankField = parray 2 newline

  let delimited d e =
      let pEnd = preturn () .>> e
      let part = spaces >>. (manyTill $ noneOf d $ (attempt (preturn () .>> pstring d) <|> pEnd)) |>> str
       in part .>>. part

  let delimited3 firstDelimiter secondDelimiter thirdDelimiter endMarker =
      delimited firstDelimiter endMarker
      .>>. opt (many (delimited secondDelimiter endMarker
                      >>. delimited thirdDelimiter endMarker))

  // TODO: This is the parser I'm asking about.
  let pHeader =
      let includesBoundary s = undefined
      let setBoundary b = { Boundary=b }
       in delimited3 ":" ";" "=" blankField
          |>> makeHeader
          >>. fun stream -> if includesBoundary // How do I access the output from makeHeader here?
                            then stream.UserState <- setBoundary b // I need b to be read from the output of makeHeader.
                                 Reply ()
                            else Reply ()

  let pHeaders = manyTill pHeader $ attempt (preturn () .>> blankField)

  // N.b. This is the mess I'm currently wrestling with. It does not compile, and is
  // not sound yet.
  let rec pContent boundary =
      match boundary with
      | "" -> // Content is text.
              let line = restOfLine false
               in pipe2 pHeaders (manyTill line $ attempt (preturn () .>> blankField))
                  $ fun h c -> { headers=h
                               ; content=Content $ System.String.Join (System.Environment.NewLine,c) }
      | _  -> // Content contains boundaries.
              let b = "--"+boundary
              let p = pipe2 pHeaders (pContent b) $ fun h c -> { headers=h; content=c }
               in skipString b >>. manyTill p (attempt (preturn () .>> blankField))

  let pStream = runP (pipe2 pHeaders pContent $ fun h c -> { headers=h; content=c })


type MParser (s:Stream) =
  let r = P.pStream s

  let findHeader name =
      match r.headers |> List.tryFind (fun h -> h.name.ToLower() = name) with
      | Some h -> h.value
      | None   -> ""

  member p.Boundary =
    let isBoundary ((s:string),_) = s.ToLower() = "boundary"
    let header = r.headers
                 |> List.tryFind (fun h -> if h.addl.IsSome
                                           then h.addl.Value |> List.exists isBoundary
                                           else false)
     in match header with
        | Some h -> h.addl.Value |> List.find isBoundary |> snd
        | None   -> ""
  member p.ContentID = findHeader "content-id"
  member p.ContentLocation = findHeader "content-location"
  member p.ContentSubtype = findHeader "type"
  member p.ContentTransferEncoding = findHeader "content-transfer-encoding"
  member p.ContentType = findHeader "content-type"
  member p.Content = r.content
  member p.Headers = r.headers
  member p.MessageID = findHeader "message-id"
  member p.MimeVersion = findHeader "mime-version"

A truncated example of the POST I am trying to parse follows:

content-type: Multipart/related; boundary="RN-Http-Body-Boundary"; type="multipart/related"

--RN-Http-Body-Boundary
Message-ID: <25845033.1160080657073.JavaMail.webmethods@exshaw>
Mime-Version: 1.0
Content-Type: multipart/related; type="application/xml";
  boundary="----=_Part_235_11184805.1160080657052"

------=_Part_235_11184805.1160080657052
Content-Type: Application/XML
Content-Transfer-Encoding: binary
Content-Location: RN-Preamble
Content-ID: <1430586.1160080657050.JavaMail.webmethods@exshaw>

XML document begins here...

1条回答
2楼-- · 2020-04-10 02:39

So basically, what you want to do in pHeader is to use the parser as a monad, rather than an applicative. Based on your code style you come from Haskell so I'll assume you know these words. Something like this then:

  let pHeader =
      let includesBoundary s = undefined
      let setBoundary b = { Boundary=b }
       in delimited3 ":" ";" "=" blankField
          |>> makeHeader
          >>= fun header stream ->
               if includesBoundary header
               then let b = undefined // some expression including header, if I understood correctly
                    stream.UserState <- setBoundary b
                    Reply ()
               else Reply ()

Or you can write it in a computation expression (which would correspond to do-notation in Haskell):

  let pHeader =
      let includesBoundary s = undefined
      let setBoundary b = { Boundary=b }
      parse {
          let! header =
              delimited3 ":" ";" "=" blankField
              |>> makeHeader
          return! fun stream ->
               if includesBoundary header
               then let b = undefined // some expression including header, if I understood correctly
                    stream.UserState <- setBoundary b
                    Reply ()
               else Reply ()
      }
查看更多
登录 后发表回答