Having trouble splitting the contents of a node in

2019-08-06 01:46发布

问题:

Given the following XML, I am trying to split the contents of the "extra" node into four parts by the | delimiter and return the four parts in the response as chdspt(0), chdspt(1), chdspt(2) and chdspt(3). The problem I seem to have is that if the loaded XML has nothing inside the "extra" node, the whole script fails with a 500 error.

<Export>
    <SAVED_EXPORT>
        <id>00-6189</id>
        <title>00-6189 Start Mech Switch</title>
        <price>5.46 USD</price>
        <extra>Male|Adult|Black|medium</extra>
    </SAVED_EXPORT>
    <SAVED_EXPORT>
        <id>00-6190</id>
        <title>00-6190 Start Mech Switch</title>
        <price>5.46 USD</price>
        <extra></extra>
    </SAVED_EXPORT>
</Export>

Note that I removed a lot of the sibling nodes from the XML above as it is too much to post and should be unnecessary for the question at hand.

' Fetches the XML export at `url`, walks every SAVED_EXPORT element and builds
' an RSS/Google-feed document by string concatenation, then writes it to the
' ASP Response.
Dim xData,xNewDoc,xmldataout,xmldataout2,title,description,link,gpc,chd,chdspt

url = "http://thesite.com/v/myxml.xml"
Set xData = Server.CreateObject("Microsoft.XMLHTTP")
' Third argument False = synchronous request: Send blocks until the response arrives.
xData.Open "get", url, False
xData.Send
Set xNewDoc = xData.responseXML 'ResponseXml returns DOMDocument object

For Each x In xNewDoc.documentElement.selectNodes(".//SAVED_EXPORT")

    ' Look up each child element of the current SAVED_EXPORT record.
    ' NOTE(review): selectSingleNode yields Nothing when no child matches, so every
    ' .text access further down assumes the element is present in the record.
    Dim productid: Set productid = x.selectSingleNode("id")
    Dim pt: Set pt = x.selectSingleNode("title")
    Dim pds: Set pds = x.selectSingleNode("striphtml-description")
    Dim pl: Set pl = x.selectSingleNode("link")
    Dim pe: Set pe = x.selectSingleNode("product_type")
    Dim pri: Set pri = x.selectSingleNode("price")
    Dim psp: Set psp = x.selectSingleNode("sale_price")
    Dim pbr: Set pbr = x.selectSingleNode("brand")
    Dim pcn: Set pcn = x.selectSingleNode("condition")
    Dim pex: Set pex = x.selectSingleNode("expiration_date")
    Dim pwe: Set pwe = x.selectSingleNode("weight")
    Dim ppn: Set ppn = x.selectSingleNode("id")
    Dim pil: Set pil = x.selectSingleNode("image_link")
    Dim pav: Set pav = x.selectSingleNode("availability")
    Dim ppi: Set ppi = x.selectSingleNode("upc")
    Dim pch: Set pch = x.selectSingleNode("extra")

    ' Manual XML escaping of &, < and > ("&" first so the entities produced
    ' by the later replacements are not escaped a second time).
    title=Replace(pt.text,"&","&amp;")
    title=Replace(title,"<","&lt;")
    title=Replace(title,">","&gt;")

    description=Replace(pds.text,"&","&amp;")
    description=Replace(description,"<","&lt;")
    description=Replace(description,">","&gt;")

    link=Replace(pl.text,"&","&amp;")
    link=Replace(link,"<","&lt;")
    link=Replace(link,">","&gt;")

    ' NOTE(review): gpc is emitted as g:google_product_category below but is built
    ' from the price node (pri); the product_type node (pe) is never used.
    gpc=Replace(pri.text,"&","&amp;")
    gpc=Replace(gpc,"<","&lt;")
    gpc=Replace(gpc,">","&gt;")

    ' Split the pipe-delimited "extra" value into its parts.
    ' This is the failure point described in the question: for an empty <extra/>
    ' Split returns an array with no elements, so chdspt(0)..chdspt(3) below are
    ' out of range. Values with fewer than four parts fail the same way.
    chd = pch.text
    chdspt = split(chd, "|")    


' Assemble one <item> for this record.
' NOTE(review): as shown, the lines below continue one statement without the
' trailing " _" that VBScript requires for line continuation - possibly lost
' when the code was posted.
xmldataout= "<item><g:id>" & productid.text & "</g:id>" & "<title>" & title & "</title>"
& "<description>" & description & "</description>" & "<link>" & link & "</link>"
& "<g:google_product_category>" & gpc & "</g:google_product_category>" & "<g:price>" & pri.text & "</g:price>" & "<g:sale_price>" & psp.text & "</g:sale_price>"
& "<g:brand>" & pbr.text & "</g:brand>" & "<g:condition>" & pcn.text & "</g:condition>"
& "<g:expiration_date>" & pex.text & "</g:expiration_date>" & "<g:shipping_weight>" & pwe.text & "</g:shipping_weight>" & "<g:mpn>" & ppn.text & "</g:mpn>"
& "<g:image_link>" & pil.text & "</g:image_link>" & "<g:availability>" & pav.text & "</g:availability>" & "<g:gtin>" & ppi.text & "</g:gtin>"
& "<g:gender>" & chdspt(0) &  "</g:gender>" & "<g:age_group>" & chdspt(1) & "</g:age_group>" & "<g:color>" & chdspt(2) & "</g:color>"
& "<g:size>" & chdspt(3) & "</g:size>  </item>" 

' Append this item to the accumulated feed body.
xmldataout2=xmldataout2+xmldataout
Next


' Wrap the accumulated items in the RSS envelope and send the result.
Response.ContentType = "text/xml"
Response.Write("<?xml version='1.0' encoding='UTF-8'?><rss version='2.0' xmlns:g='http://base.google.com/ns/1.0'><channel><title>store</title><link>http://www.thesite.com</link><description>This is a sample feed</description>" & xmldataout2 & "</channel></rss>")

Set xNewDoc = Nothing
Set xData = Nothing

回答1:

Here is a completely new version of your code, using "best practices", i.e. no manual character escaping, no building XML from strings but using a DOM instead, and using current COM objects (Microsoft.XMLHTTP is way outdated).

Note that NewElem() and GetText() are convenience functions found at the end of the code.

  • NewElem() creates a new Element with or without namespace and adds a text value to it
  • GetText() searches a context (document or node) for an XPath expression and returns the text value of the first element found, or "" (i.e. a smarter .selectSingleNode())

ASP code:

Option Explicit

' Builds an RSS 2.0 / Google product feed from the SAVED_EXPORT records of a
' remote XML export and streams it to the ASP Response.
' The output is built as a DOM document, so all escaping is done by MSXML.

Const NODE_ELEMENT = 1   ' DOM node type handed to createNode() by NewElem()

Dim Namespaces, XmlHttpReq, InputDoc, OutputDoc, Channel, x, NewItem
Dim url, StubFeed, v, vGender, vAgeGroup, vColor, vSize
Dim FetchError

url = "http://thesite.com/v/myxml.xml"
StubFeed = "<rss version='2.0' xmlns:g='http://base.google.com/ns/1.0'><channel><title>store</title><link>http://www.thesite.com</link><description>This is a sample feed</description></channel></rss>"

' prepare a dictionary of namespace prefixes and respective URIs
Set Namespaces = Server.CreateObject("Scripting.Dictionary")
Namespaces.Add "g", "http://base.google.com/ns/1.0"

' MSXML 6.0 is the currently supported MSXML version. ServerXMLHTTP (not
' XMLHTTP, which sits on WinINet) is the component intended for requests
' issued from server-side code such as ASP.
Set OutputDoc  = Server.CreateObject("MSXML2.DOMDocument.6.0")
Set XmlHttpReq = Server.CreateObject("MSXML2.ServerXMLHTTP.6.0")

' retrieve the source document (synchronously)
' Transport errors raise a run-time error from Open/Send, so trap them here and
' turn every failure mode into one readable message.
FetchError = ""
On Error Resume Next
XmlHttpReq.Open "GET", url, False
XmlHttpReq.Send
If Err.Number <> 0 Then
  FetchError = "request failed: " & Err.Description
ElseIf XmlHttpReq.status <> 200 Then
  FetchError = "HTTP status " & XmlHttpReq.status
End If
On Error GoTo 0

If FetchError = "" Then
  Set InputDoc = XmlHttpReq.responseXML
  ' responseXML is an empty document when the body could not be parsed as XML
  If InputDoc.documentElement Is Nothing Then FetchError = "response is not well-formed XML"
End If

If FetchError <> "" Then
  ' Report the upstream problem instead of emitting an empty or broken feed.
  Response.Status = "502 Bad Gateway"
  Response.ContentType = "text/plain"
  Response.Write "Could not load source feed: " & FetchError
  Response.End
End If

' the stub of the output is loaded from string
OutputDoc.loadXML StubFeed

' all new items are appended to this element
Set Channel = OutputDoc.selectSingleNode("/rss/channel")

For Each x In InputDoc.selectNodes("//SAVED_EXPORT")
  Set NewItem = OutputDoc.createElement("item")
  With NewItem
    ' GetText() returns "" for a missing element, so absent source fields
    ' simply produce empty feed elements.
    .appendChild NewElem("g:id",                      GetText(x, "id"))
    .appendChild NewElem("title",                     GetText(x, "title"))
    .appendChild NewElem("description",               GetText(x, "striphtml-description"))
    .appendChild NewElem("link",                      GetText(x, "link"))
    .appendChild NewElem("g:google_product_category", GetText(x, "product_type"))
    .appendChild NewElem("g:price",                   GetText(x, "price"))
    .appendChild NewElem("g:sale_price",              GetText(x, "sale_price"))
    .appendChild NewElem("g:brand",                   GetText(x, "brand"))
    .appendChild NewElem("g:condition",               GetText(x, "condition"))
    .appendChild NewElem("g:expiration_date",         GetText(x, "expiration_date"))
    .appendChild NewElem("g:shipping_weight",         GetText(x, "weight"))
    .appendChild NewElem("g:mpn",                     GetText(x, "id"))
    .appendChild NewElem("g:image_link",              GetText(x, "image_link"))
    .appendChild NewElem("g:availability",            GetText(x, "availability"))
    .appendChild NewElem("g:gtin",                    GetText(x, "upc"))

    ' "extra" holds gender|age group|color|size. Split("") gives an array with
    ' no elements (UBound = -1), so only index into v when exactly four parts
    ' are present; otherwise all four values stay empty.
    v = Split(Trim( GetText(x, "extra[normalize-space() != '']") ), "|")
    If UBound(v) = 3 Then
      vGender = v(0) : vAgeGroup = v(1) : vColor = v(2) : vSize = v(3)
    Else
      vGender = ""   : vAgeGroup = ""   : vColor = ""   : vSize = ""
    End If

    .appendChild NewElem("g:gender",    vGender)
    .appendChild NewElem("g:age_group", vAgeGroup)
    .appendChild NewElem("g:color",     vColor)
    .appendChild NewElem("g:size",      vSize)
  End With

  Channel.appendChild NewItem
Next

' send the feed (saving the document to the Response object does this)
Response.ContentType = "text/xml; charset=UTF-8"
OutputDoc.save Response
' --- HELPER FUNCTIONS ------------------------------------------------------
Function NewElem(name, text)
  ' Creates a new element owned by OutputDoc and returns it (not yet attached
  ' to the tree).
  '   name - element name; a "prefix:local" name is created in the namespace
  '          registered for that prefix in the Namespaces dictionary
  '   text - text content; left unset when it is ""
  ' Raises a run-time error when the prefix is not registered in Namespaces.
  Dim prefix, namespaceURI
  If InStr(name, ":") > 0 Then
    prefix = Split(name, ":")(0)
    ' Reading a missing key through the dictionary's default Item property
    ' would silently add an empty entry, so test with Exists first.
    If Not Namespaces.Exists(prefix) Then
      Err.Raise vbObjectError + 1, "NewElem", "Unknown namespace prefix '" & prefix & "' in element name '" & name & "'"
    End If
    namespaceURI = Namespaces(prefix)
    Set NewElem = OutputDoc.createNode(NODE_ELEMENT, name, namespaceURI)
  Else
    Set NewElem = OutputDoc.createElement(name)
  End If

  If text <> "" Then NewElem.text = text
End Function

Function GetText(context, xPath)
  ' Evaluates xPath against context (anything exposing selectSingleNode, i.e.
  ' a document or a node) and returns the text of the first match, or "" when
  ' nothing matches.
  Dim match
  GetText = ""
  Set match = context.selectSingleNode(xPath)
  If Not match Is Nothing Then GetText = match.text
End Function

Think about using the more appropriate application/rss+xml content type instead of text/xml



回答2:

Just noticed that @TomaLak left out the <?xml version='1.0' ?> in this line of code above

StubFeed = "<rss version='2.0' xmlns:g='http://base.google.com/ns/1.0'><channel><title>store</title><link>http://www.thesite.com</link><description>This is a sample feed</description></channel></rss>"