Swift parse html table

2019-07-20 15:37发布

问题:

I am trying to bring this info from a web page into an iOS app.

HTML Code:

<table border="0" cellpadding="3" cellspacing="0" width="85%"><tr><td width="100%" colspan="3" bgcolor="#C9C9E7"><b>Update as of 3:57:00 PM (CDT) Thu., Apr. 16, 2015</b><br></td></tr><tr>
<td width="50%" bgcolor="#FFFFFF">Production Line 1</td>
<td width="35%" bgcolor="#FFFFFF">9:00 minutes  (10 min)&nbsp;</td>
<td width="15%" bgcolor="#FFFFFF">No delay</td>
</tr><tr>
<td width="50%" bgcolor="#FFFFFF"><b>Production Line 2</b></td>
<td width="35%" bgcolor="#FFFFFF"><b>7:57 minutes  </b><b>(4 min)&nbsp;</b></td>
<td width="15%" bgcolor="#FFFFFF"><b>+3:57</b></td>
</tr><tr>
<td width="50%" bgcolor="#FFFFFF"><b>Production Line 3</b></td>
<td width="35%" bgcolor="#FFFFFF"><b>10:35 minutes  </b><b>(8 min)&nbsp;</b></td>
<td width="15%" bgcolor="#FFFFFF"><b>+2:35</b></td>
</tr></table>

This is my current attempt at the code. I have marked the line that I think is the problem with `//issue`. All I get back are optional nil values. I am using Swift-HTML-Parser to do the parsing; the only change I made to it was adding a section for `table`.

import UIKit
// Convenience for turning a string of HTML into its plain-text rendering.
extension String {
// Encodes the string as UTF-8, loads that data into an NSAttributedString as an
// HTML document, and returns the attributed string's plain `string`.
// Both the encoded data and the NSAttributedString are force-unwrapped and
// `error: nil` is passed, so a failure at either step traps instead of being
// reported to the caller.
var html2String:String {
    return NSAttributedString(data: dataUsingEncoding(NSUTF8StringEncoding)!, options: [NSDocumentTypeDocumentAttribute:NSHTMLTextDocumentType,NSCharacterEncodingDocumentAttribute:NSUTF8StringEncoding], documentAttributes: nil, error: nil)!.string
   }
}

// View controller that downloads an HTML page, tries to pull table cells out of
// it with HTMLParser, and shows the result in `textView`. The page is fetched
// once when the view loads and again every time the text view is tapped.
class mainViewController: UIViewController {

// Text view that displays the fetched content and receives the tap gesture.
@IBOutlet var textView: UITextView!


// Triggers the first fetch and installs a tap recognizer on the text view so
// that a tap refreshes the content.
override func viewDidLoad() {
    super.viewDidLoad()
    updateTrafficInfo()

    let singleFingerTap = UITapGestureRecognizer(target: self, action: "handleSingleTap:")
    self.textView.addGestureRecognizer(singleFingerTap)
}

// MARK: gesture recognizer
// Tap handler: simply re-runs the fetch.
func handleSingleTap(recognizer: UITapGestureRecognizer) {
    updateTrafficInfo()
}

// Issues a GET for the production-times page. On success the body is decoded
// as UTF-8, parsed, and written to `textView`; on failure the error is printed.
func updateTrafficInfo(){
    var request = HTTPTask()
    var err: NSError?
    request.GET("http://localhost/productiontimes.html", parameters: nil, success: {(response: HTTPResponse) in
        if let data = response.responseObject as? NSData {
            // Forced cast: traps if the body cannot be decoded as UTF-8.
            let rawHTML = NSString(data: data, encoding: NSUTF8StringEncoding) as! String
            let option = CInt(HTML_PARSE_NOERROR.value | HTML_PARSE_RECOVER.value)

            var parser = HTMLParser(html: rawHTML, encoding: NSUTF8StringEncoding, option: option, error: &err)
            // A parse error is only logged; execution continues with `parser`.
            if err != nil {
                println(err)
            }
            var bodyNode = parser.table
            if let inputNodes = bodyNode?.xpath("//tr") {
                for node in inputNodes {
                    // UI updates are hopped to the main queue — presumably this
                    // closure is invoked off the main thread; confirm against HTTPTask.
                    // Every iteration assigns `textView.text`, so each row replaces
                    // the previous one rather than appending to it.
                    // NOTE(review): an XPath starting with "//" searches from the
                    // document root, not from `node`, so "//td" is likely not
                    // relative to the current row — verify; ".//td" or "td" may be
                    // what was intended.
                    dispatch_sync(dispatch_get_main_queue(), {
                    self.textView.text = node.xpath("//td")?[0].contents  //issue
                    println(node.xpath("//td")?[0].contents)
                    });
                }
            }


            // Runs after the loop and assigns `textView.text` again, so whatever
            // the loop wrote is replaced by the plain-text rendering of the page.
            dispatch_sync(dispatch_get_main_queue(), {
                self.textView.text = rawHTML.html2String


            });

        }
        },failure: {(error: NSError, response: HTTPResponse?) in
            println("error: \(error)")
    })
}



override func didReceiveMemoryWarning() {
    super.didReceiveMemoryWarning()
    // Dispose of any resources that can be recreated.
}


}

The goal is to get output formatted something like this:

Production Line  -      Time  -                     Delay
Production Line 1 -     9:00 minutes  (10 min) -    No delay
Production Line 2 -     7:57 minutes  (4 min) -     +3:57

回答1:

edit/update: Swift 4.x

extension Data {
    /// The receiver interpreted as a UTF-8 encoded HTML document, or `nil`
    /// when `NSAttributedString` throws while reading it.
    var html2AttributedString: NSAttributedString? {
        do {
            return try NSAttributedString(
                data: self,
                options: [
                    .documentType: NSAttributedString.DocumentType.html,
                    .characterEncoding: String.Encoding.utf8.rawValue
                ],
                documentAttributes: nil
            )
        } catch {
            return nil
        }
    }

    /// Plain text of the HTML document, or an empty string when
    /// `html2AttributedString` is `nil`.
    var html2String: String {
        guard let attributed = html2AttributedString else { return "" }
        return attributed.string
    }
}

// Sample input: the HTML from the question, UTF-8 encoded into a Data value.
// NOTE(review): the whole <table> appears twice back-to-back in this literal
// (the second copy starts right after the first "</table>"). The loop that
// follows only reads a fixed range of lines, so the second copy looks like an
// accidental paste duplicate — confirm before relying on it.
let data = Data("""
<table border="0" cellpadding="3" cellspacing="0" width="85%"><tr><td width="100%" colspan="3" bgcolor="#C9C9E7"><b>Update as of 3:57:00 PM (CDT) Thu., Apr. 16, 2015</b><br></td></tr><tr>
<td width="50%" bgcolor="#FFFFFF">Production Line 1</td>
<td width="35%" bgcolor="#FFFFFF">9:00 minutes  (10 min)&nbsp;</td>
<td width="15%" bgcolor="#FFFFFF">No delay</td>
</tr><tr>
<td width="50%" bgcolor="#FFFFFF"><b>Production Line 2</b></td>
<td width="35%" bgcolor="#FFFFFF"><b>7:57 minutes  </b><b>(4 min)&nbsp;</b></td>
<td width="15%" bgcolor="#FFFFFF"><b>+3:57</b></td>
</tr><tr>
<td width="50%" bgcolor="#FFFFFF"><b>Production Line 3</b></td>
<td width="35%" bgcolor="#FFFFFF"><b>10:35 minutes  </b><b>(8 min)&nbsp;</b></td>
<td width="15%" bgcolor="#FFFFFF"><b>+2:35</b></td>
</tr></table><table border="0" cellpadding="3" cellspacing="0" width="85%"><tr><td width="100%" colspan="3" bgcolor="#C9C9E7"><b>Update as of 3:57:00 PM (CDT) Thu., Apr. 16, 2015</b><br></td></tr><tr>
<td width="50%" bgcolor="#FFFFFF">Production Line 1</td>
<td width="35%" bgcolor="#FFFFFF">9:00 minutes  (10 min)&nbsp;</td>
<td width="15%" bgcolor="#FFFFFF">No delay</td>
</tr><tr>
<td width="50%" bgcolor="#FFFFFF"><b>Production Line 2</b></td>
<td width="35%" bgcolor="#FFFFFF"><b>7:57 minutes  </b><b>(4 min)&nbsp;</b></td>
<td width="15%" bgcolor="#FFFFFF"><b>+3:57</b></td>
</tr><tr>
<td width="50%" bgcolor="#FFFFFF"><b>Production Line 3</b></td>
<td width="35%" bgcolor="#FFFFFF"><b>10:35 minutes  </b><b>(8 min)&nbsp;</b></td>
<td width="15%" bgcolor="#FFFFFF"><b>+2:35</b></td>
</tr></table>
""".utf8)

// Render the HTML as plain text and split it into lines.
let output = data.html2String
let components = output.components(separatedBy: .newlines)

// Print three rows of three consecutive lines each, starting at line 1
// (presumably line 0 is the "Update as of ..." banner and each table cell
// lands on its own line — confirm against the actual rendering).
for rowStart in stride(from: 1, to: 9, by: 3) {
    let cells = components[rowStart...(rowStart + 2)]
    print(cells.joined(separator: " - "))
}


回答2:

There is a problem with your XPath expression; see the following code:

// Sample markup: one banner row followed by three data rows. Every data cell
// wraps its text in <b>, and each time cell contains two <b> elements.
let html = "<table border='0' cellpadding='3' cellspacing='0' width='85%'><tr><td width='100%' colspan='3' bgcolor='#C9C9E7'><b>Update as of 3:57:00 PM (CDT) Thu., Apr. 16, 2015</b><br></td></tr><tr>" +
        "<td width='50%' bgcolor='#FFFFFF'><b>Production Line 1</b></td>" +
        "<td width='35%' bgcolor='#FFFFFF'><b>9:00 minutes</b><b>(10 min)&nbsp;</b></td>" +
        "<td width='15%' bgcolor='#FFFFFF'><b>No delay</b></td>" +
        "</tr><tr>" +
        "<td width='50%' bgcolor='#FFFFFF'><b>Production Line 2</b></td>" +
        "<td width='35%' bgcolor='#FFFFFF'><b>7:57 minutes  </b><b>(4 min)&nbsp;</b></td>" +
        "<td width='15%' bgcolor='#FFFFFF'><b>+3:57</b></td>" +
        "</tr><tr>" +
        "<td width='50%' bgcolor='#FFFFFF'><b>Production Line 3</b></td>" +
        "<td width='35%' bgcolor='#FFFFFF'><b>10:35 minutes  </b><b>(8 min)&nbsp;</b></td>" +
        "<td width='15%' bgcolor='#FFFFFF'><b>+2:35</b></td>" +
    "</tr></table>"



    // Parse the markup; print the error and exit if the parser reports one.
    var err : NSError?
    var parser  = HTMLParser(html: html, error: &err)
    if err != nil {
        println(err)
        exit(1)
    }

    var table = parser.html

    // Skip the first <tr> (the "Update as of ..." banner row) and collect the
    // <b> elements of every remaining cell. The predicate has to sit on <tr>:
    // in "//td[position() > 1]" position() counts a <td> among its sibling
    // <td> elements, so it would drop the first cell of every row (the line
    // name) instead of the banner.
    if let inputNodes = table?.xpath("//tr[position() > 1]/td/b") {

        println("Production Line  -      Time  -                     Delay")
        // Each data row contributes four <b> nodes (name, time, "(n min)",
        // delay), so a new output line starts at every fourth node.
        for (index, node) in enumerate(inputNodes) {
            if index % 4 == 0 {
                println("\n")
            }
            print(node.contents + "-    ")
        }
    }

The output is the following:

Production Line  -      Time  -                     Delay

Production Line 1-    9:00 minutes-    (10 min) -    No delay-    

Production Line 2-    7:57 minutes  -    (4 min) -    +3:57-    

Production Line 3-    10:35 minutes  -    (8 min) -    +2:35- 

You can customize the output however you like. I hope this helps.



标签: html swift ios8