我想读在给定文件NSURL
并将其加载到一个数组,由一个换行符分隔项目\n
。
这是我迄今所做的方式:
var possList: NSString? = NSString.stringWithContentsOfURL(filePath.URL) as? NSString
if var list = possList {
list = list.componentsSeparatedByString("\n") as NSString[]
return list
}
else {
//return empty list
}
我不是很满意这个一对夫妇的原因。 一,我与范围从几KB到几百MB的大小文件的工作。 你可以想像,处理字符串这个大是缓慢和笨拙。 其次,死机时,它的执行UI - 同样,也不好。
我看着运行在一个单独的线程的代码,但我一直有麻烦,而且,它仍然没有解决处理巨大的字符串的问题。
我想要做的是沿着以下伪线的东西:
var aStreamReader = new StreamReader(from_file_or_url)
while aStreamReader.hasNextLine == true {
currentline = aStreamReader.nextLine()
list.addItem(currentline)
}
我怎么会在斯威夫特做到这一点?
关于我从阅读这些文件的几个注意事项:所有的文件包括由两种分离短(<255个字符)的字符串\n
或\r\n
。 文件的长度的范围从〜100行到50万线。 它们可能包含欧洲字符,和/或重音符号的字符。
Answer 1:
(该代码是雨燕2.2 / 7.3的Xcode现在,旧版本可以在如果有人需要它。对于斯威夫特3的更新版本,结束时所提供的编辑历史中找到。)
以下SWIFT代码在很大程度上受到各种答案的启发如何通过线来读取NSFileHandle线数据? 。 它从块读取文件,并转换完整的生产线为字符串。
默认线定界符( \n
),字符串编码(UTF-8)和块大小(4096)可与可选参数进行设置。
class StreamReader {
let encoding : UInt
let chunkSize : Int
var fileHandle : NSFileHandle!
let buffer : NSMutableData!
let delimData : NSData!
var atEof : Bool = false
init?(path: String, delimiter: String = "\n", encoding : UInt = NSUTF8StringEncoding, chunkSize : Int = 4096) {
self.chunkSize = chunkSize
self.encoding = encoding
if let fileHandle = NSFileHandle(forReadingAtPath: path),
delimData = delimiter.dataUsingEncoding(encoding),
buffer = NSMutableData(capacity: chunkSize)
{
self.fileHandle = fileHandle
self.delimData = delimData
self.buffer = buffer
} else {
self.fileHandle = nil
self.delimData = nil
self.buffer = nil
return nil
}
}
deinit {
self.close()
}
/// Return next line, or nil on EOF.
func nextLine() -> String? {
precondition(fileHandle != nil, "Attempt to read from closed file")
if atEof {
return nil
}
// Read data chunks from file until a line delimiter is found:
var range = buffer.rangeOfData(delimData, options: [], range: NSMakeRange(0, buffer.length))
while range.location == NSNotFound {
let tmpData = fileHandle.readDataOfLength(chunkSize)
if tmpData.length == 0 {
// EOF or read error.
atEof = true
if buffer.length > 0 {
// Buffer contains last line in file (not terminated by delimiter).
let line = NSString(data: buffer, encoding: encoding)
buffer.length = 0
return line as String?
}
// No more lines.
return nil
}
buffer.appendData(tmpData)
range = buffer.rangeOfData(delimData, options: [], range: NSMakeRange(0, buffer.length))
}
// Convert complete line (excluding the delimiter) to a string:
let line = NSString(data: buffer.subdataWithRange(NSMakeRange(0, range.location)),
encoding: encoding)
// Remove line (and the delimiter) from the buffer:
buffer.replaceBytesInRange(NSMakeRange(0, range.location + range.length), withBytes: nil, length: 0)
return line as String?
}
/// Start reading from the beginning of file.
func rewind() -> Void {
fileHandle.seekToFileOffset(0)
buffer.length = 0
atEof = false
}
/// Close the underlying file. No reading must be done after calling this method.
func close() -> Void {
fileHandle?.closeFile()
fileHandle = nil
}
}
用法:
if let aStreamReader = StreamReader(path: "/path/to/file") {
defer {
aStreamReader.close()
}
while let line = aStreamReader.nextLine() {
print(line)
}
}
你甚至可以使用阅读器使用for-in循环
for line in aStreamReader {
print(line)
}
通过实现SequenceType
协议(比较http://robots.thoughtbot.com/swift-sequences ):
extension StreamReader : SequenceType {
func generate() -> AnyGenerator<String> {
return AnyGenerator {
return self.nextLine()
}
}
}
更新夫特3 / Xcode的8个β6:也“现代化”使用guard
和新的Data
值类型:
class StreamReader {
let encoding : String.Encoding
let chunkSize : Int
var fileHandle : FileHandle!
let delimData : Data
var buffer : Data
var atEof : Bool
init?(path: String, delimiter: String = "\n", encoding: String.Encoding = .utf8,
chunkSize: Int = 4096) {
guard let fileHandle = FileHandle(forReadingAtPath: path),
let delimData = delimiter.data(using: encoding) else {
return nil
}
self.encoding = encoding
self.chunkSize = chunkSize
self.fileHandle = fileHandle
self.delimData = delimData
self.buffer = Data(capacity: chunkSize)
self.atEof = false
}
deinit {
self.close()
}
/// Return next line, or nil on EOF.
func nextLine() -> String? {
precondition(fileHandle != nil, "Attempt to read from closed file")
// Read data chunks from file until a line delimiter is found:
while !atEof {
if let range = buffer.range(of: delimData) {
// Convert complete line (excluding the delimiter) to a string:
let line = String(data: buffer.subdata(in: 0..<range.lowerBound), encoding: encoding)
// Remove line (and the delimiter) from the buffer:
buffer.removeSubrange(0..<range.upperBound)
return line
}
let tmpData = fileHandle.readData(ofLength: chunkSize)
if tmpData.count > 0 {
buffer.append(tmpData)
} else {
// EOF or read error.
atEof = true
if buffer.count > 0 {
// Buffer contains last line in file (not terminated by delimiter).
let line = String(data: buffer as Data, encoding: encoding)
buffer.count = 0
return line
}
}
}
return nil
}
/// Start reading from the beginning of file.
func rewind() -> Void {
fileHandle.seek(toFileOffset: 0)
buffer.count = 0
atEof = false
}
/// Close the underlying file. No reading must be done after calling this method.
func close() -> Void {
fileHandle?.closeFile()
fileHandle = nil
}
}
extension StreamReader : Sequence {
func makeIterator() -> AnyIterator<String> {
return AnyIterator {
return self.nextLine()
}
}
}
Answer 2:
我从包裹藻类的答案代码为方便类(雨燕4.0)
UPD:此代码是独立于平台(Mac系统,iOS版,Ubuntu的)
import Foundation
/// Read text file line by line
public class LineReader {
public let path: String
fileprivate let file: UnsafeMutablePointer<FILE>!
init?(path: String) {
self.path = path
file = fopen(path, "r")
guard file != nil else { return nil }
}
public var nextLine: String? {
var line:UnsafeMutablePointer<CChar>? = nil
var linecap:Int = 0
defer { free(line) }
return getline(&line, &linecap, file) > 0 ? String(cString: line!) : nil
}
deinit {
fclose(file)
}
}
extension LineReader: Sequence {
public func makeIterator() -> AnyIterator<String> {
return AnyIterator<String> {
return self.nextLine
}
}
}
用法:
guard let reader = LineReader(path: "/Path/to/file.txt") else {
return; // cannot open file
}
for line in reader {
print(">" + line.trimmingCharacters(in: .whitespacesAndNewlines))
}
库GitHub上
Answer 3:
我迟到的游戏,但这里的小班我写了这个目的。 一些不同的尝试后(尽量继承NSInputStream
)我发现这是一个合理的和简单的方法。
记得#import <stdio.h>
在你的桥接报头。
// Use is like this:
let readLine = ReadLine(somePath)
while let line = readLine.readLine() {
// do something...
}
class ReadLine {
private var buf = UnsafeMutablePointer<Int8>.alloc(1024)
private var n: Int = 1024
let path: String
let mode: String = "r"
private lazy var filepointer: UnsafeMutablePointer<FILE> = {
let csmode = self.mode.withCString { cs in return cs }
let cspath = self.path.withCString { cs in return cs }
return fopen(cspath, csmode)
}()
init(path: String) {
self.path = path
}
func readline() -> String? {
// unsafe for unknown input
if getline(&buf, &n, filepointer) > 0 {
return String.fromCString(UnsafePointer<CChar>(buf))
}
return nil
}
deinit {
buf.dealloc(n)
fclose(filepointer)
}
}
Answer 4:
事实证明,良好的老fasioned C API是斯威夫特蛮舒服的,一旦你神交UnsafePointer。 下面是一个简单的猫,从标准输入并打印读取到stdout行由行。 你甚至不需要基础。 达尔文就足够了:
import Darwin
let bufsize = 4096
// let stdin = fdopen(STDIN_FILENO, "r") it is now predefined in Darwin
var buf = UnsafePointer<Int8>.alloc(bufsize)
while fgets(buf, Int32(bufsize-1), stdin) {
print(String.fromCString(CString(buf)))
}
buf.destroy()
Answer 5:
该函数接受一个文件流,并返回一个AnyGenerator
返回该文件的每一行:
func lineGenerator(file:UnsafeMutablePointer<FILE>) -> AnyGenerator<String>
{
return AnyGenerator { () -> String? in
var line:UnsafeMutablePointer<CChar> = nil
var linecap:Int = 0
defer { free(line) }
return getline(&line, &linecap, file) > 0 ? String.fromCString(line) : nil
}
}
因此,举例来说,这里是你将如何使用它打印在您的应用程序包名为“foo”文件的每一行:
let path = NSBundle.mainBundle().pathForResource("foo", ofType: nil)!
let file = fopen(path,"r") // open the file stream
for line in lineGenerator(file) {
// suppress print's automatically inserted line ending, since
// lineGenerator captures each line's own new line character.
print(line, separator: "", terminator: "")
}
fclose(file) // cleanup the file stream
我通过修改亚历克斯·布朗的回答,除去马丁的r评论中提及了内存泄漏,并通过更新它与雨燕2.2(7.3的Xcode)工作开发了这个答案。
Answer 6:
试试这个答案,或阅读的Mac OS 流编程指南 。
您可能会发现性能实际上会更好地使用stringWithContentsOfURL
,不过,因为它会更快比基于光盘的数据存储为基础的(或存储器映射)数据进行工作。
执行它在另一个线程上是有据可查的,还举例这里 。
更新
如果您不想一次阅读完这一切,你不希望使用NSStreams,那么你可能需要使用C级文件I / O。 有很多理由不这样做-堵,字符编码,处理I / O错误,速度,仅举几例-这是该基金会库是。 我已经勾勒低于一个简单的答案与ACSII数据只涉及:
class StreamReader {
var eofReached = false
let fileHandle: UnsafePointer<FILE>
init (path: String) {
self.fileHandle = fopen(path.bridgeToObjectiveC().UTF8String, "rb".bridgeToObjectiveC().UTF8String)
}
deinit {
fclose(self.fileHandle)
}
func nextLine() -> String {
var nextChar: UInt8 = 0
var stringSoFar = ""
var eolReached = false
while (self.eofReached == false) && (eolReached == false) {
if fread(&nextChar, 1, 1, self.fileHandle) == 1 {
switch nextChar & 0xFF {
case 13, 10 : // CR, LF
eolReached = true
case 0...127 : // Keep it in ASCII
stringSoFar += NSString(bytes:&nextChar, length:1, encoding: NSASCIIStringEncoding)
default :
stringSoFar += "<\(nextChar)>"
}
} else { // EOF or error
self.eofReached = true
}
}
return stringSoFar
}
}
// OP's original request follows:
var aStreamReader = StreamReader(path: "~/Desktop/Test.text".stringByStandardizingPath)
while aStreamReader.eofReached == false { // Changed property name for more accurate meaning
let currentline = aStreamReader.nextLine()
//list.addItem(currentline)
println(currentline)
}
Answer 7:
(注:我使用的是斯威夫特3.0.1上的Xcode 8.2.1和MacOS塞拉利昂10.12.3)
所有我在这里看到的答案错过了,他可以找LF或CRLF。 如果一切顺利的话,他/她可能只是匹配LF和检查返回的字符串在最后一个额外的CR。 但一般查询涉及多个搜索字符串。 换言之,分隔符必须是一个Set<String>
,其中该组既不是空的,也不包含空字符串,而不是一个单一的字符串。
在我第一次尝试在这最后一年,我试着做“正确的事”并搜索一般组字符串。 它太硬; 你需要一个完全成熟的解析器和状态机和等。 我放弃了它,该项目是的一部分。
现在,我又做项目,再次面临着同样的挑战。 现在,我要硬编码的CR和LF搜索。 我不认为任何人会需要两个半独立半依赖的人物搜索这样的外部CR / LF解析。
我使用所提供的搜索方法Data
,所以我没有做字符串编码和东西在这里。 只是原始的二进制处理。 只是假设我有一个ASCII超集,如ISO Latin-1的或UTF-8,在这里。 您可以在下一高层处理字符串编码,和你踢上附有还是次要码点的CR / LF是否算作一个CR或LF。
该算法:只是不断寻找下一个CR,并从当前字节的下一个LF偏移。
- 如果没有被发现,然后再考虑下一个数据串是从当前偏移到最终的数据。 需要注意的是终止长度为0马克这是你的阅读循环结束。
- 如果LF第一次发现,或仅LF被发现,考虑下一个数据串是从当前偏移到LF。 需要注意的是终止长度为1.将偏移到LF后。
- 如果只找到一个CR,不喜欢的LF的情况下(只是用不同的字节值)。
- 否则,我们得到了一个CR接着是LF。
- 如果这两个是相邻的,则处理像LF情况下,除了终止子长度为2。
- 如果他们之间有一个字节,字节表示还CR,那么我们得到了“Windows 8开发者写了一个二进制\ r \ n而在文本模式下,给人一种\ r \ r \ n”个问题。 还处理它像LF情况下,除了终止子长度为3。
- 否则,CR和LF没有连接,并处理像刚刚CR情况。
下面是一些代码:
struct DataInternetLineIterator: IteratorProtocol {
/// Descriptor of the location of a line
typealias LineLocation = (offset: Int, length: Int, terminatorLength: Int)
/// Carriage return.
static let cr: UInt8 = 13
/// Carriage return as data.
static let crData = Data(repeating: cr, count: 1)
/// Line feed.
static let lf: UInt8 = 10
/// Line feed as data.
static let lfData = Data(repeating: lf, count: 1)
/// The data to traverse.
let data: Data
/// The byte offset to search from for the next line.
private var lineStartOffset: Int = 0
/// Initialize with the data to read over.
init(data: Data) {
self.data = data
}
mutating func next() -> LineLocation? {
guard self.data.count - self.lineStartOffset > 0 else { return nil }
let nextCR = self.data.range(of: DataInternetLineIterator.crData, options: [], in: lineStartOffset..<self.data.count)?.lowerBound
let nextLF = self.data.range(of: DataInternetLineIterator.lfData, options: [], in: lineStartOffset..<self.data.count)?.lowerBound
var location: LineLocation = (self.lineStartOffset, -self.lineStartOffset, 0)
let lineEndOffset: Int
switch (nextCR, nextLF) {
case (nil, nil):
lineEndOffset = self.data.count
case (nil, let offsetLf):
lineEndOffset = offsetLf!
location.terminatorLength = 1
case (let offsetCr, nil):
lineEndOffset = offsetCr!
location.terminatorLength = 1
default:
lineEndOffset = min(nextLF!, nextCR!)
if nextLF! < nextCR! {
location.terminatorLength = 1
} else {
switch nextLF! - nextCR! {
case 2 where self.data[nextCR! + 1] == DataInternetLineIterator.cr:
location.terminatorLength += 1 // CR-CRLF
fallthrough
case 1:
location.terminatorLength += 1 // CRLF
fallthrough
default:
location.terminatorLength += 1 // CR-only
}
}
}
self.lineStartOffset = lineEndOffset + location.terminatorLength
location.length += self.lineStartOffset
return location
}
}
当然,如果你有一个Data
的长度是至少一千兆字节的显著部分的块,那么只要没有更多的CR或LF从目前的字节偏移存在采取一击; 总是徒劳地寻找,直到每个迭代中结束。 读块中的数据会有所帮助:
struct DataBlockIterator: IteratorProtocol {
/// The data to traverse.
let data: Data
/// The offset into the data to read the next block from.
private(set) var blockOffset = 0
/// The number of bytes remaining. Kept so the last block is the right size if it's short.
private(set) var bytesRemaining: Int
/// The size of each block (except possibly the last).
let blockSize: Int
/// Initialize with the data to read over and the chunk size.
init(data: Data, blockSize: Int) {
precondition(blockSize > 0)
self.data = data
self.bytesRemaining = data.count
self.blockSize = blockSize
}
mutating func next() -> Data? {
guard bytesRemaining > 0 else { return nil }
defer { blockOffset += blockSize ; bytesRemaining -= blockSize }
return data.subdata(in: blockOffset..<(blockOffset + min(bytesRemaining, blockSize)))
}
}
你必须要,因为我没有这么做过这些想法自己一起拌匀。 考虑:
- 当然,你必须要考虑完全包含在一个块行。
- 但是,你必须处理时,线的两端都在相邻块。
- 或者当端点之间有至少一个块
- 大并发症是当符合多字节序列结束,但该序列跨越两个块! (在短短CR这也是在块的最后一个字节结束的行是等效的情况下,因为你需要阅读下一大块,看看你只是-CR实际上是一个CRLF或CR-CRLF。类似的还有有心计当块与CR-CR结束。)
- 而你需要的时候有从当前没有更多的终结偏移处理,但最终的数据是在后面的块。
祝好运!
Answer 8:
雨燕4.2安全语法
class LineReader {
let path: String
init?(path: String) {
self.path = path
guard let file = fopen(path, "r") else {
return nil
}
self.file = file
}
deinit {
fclose(file)
}
var nextLine: String? {
var line: UnsafeMutablePointer<CChar>?
var linecap = 0
defer {
free(line)
}
let status = getline(&line, &linecap, file)
guard status > 0, let unwrappedLine = line else {
return nil
}
return String(cString: unwrappedLine)
}
private let file: UnsafeMutablePointer<FILE>
}
extension LineReader: Sequence {
func makeIterator() -> AnyIterator<String> {
return AnyIterator<String> {
return self.nextLine
}
}
}
用法:
guard let reader = LineReader(path: "/Path/to/file.txt") else {
return
}
reader.forEach { line in
print(line.trimmingCharacters(in: .whitespacesAndNewlines))
}
Answer 9:
或者你可以简单地使用Generator
:
let stdinByLine = GeneratorOf({ () -> String? in
var input = UnsafeMutablePointer<Int8>(), lim = 0
return getline(&input, &lim, stdin) > 0 ? String.fromCString(input) : nil
})
让我们试试吧
for line in stdinByLine {
println(">>> \(line)")
}
这很简单,懒惰,而且容易与像普查员和仿函数,如地图,减少,过滤器等快捷连锁的东西; 使用lazy()
包装。
它可以推广到所有的FILE
为:
let byLine = { (file:UnsafeMutablePointer<FILE>) in
GeneratorOf({ () -> String? in
var input = UnsafeMutablePointer<Int8>(), lim = 0
return getline(&input, &lim, file) > 0 ? String.fromCString(input) : nil
})
}
所谓像
for line in byLine(stdin) { ... }
Answer 10:
我想一个版本,没有不断修改缓冲区或重复的代码,都是低效率的,并且将允许任何大小的缓冲区(包括1个字节)和任何分隔符。 它有一个公共方法: readline()
调用此方法将返回在EOF下一行或零的字符串值。
import Foundation
// LineStream(): path: String, [buffSize: Int], [delim: String] -> nil | String
// ============= --------------------------------------------------------------
// path: the path to a text file to be parsed
// buffSize: an optional buffer size, (1...); default is 4096
// delim: an optional delimiter String; default is "\n"
// ***************************************************************************
class LineStream {
let path: String
let handle: NSFileHandle!
let delim: NSData!
let encoding: NSStringEncoding
var buffer = NSData()
var buffSize: Int
var buffIndex = 0
var buffEndIndex = 0
init?(path: String,
buffSize: Int = 4096,
delim: String = "\n",
encoding: NSStringEncoding = NSUTF8StringEncoding)
{
self.handle = NSFileHandle(forReadingAtPath: path)
self.path = path
self.buffSize = buffSize < 1 ? 1 : buffSize
self.encoding = encoding
self.delim = delim.dataUsingEncoding(encoding)
if handle == nil || self.delim == nil {
print("ERROR initializing LineStream") /* TODO use STDERR */
return nil
}
}
// PRIVATE
// fillBuffer(): _ -> Int [0...buffSize]
// ============= -------- ..............
// Fill the buffer with new data; return with the buffer size, or zero
// upon reaching end-of-file
// *********************************************************************
private func fillBuffer() -> Int {
buffer = handle.readDataOfLength(buffSize)
buffIndex = 0
buffEndIndex = buffer.length
return buffEndIndex
}
// PRIVATE
// delimLocation(): _ -> Int? nil | [1...buffSize]
// ================ --------- ....................
// Search the remaining buffer for a delimiter; return with the location
// of a delimiter in the buffer, or nil if one is not found.
// ***********************************************************************
private func delimLocation() -> Int? {
let searchRange = NSMakeRange(buffIndex, buffEndIndex - buffIndex)
let rangeToDelim = buffer.rangeOfData(delim,
options: [], range: searchRange)
return rangeToDelim.location == NSNotFound
? nil
: rangeToDelim.location
}
// PRIVATE
// dataStrValue(): NSData -> String ("" | String)
// =============== ---------------- .............
// Attempt to convert data into a String value using the supplied encoding;
// return the String value or empty string if the conversion fails.
// ***********************************************************************
private func dataStrValue(data: NSData) -> String? {
if let strVal = NSString(data: data, encoding: encoding) as? String {
return strVal
} else { return "" }
}
// PUBLIC
// readLine(): _ -> String? nil | String
// =========== ____________ ............
// Read the next line of the file, i.e., up to the next delimiter or end-of-
// file, whichever occurs first; return the String value of the data found,
// or nil upon reaching end-of-file.
// *************************************************************************
func readLine() -> String? {
guard let line = NSMutableData(capacity: buffSize) else {
print("ERROR setting line")
exit(EXIT_FAILURE)
}
// Loop until a delimiter is found, or end-of-file is reached
var delimFound = false
while !delimFound {
// buffIndex will equal buffEndIndex in three situations, resulting
// in a (re)filling of the buffer:
// 1. Upon the initial call;
// 2. If a search for a delimiter has failed
// 3. If a delimiter is found at the end of the buffer
if buffIndex == buffEndIndex {
if fillBuffer() == 0 {
return nil
}
}
var lengthToDelim: Int
let startIndex = buffIndex
// Find a length of data to place into the line buffer to be
// returned; reset buffIndex
if let delim = delimLocation() {
// SOME VALUE when a delimiter is found; append that amount of
// data onto the line buffer,and then return the line buffer
delimFound = true
lengthToDelim = delim - buffIndex
buffIndex = delim + 1 // will trigger a refill if at the end
// of the buffer on the next call, but
// first the line will be returned
} else {
// NIL if no delimiter left in the buffer; append the rest of
// the buffer onto the line buffer, refill the buffer, and
// continue looking
lengthToDelim = buffEndIndex - buffIndex
buffIndex = buffEndIndex // will trigger a refill of buffer
// on the next loop
}
line.appendData(buffer.subdataWithRange(
NSMakeRange(startIndex, lengthToDelim)))
}
return dataStrValue(line)
}
}
它被称为如下:
guard let myStream = LineStream(path: "/path/to/file.txt")
else { exit(EXIT_FAILURE) }
while let s = myStream.readLine() {
print(s)
}
文章来源: Read a file/URL line-by-line in Swift