Swift FFT - Complex split issue

Posted 2019-03-13 07:56

Question:

I am trying to perform an FFT on an audio file to find its frequency content using the Accelerate framework. I have adapted code (probably incorrectly) from this question: Spectrogram from AVAudioPCMBuffer using Accelerate framework in Swift

However, the magnitudes in 'spectrum' are all either '0', 'inf' or 'nan', and the 'real' and 'imag' components of the complex split print similarly bad values, which suggests the split is the source of the problem, since 'magnitude = sqrt(pow(real,2)+pow(imag,2))'. Correct me if I'm wrong, but I think the rest of the code is OK.
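
For what it's worth, I believe the formula itself is fine: Accelerate's vDSP_zvabs computes the same sqrt(real^2 + imag^2) for every element of a split-complex vector in one call, and a standalone snippet like this (Swift 4 syntax, made-up values, unrelated to the audio code below) gives the expected results:

    import Accelerate

    // Magnitude of each element of a split-complex vector, i.e.
    // sqrt(realp[i]^2 + imagp[i]^2), computed in one call with vDSP_zvabs.
    var realParts: [Float] = [1, 0, 3]
    var imagParts: [Float] = [0, 2, 4]
    var magnitudes = [Float](repeating: 0, count: realParts.count)

    realParts.withUnsafeMutableBufferPointer { rp in
        imagParts.withUnsafeMutableBufferPointer { ip in
            var split = DSPSplitComplex(realp: rp.baseAddress!, imagp: ip.baseAddress!)
            vDSP_zvabs(&split, 1, &magnitudes, 1, vDSP_Length(magnitudes.count))
        }
    }
    // magnitudes is now [1.0, 2.0, 5.0]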

Why am I receiving these results, what am I doing wrong, and how can I fix it (what should the split components actually contain)? Please keep in mind that I am very new to FFTs and sampling and have no idea how to set this up for an audio file, so any help would be greatly appreciated. Thanks.

Here's the code I'm using:

    // get audio file
    let fileURL:NSURL = NSBundle.mainBundle().URLForResource("foo", withExtension: "mp3")!
    let audioFile = try!  AVAudioFile(forReading: fileURL)
    let fileFormat = audioFile.processingFormat
    let frameCount = UInt32(audioFile.length)

    let buffer = AVAudioPCMBuffer(PCMFormat: fileFormat, frameCapacity: frameCount)
    let audioEngine = AVAudioEngine()
    let playerNode = AVAudioPlayerNode()
    audioMixerNode = audioEngine.mainMixerNode

    let bufferSize = Int(frameCount)
    let channels: NSArray = [Int(buffer.format.channelCount)]
    let channelCount = channels.count
    let floats1 = [Int(buffer.frameLength)]
    for var i=0; i<channelCount; ++i {
        channelSamples.append([])
        let firstSample = buffer.format.interleaved ? i : i*bufferSize
        for var j=firstSample; j<bufferSize; j+=buffer.stride*2 {
            channelSamples[i].append(DSPComplex(real: buffer.floatChannelData.memory[j], imag: buffer.floatChannelData.memory[j+buffer.stride]))
        }
    }

    // connect node
    audioEngine.attachNode(playerNode)
    audioEngine.connect(playerNode, to: audioMixerNode, format: playerNode.outputFormatForBus(0))

    // Set up the transform
    let log2n = UInt(round(log2(Double(bufferSize))))
    let fftSetup = vDSP_create_fftsetup(log2n, Int32(kFFTRadix2))

    // Create the complex split value to hold the output of the transform
    // why doesn't this work?
    var realp = [Float](count: bufferSize/2, repeatedValue: 0)
    var imagp = [Float](count: bufferSize/2, repeatedValue: 0)
    var output = DSPSplitComplex(realp: &realp, imagp: &imagp)

    vDSP_ctoz(UnsafePointer(channelSamples), 2, &output, 1, UInt(bufferSize / 2))

    // Do the fast Fourier forward transform
    vDSP_fft_zrip(fftSetup, &output, 1, log2n, Int32(FFT_FORWARD))

    // Convert the complex output to magnitude
    var fft = [Float](count:Int(bufferSize / 2), repeatedValue:0.0)
    let bufferOver2: vDSP_Length = vDSP_Length(bufferSize / 2)
    vDSP_zvmags(&output, 1, &fft, 1, bufferOver2)

    var spectrum = [Float]()
    for var i=0; i<bufferSize/2; ++i {
        let imag = output.imagp[i]
        let real = output.realp[i]
        let magnitude = sqrt(pow(real,2)+pow(imag,2))
        spectrum.append(magnitude)
    }

    // Release the setup
    vDSP_destroy_fftsetup(fftSetup)

Answer 1:

There were a few problems with your code:

  1. You weren't reading the audio file's samples into the buffer.
  2. channelSamples was packed incorrectly.
  3. vDSP_fft_zrip was reading beyond the end of the array; it expects 2^log2n samples.
  4. vDSP_fft_zrip's output is packed, while your calculations expect unpacked values (see the sketch after the code below).

Swift 4 version, now with an actual fix for point 3:

    import AVFoundation
    import Accelerate

    let fileURL = Bundle.main.url(forResource: "foo", withExtension: "mp3")!
    let audioFile = try! AVAudioFile(forReading: fileURL as URL)
    let frameCount = UInt32(audioFile.length)

    let log2n = UInt(round(log2(Double(frameCount))))
    let bufferSizePOT = Int(1 << log2n)

    let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: AVAudioFrameCount(bufferSizePOT))!
    try! audioFile.read(into: buffer, frameCount: frameCount)

    // Not sure if AVAudioPCMBuffer zero-initialises extra frames, so when in doubt...
    let leftFrames = buffer.floatChannelData![0]
    for i in Int(frameCount)..<Int(bufferSizePOT) {
        leftFrames[i] = 0
    }

    // Set up the transform
    let fftSetup = vDSP_create_fftsetup(log2n, Int32(kFFTRadix2))!

    // Create packed real input
    var realp = [Float](repeating: 0, count: bufferSizePOT/2)
    var imagp = [Float](repeating: 0, count: bufferSizePOT/2)
    var output = DSPSplitComplex(realp: &realp, imagp: &imagp)

    leftFrames.withMemoryRebound(to: DSPComplex.self, capacity: bufferSizePOT / 2) {
        vDSP_ctoz($0, 2, &output, 1, UInt(bufferSizePOT / 2))
    }

    // Do the fast Fourier forward transform, packed input to packed output
    vDSP_fft_zrip(fftSetup, &output, 1, log2n, Int32(FFT_FORWARD))

    // You can calculate magnitude squared here, with care,
    // as the first result is wrong! Read up on packed formats.
    var fft = [Float](repeating: 0.0, count: Int(bufferSizePOT / 2))
    vDSP_zvmags(&output, 1, &fft, 1, vDSP_Length(bufferSizePOT / 2))

    // Release the setup
    vDSP_destroy_fftsetup(fftSetup)
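
On point 4 and the "first result is wrong" comment above: vDSP_fft_zrip's packed output stores two real values in bin 0 (the DC term in realp[0] and the Nyquist term in imagp[0]), and its forward output is scaled by a factor of 2. A minimal follow-on sketch, reusing realp, fft, bufferSizePOT and audioFile from the code above (the peak-picking at the end is only an illustration, not part of the original answer):

    // Bin 0 of the packed result holds two real values (DC in realp[0],
    // Nyquist in imagp[0]), so fft[0] from vDSP_zvmags is DC^2 + Nyquist^2
    // rather than an ordinary bin magnitude.
    let n = bufferSizePOT
    let sampleRate = Float(audioFile.processingFormat.sampleRate)

    // Per-bin magnitudes; divide by 2 because the forward vDSP_fft_zrip
    // output is scaled by a factor of 2.
    var spectrum = [Float](repeating: 0, count: n / 2)
    spectrum[0] = abs(realp[0]) / 2                    // DC component
    for i in 1 ..< n / 2 {
        spectrum[i] = fft[i].squareRoot() / 2
    }

    // Bin i corresponds to the frequency i * sampleRate / n, so the strongest
    // non-DC bin gives a rough estimate of the dominant frequency.
    var maxBin = 1
    for i in 2 ..< n / 2 where spectrum[i] > spectrum[maxBin] {
        maxBin = i
    }
    let dominantFrequency = Float(maxBin) * sampleRate / Float(n)
    print("Strongest bin \(maxBin): \(dominantFrequency) Hz")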