的使用FFT两个函数计算卷积(FFTW)(Calculating convolution of tw

2019-07-30 02:20发布

I'm trying to speed up a computation for a neural simulator using the FFT.

The equation is:

(1) \sum(j=1 to N) (w(i - j) * s_NMDA[j])

where s_NMDA is a vector of length N and w is defined by:

(2) w(j) = tanh[1/(2 * sigma * p)] * exp(-abs(j) / (sigma * p)]

where sigma and p are constants.

(is there a better way to render equations on stackoverflow?)

The calculation has to be done for N neurons. Since (1) only depends on the absolute distance abs(i - j), it should be possible to compute this using the FFT (convolution theorem).

I've tried to implement this using FFTW but the results do not match with the expected results. I've never used FFTW before and now I'm not sure if my implementation is incorrect of if my assumptions about the convolution theorem are false.

void f_I_NMDA_FFT(
    const double     **states, // states[i][6] == s_NMDA[i]
    const unsigned int numNeurons)
{
    fftw_complex *distances, *sNMDAs, *convolution;
    fftw_complex *distances_f, *sNMDAs_f, *convolution_f;
    fftw_plan     p, pinv;
    const double scale = 1./numNeurons;

        distances = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * numNeurons);
        sNMDAs    = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * numNeurons);
        convolution = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * numNeurons);
        distances_f = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * numNeurons);
        sNMDAs_f    = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * numNeurons);
        convolution_f    = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * numNeurons);

        // fill input array for distances  
    for (unsigned int i = 0; i < numNeurons; ++i)
    {
        distances[i][0] = w(i);
        distances[i][1] = 0;
    }

        // fill input array for sNMDAs
    for (unsigned int i = 0; i < numNeurons; ++i)
    {
        sNMDAs[i][0] = states[i][6];
        sNMDAs[i][1] = 0;
    }

    p = fftw_plan_dft_1d(numNeurons,
                         distances,
                         distances_f,
                         FFTW_FORWARD,
                         FFTW_ESTIMATE);
    fftw_execute(p);

    p = fftw_plan_dft_1d(numNeurons,
                         sNMDAs,
                         sNMDAs_f,
                         FFTW_FORWARD,
                         FFTW_ESTIMATE);
    fftw_execute(p);

    // convolution in frequency domain
    for(unsigned int i = 0; i < numNeurons; ++i)
    {
        convolution_f[i][0] = (distances_f[i][0] * sNMDAs_f[i][0]
            - distances_f[i][1] * sNMDAs_f[i][1]) * scale;
        convolution_f[i][1] = (distances_f[i][0] * sNMDAs_f[i][1]
            - distances_f[i][1] * sNMDAs_f[i][0]) * scale;
    }

    pinv = fftw_plan_dft_1d(numNeurons,
                            convolution_f,
                            convolution,
                            FFTW_FORWARD,
                            FFTW_ESTIMATE);
    fftw_execute(pinv);

    // compute and compare with expected result
    for (unsigned int i = 0; i < numNeurons; ++i)
    {
            double expected = 0;

            for (int j = 0; j < numNeurons; ++j)
            {
                expected += w(i - j) * states[j][6];
            }
            printf("i=%d, FFT: r%f, i%f : Expected: %f\n", i, convolution[i][0], convolution[i][1], expected);
    }

    fftw_destroy_plan(p);
    fftw_destroy_plan(pinv);

    fftw_free(distances), fftw_free(sNMDAs), fftw_free(convolution);
    fftw_free(distances_f), fftw_free(sNMDAs_f), fftw_free(convolution_f);

Here's an example output for 20 neurons:

i=0, FFT: r0.042309, i0.000000 : Expected: 0.041504
i=1, FFT: r0.042389, i0.000000 : Expected: 0.042639
i=2, FFT: r0.042466, i0.000000 : Expected: 0.043633
i=3, FFT: r0.042543, i0.000000 : Expected: 0.044487
i=4, FFT: r0.041940, i0.000000 : Expected: 0.045203
i=5, FFT: r0.041334, i0.000000 : Expected: 0.045963
i=6, FFT: r0.041405, i0.000000 : Expected: 0.046585
i=7, FFT: r0.041472, i0.000000 : Expected: 0.047070
i=8, FFT: r0.041537, i0.000000 : Expected: 0.047419
i=9, FFT: r0.041600, i0.000000 : Expected: 0.047631
i=10, FFT: r0.041660, i0.000000 : Expected: 0.047708
i=11, FFT: r0.041717, i0.000000 : Expected: 0.047649
i=12, FFT: r0.041773, i0.000000 : Expected: 0.047454
i=13, FFT: r0.041826, i0.000000 : Expected: 0.047123
i=14, FFT: r0.041877, i0.000000 : Expected: 0.046656
i=15, FFT: r0.041926, i0.000000 : Expected: 0.046052
i=16, FFT: r0.041294, i0.000000 : Expected: 0.045310
i=17, FFT: r0.042059, i0.000000 : Expected: 0.044430
i=18, FFT: r0.042144, i0.000000 : Expected: 0.043412
i=19, FFT: r0.042228, i0.000000 : Expected: 0.042253

The results seem to be almost correct, but the error increases with the number of neurons. Also, the results seem to be more accurate for positions (i) that are very low or very high. What's going on here?

Update: As suggested by Oli Charlesworth, I implemented the algorithm in octave to see if it's a implementation or math problem:

input = [0.186775; 0.186775; 0.186775; 0.186775; 0.186775; 0; 0.186775; 0.186775; 0.186775; 0.186775];

function ret = _w(i)
  ret = tanh(1 / (2* 1 * 32)) * exp(-abs(i) / (1 * 32));
end

for i = linspace(1, 10, 10)
  expected = 0;
  for j = linspace(1, 10, 10)
    expected += _w(i-j) * input(j);
  end
  expected
end

distances = _w(transpose(linspace(0, 9, 10)));

input_f = fft(input);
distances_f = fft(distances);

convolution_f = input_f .* distances_f;

convolution = ifft(convolution_f)

Results:

expected =  0.022959
expected =  0.023506
expected =  0.023893
expected =  0.024121
expected =  0.024190
expected =  0.024100
expected =  0.024034
expected =  0.023808
expected =  0.023424
expected =  0.022880
convolution =

   0.022959
   0.023036
   0.023111
   0.023183
   0.023253
   0.022537
   0.022627
   0.022714
   0.022798
   0.022880

The results are very much the same. Therefore, there must be something wrong with my understanding of the convolution theorem / FFT.

Answer 1:

要通过卷积FFT 2个信号一般需要做到这一点:

  1. 尽可能多的零添加到每一个信号作为必要的,以便它的长度变成原来的信号的累计长度 - 1(这是卷积的结果的长度)。
  2. 如果您的FFT库需要输入长度为2的幂,增加尽可能多的零根据需要每一个信号,以满足这一要求了。
  3. 计算(通过FFT)信号1的DFT。
  4. 计算(通过FFT)信号2的DFT。
  5. 乘二的DFT元素明智的。 这应该是一个复数乘法,顺便说一句。
  6. 计算相乘的DFT的逆DFT(通过FFT)。 这会成为你的卷积结果。

在你的代码中,我看到FFTW_FORWARD在所有3点的FFT。 我猜测,如果这不是问题,这是它的一部分。 最后FFT应该是“落后”,而不是“前进”。

另外,我觉得你的第二个表现,而不是需要“+”“ - ”:

convolution_f[i][0] = (distances_f[i][0] * sNMDAs_f[i][0]
            - distances_f[i][1] * sNMDAs_f[i][1]) * scale;

convolution_f[i][1] = (distances_f[i][0] * sNMDAs_f[i][1]
            - distances_f[i][1] * sNMDAs_f[i][0]) * scale;


Answer 2:

我终于解决了这个问题,非常感谢亚历克斯和奥利查尔斯沃思您的建议!

function ret = _w(i)
  ret = tanh(1 / (2* 1 * 32)) * exp(-abs(i) / (1 * 32));
end

_input = [0.186775; 0.186775; 0.186775; 0.186775; 0.186775; 0; 0.186775; 0.186775; 0.186775; 0.186775];
n = size(_input)(1);

input = _input;

for i = linspace(1, n, n)
  expected = 0;
  for j = linspace(1, n, n)
    expected += _w(i-j) * input(j);
  end
  expected
end

input = vertcat(_input, zeros((2*n)-n-1,1));

distances = _w(transpose(linspace(0, (2*n)-n-1, n)));
distances = vertcat(flipud(distances), distances(2:end));

input_f = fft(input);
distances_f = fft(distances);

convolution_f = input_f .* distances_f;

convolution = ifft(convolution_f)(n:end)

结果:

expected =  0.022959
expected =  0.023506
expected =  0.023893
expected =  0.024121
expected =  0.024190
expected =  0.024100
expected =  0.024034
expected =  0.023808
expected =  0.023424
expected =  0.022880
convolution =

   0.022959
   0.023506
   0.023893
   0.024121
   0.024190
   0.024100
   0.024034
   0.023808
   0.023424
   0.022880

我基本上忘了才能在正确的道路的距离排列。 如果有人有兴趣,我可以在以后提供更详细的解释。

更新:(说明)

下面是我的距离向量(5元)看起来还是很喜欢:

i =  1       2       3       4       5

| _w(0) | _w(1) | _w(2) | _w(3) | _w(4) |

在这个载体,我申请一个内核,例如:

|  0.1  |  0.1  |  0.0  |  0.2  |  0.3  |

因为我用循环卷积,对于第一神经元_w(0)的结果为:

0.0 * _w(2)+ 0.1 * _w(1)+ 0.1 * _w(0)+ 0.1 * _w(1)+ 0.0 * _w(2)

但是,这是不正确,结果应该是:

0.1 * _w(0)+ 0.1 * _w(1)+ 0.0 * _w(2)+ 0.2 * _w(3)+ 0.3 * _w(4)

为了实现这一目标,我不得不“镜”我的距离向量,并添加一些填充到内核:

input = vertcat(_input, zeros((2*n)-n-1,1));
distances = _w(transpose(linspace(0, (2*n)-n-1, n)));
distances = _w(transpose(linspace(0, (2*n)-n-1, n)));

i =  1       2        3       4       5       6       7       8       9

| _w(4) | _w(3)  | _w(2) | _w(1) | _w(0) | _w(1) | _w(2) | _w(3) | _w(4) |

|  0.1  |  0.1   |  0.0  |  0.2  |  0.3  |  0    |  0    |  0    |  0    |

现在,如果我申请的卷积,其结果对于i = [5:9]正是我一直在寻找的结果,所以我只能放弃[1:4]和我做:)

convolution = ifft(convolution_f)(n:end)


文章来源: Calculating convolution of two functions using FFT (FFTW)