Getting a phase image from CUDA FFT

2019-08-24 18:22发布

问题:

I'm trying to apply a cuFFT, forward then inverse, to a 2D image. I need the real and complex parts as separate outputs so I can compute a phase and magnitude image. I haven't been able to recreate the input image, and also a non-zero phase is returned. In particular I am unsure if I'm correctly creating a full-size image from the reduced-size cuFFT complex output, which apparently stores only the left side of the spectrum. Here's my current code:

// Load image
cv::Mat_<float> img;
img = cv::imread(path,0);
if(!img.isContinuous()){
    std::cout<<"Input cv::Mat is not continuous!"<<std::endl;
    return -1;
}

float *h_Data, *d_Data;
h_Data = img.ptr<float>(0);

// Complex device pointers
cufftComplex
*d_DataSpectrum,
*d_Result,
*h_Result;

// Plans for cuFFT execution
cufftHandle
fftPlanFwd,
fftPlanInv;

// Image dimensions
const int dataH = img.rows;
const int dataW = img.cols;
const int complexW = dataW/2+1;

// Allocate memory
h_Result = (cufftComplex *)malloc(dataH    * complexW * sizeof(cufftComplex));
checkCudaErrors(cudaMalloc((void **)&d_DataSpectrum,   dataH * complexW * sizeof(cufftComplex)));
checkCudaErrors(cudaMalloc((void **)&d_Data,   dataH   * dataW   * sizeof(float)));
checkCudaErrors(cudaMalloc((void **)&d_Result,   dataH * complexW * sizeof(cufftComplex)));

// Copy image to GPU
checkCudaErrors(cudaMemcpy(d_Data,   h_Data,   dataH   * dataW *   sizeof(float), cudaMemcpyHostToDevice));

// Forward FFT
checkCudaErrors(cufftPlan2d(&fftPlanFwd, dataH, dataW, CUFFT_R2C));
checkCudaErrors(cufftExecR2C(fftPlanFwd, (cufftReal *)d_Data, (cufftComplex *)d_DataSpectrum));

// Inverse FFT
checkCudaErrors(cufftPlan2d(&fftPlanInv, dataH, dataW, CUFFT_C2C));
checkCudaErrors(cufftExecC2C(fftPlanInv, (cufftComplex *)d_DataSpectrum, (cufftComplex *)d_Result, CUFFT_INVERSE));

// Copy result to host memory
checkCudaErrors(cudaMemcpy(h_Result, d_Result, dataH * complexW * sizeof(cufftComplex), cudaMemcpyDeviceToHost));

// Convert cufftComplex to OpenCV real and imag Mat
Mat_<float> resultReal = Mat_<float>(dataH, dataW);
Mat_<float> resultImag = Mat_<float>(dataH, dataW);
for(int i=0; i<dataH; i++){
    float* rowPtrReal = resultReal.ptr<float>(i);
    float* rowPtrImag = resultImag.ptr<float>(i);
    for(int j=0; j<dataW; j++){
        if(j<complexW){
            rowPtrReal[j] = h_Result[i*complexW+j].x/(dataH*dataW);
            rowPtrImag[j] = h_Result[i*complexW+j].y/(dataH*dataW);
        }else{
            // Right side?
            rowPtrReal[j] = h_Result[i*complexW+(dataW-j)].x/(dataH*dataW);
            rowPtrImag[j] = -h_Result[i*complexW+(dataW-j)].y/(dataH*dataW);
        }
    }
}

// Compute phase and normalize to 8 bit
Mat_<float> resultPhase;
phase(resultReal, resultImag, resultPhase);
cv::subtract(resultPhase, 2*M_PI, resultPhase, (resultPhase > M_PI));
resultPhase = ((resultPhase+M_PI)*255)/(2*M_PI);
Mat_<uchar> normalized = Mat_<uchar>(dataH, dataW);
resultPhase.convertTo(normalized, CV_8U);
// Save phase image
cv::imwrite("cuda_propagation_phase.png",normalized);

// Compute amplitude and normalize to 8 bit
Mat_<float> resultAmplitude;
magnitude(resultReal, resultImag, resultAmplitude);
Mat_<uchar> normalizedAmplitude = Mat_<uchar>(dataH, dataW);
resultAmplitude.convertTo(normalizedAmplitude, CV_8U);
// Save phase image
cv::imwrite("cuda_propagation_amplitude.png",normalizedAmplitude);

I'm not sure where my error is. Is that the correct way to get back the whole image from the reduced version (the for loop)?

回答1:

I think I got it now. The 'trick' is to start with a complex matrix. Starting with a real one, you need to apply an R2C transform--which uses reduced size due to symmetry of the spectrum--and then a C2C transform, which preserves that reduced size. The solution was to create a complex input from the real one by inserting zeros as complex part, then applying two C2C transforms in a row which both preserve the whole image and make it easy to get the full sized real and imaginary matrices afterwards:

// Load image
cv::Mat_<float> img;
img = cv::imread(path,0);
if(!img.isContinuous()){
    std::cout<<"Input cv::Mat is not continuous!"<<std::endl;
    return -1;
}

float *h_DataReal = img.ptr<float>(0);
cufftComplex *h_DataComplex;

// Image dimensions
const int dataH = img.rows;
const int dataW = img.cols;

// Convert real input to complex
h_DataComplex = (cufftComplex *)malloc(dataH    * dataW * sizeof(cufftComplex));
for(int i=0; i<dataH*dataW; i++){
    h_DataComplex[i].x = h_DataReal[i];
    h_DataComplex[i].y = 0.0f;
}

// Complex device pointers
cufftComplex
*d_Data,
*d_DataSpectrum,
*d_Result,
*h_Result;

// Plans for cuFFT execution
cufftHandle
fftPlanFwd,
fftPlanInv;

// Allocate memory
h_Result = (cufftComplex *)malloc(dataH    * dataW * sizeof(cufftComplex));
checkCudaErrors(cudaMalloc((void **)&d_DataSpectrum,   dataH * dataW * sizeof(cufftComplex)));
checkCudaErrors(cudaMalloc((void **)&d_Data,   dataH   * dataW   * sizeof(cufftComplex)));
checkCudaErrors(cudaMalloc((void **)&d_Result,   dataH * dataW * sizeof(cufftComplex)));

// Copy image to GPU
checkCudaErrors(cudaMemcpy(d_Data,   h_DataComplex,   dataH   * dataW *   sizeof(cufftComplex), cudaMemcpyHostToDevice));

// Forward FFT
checkCudaErrors(cufftPlan2d(&fftPlanFwd, dataH, dataW, CUFFT_C2C));
checkCudaErrors(cufftExecC2C(fftPlanFwd, (cufftComplex *)d_Data, (cufftComplex *)d_DataSpectrum, CUFFT_FORWARD));

// Inverse FFT
checkCudaErrors(cufftPlan2d(&fftPlanInv, dataH, dataW, CUFFT_C2C));
checkCudaErrors(cufftExecC2C(fftPlanInv, (cufftComplex *)d_DataSpectrum, (cufftComplex *)d_Result, CUFFT_INVERSE));

// Copy result to host memory
checkCudaErrors(cudaMemcpy(h_Result, d_Result, dataH * dataW * sizeof(cufftComplex), cudaMemcpyDeviceToHost));

// Convert cufftComplex to OpenCV real and imag Mat
Mat_<float> resultReal = Mat_<float>(dataH, dataW);
Mat_<float> resultImag = Mat_<float>(dataH, dataW);
for(int i=0; i<dataH; i++){
    float* rowPtrReal = resultReal.ptr<float>(i);
    float* rowPtrImag = resultImag.ptr<float>(i);
    for(int j=0; j<dataW; j++){
            rowPtrReal[j] = h_Result[i*dataW+j].x/(dataH*dataW);
            rowPtrImag[j] = h_Result[i*dataW+j].y/(dataH*dataW);
    }
}


回答2:

This is an old question, but I'd like to provide additional information: the R2C preserves the same amount of information as a C2C transform, it's just doing so with about half as many elements. The R2C (and C2R) transforms take advantage of Hermitian symmetry to reduce the number of elements that are computed and stored in memory (e.g. the FFT is symmetric, so you actually don't need ~half of the terms that are being stored in a C2C transform).

To generate a 2D image of the real and imaginary components, you could use the R2C transform and then write a kernel that translates the (Nx/2+1)Ny output array into a pair of arrays of size (NxNy), taking advantage of the symmetry yourself to write the terms to the correct positions. But using a C2C transform is a bit less code, and more foolproof.