MATLAB Answers

Why does using cuFFT cause a fatal error LNK1120 in a CUDA MEX file?

5 views (last 30 days)
I'm trying to write a MEX gateway function (in CUDA) to add two arrays on the GPU. I would like to filter one of the signals (MediumX in the following code) before I do the summation on the GPU. However, when I use "cufftPlan1d" to generate an FFT plan, I get an error during compilation in MATLAB. The error is:
Error using mex
Creating library test2_GPUArray.lib and object test2_GPUArray.exp
test2_GPUArray.obj : error LNK2019: unresolved external symbol cufftPlan1d referenced in function mexFunction
test2_GPUArray.mexw64 : fatal error LNK1120: 1 unresolved externals
Error in mexcuda (line 168)
[varargout{1:nargout}] = mex(mexArguments{:});
Error in test2_GPUArray_matlabRunner (line 3)
mexcuda('-v', '' , 'NVCCFLAGS=-gencode=arch=compute_50,code=sm_50 -Xptxas -dlcm=cg ');
Here is my MEX gateway code:
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <stdio.h>
#include "cuda.h"
#include <iostream>
#include <mex.h>
#include "gpu/mxGPUArray.h"
#include <cuComplex.h>
#include <cublas_v2.h>
#include <thrust/complex.h>
#include <cufft.h>
//#define NRANK 1 // signals are 1-dimensional (NX spatial points).
//typedef thrust::complex<float> fcomp;
//#define NX 256
//#define /*BATCH 10*/
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
/*
 * Checks the result of a CUDA runtime call and reports failures to MATLAB.
 *
 * code  : status returned by the CUDA runtime call being checked.
 * file  : source file of the call site (supplied by gpuErrchk via __FILE__).
 * line  : source line of the call site (supplied by gpuErrchk via __LINE__).
 * abort : when true (default) the MEX call is aborted with a MATLAB error;
 *         when false only a MATLAB warning is issued and execution continues.
 *
 * A MEX file runs inside the MATLAB process, so calling exit() here would
 * terminate MATLAB itself. mexErrMsgIdAndTxt instead aborts only the current
 * MEX call and returns control to the MATLAB prompt with the message.
 */
inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
{
    if (code == cudaSuccess) {
        return;
    }

    if (abort) {
        // Does not return: control goes back to MATLAB with this error.
        mexErrMsgIdAndTxt("test2_GPUArray:cudaError", "GPUassert: %s %s %d\n",
                          cudaGetErrorString(code), file, line);
    }

    mexWarnMsgIdAndTxt("test2_GPUArray:cudaError", "GPUassert: %s %s %d\n",
                       cudaGetErrorString(code), file, line);
}
/*
 * Element-wise integer addition:
 *     Device_Data_Added[i] = Device_Data[i] + Device_MediumX[i]   for i < N.
 *
 * Device_Data_Added : output array, written for indices [0, N).
 * Device_Data       : first input array, read for indices [0, N).
 * Device_MediumX    : second input array, read for indices [0, N).
 * tex               : texture object; not used by the active code path (only
 *                     by the commented-out alternative kept below).
 * N                 : number of elements to process.
 *
 * Each thread handles at most one element, so the launch must provide at
 * least N threads in total. Threads whose global index is >= N do nothing.
 */
__global__ void add(int* Device_Data_Added,int* Device_Data, int* Device_MediumX, cudaTextureObject_t tex, int N) {
    (void)tex;  // only referenced by the disabled texture-fetch variant

    // Flat index of this thread inside its block.
    int TID = threadIdx.y * blockDim.x + threadIdx.x;
    // Number of threads in all blocks that precede this one in its grid row.
    int BlockOFFset = blockDim.x * blockDim.y * blockIdx.x;
    // Number of threads in all grid rows that precede this one. This is 0 for
    // a 1-D grid; including it makes each grid row of a 2-D launch cover its
    // own range of elements instead of recomputing the first row's range.
    int RowOFFset = blockDim.x * blockDim.y * gridDim.x * blockIdx.y;
    int GID_RowBased = RowOFFset + BlockOFFset + TID;

    if (GID_RowBased < N) {
        Device_Data_Added[GID_RowBased] = Device_Data[GID_RowBased] + Device_MediumX[GID_RowBased];
        ///Device_Data_Added[GID_RowBased] = tex1Dfetch<int>(tex, GID_RowBased + 1.0f) + Device_MediumX[GID_RowBased];
    }
}
/*
 * MEX gateway: adds two int32 arrays on the GPU.
 *
 * MATLAB usage:
 *     Data_New = test2_GPUArray(MediumX, Data)
 *
 * prhs[0] (MediumX) : real int32 array with at least N (= 1024) elements.
 * prhs[1] (Data)    : real int32 array with at least N (= 1024) elements.
 * plhs[0]           : N-by-1 int32 column, Data(1:N) + MediumX(1:N).
 *
 * A cuFFT real-to-complex plan and a device scratch buffer are also created
 * (the filtering step itself is not implemented yet) and released before
 * returning.
 *
 * Build note: including <cufft.h> only provides declarations. The cuFFT
 * library must also be passed to the linker, otherwise cufftPlan1d is an
 * unresolved external (LNK2019 / LNK1120):
 *     mexcuda -lcufft test2_GPUArray.cu
 *
 * NOTE(review): when gpuErrchk aborts on a failing call, device resources
 * acquired earlier in this function are not released on that error path.
 */
void mexFunction(int nlhs, mxArray* plhs[],
                 int nrhs, const mxArray* prhs[]) {
    const int N = 1024;      // number of elements processed; also the FFT length
    const int BATCH = 1;     // number of FFTs in the cuFFT plan
    const size_t ArrayByteSize = sizeof(int) * N;

    // ---- Argument validation -------------------------------------------
    if (nrhs < 2) {
        mexErrMsgIdAndTxt("test2_GPUArray:nrhs",
                          "Two inputs required: MediumX and Data.");
    }
    if (nlhs > 1) {
        mexErrMsgIdAndTxt("test2_GPUArray:nlhs",
                          "At most one output is returned.");
    }
    for (int i = 0; i < 2; ++i) {
        // The data is read as int, so anything other than real int32 would
        // be reinterpreted bit-wise; fewer than N elements would be read
        // out of bounds by the host-to-device copies below.
        if (mxGetClassID(prhs[i]) != mxINT32_CLASS || mxIsComplex(prhs[i])) {
            mexErrMsgIdAndTxt("test2_GPUArray:inputType",
                              "Input %d must be a real int32 array.", i + 1);
        }
        if (mxGetNumberOfElements(prhs[i]) < static_cast<size_t>(N)) {
            mexErrMsgIdAndTxt("test2_GPUArray:inputSize",
                              "Input %d must have at least %d elements.", i + 1, N);
        }
    }

    // mxGetData is the accessor for non-double numeric data (mxGetPr is
    // intended for double arrays).
    const int* MediumX = static_cast<const int*>(mxGetData(prhs[0]));
    const int* Data    = static_cast<const int*>(mxGetData(prhs[1]));

    //mxGPUArray const* MediumX = mxGPUCreateFromMxArray(prhs[0]); // Can be CPU or GPU, will copy to GPU if its not already there
    //int* Device_MediumX = static_cast<int*>((int*)mxGPUGetDataReadOnly(MediumX)); // get the pointer itself (assuming float data)

    // ---- Output ----------------------------------------------------------
    plhs[0] = mxCreateNumericMatrix(N, 1, mxINT32_CLASS, mxREAL);
    int* Data_New = static_cast<int*>(mxGetData(plhs[0]));

    // ---- cuFFT setup (filtering itself is still to be implemented) -------
    cufftHandle plan;
    cufftReal* MediumXF = NULL;
    gpuErrchk(cudaMalloc((void**)&MediumXF, sizeof(cufftReal) * N * BATCH));
    const cufftResult planStatus = cufftPlan1d(&plan, N, CUFFT_R2C, BATCH);
    if (planStatus != CUFFT_SUCCESS) {
        cudaFree(MediumXF);
        mexErrMsgIdAndTxt("test2_GPUArray:cufft",
                          "cufftPlan1d failed with cufftResult %d.",
                          static_cast<int>(planStatus));
    }

    // ---- Device buffers --------------------------------------------------
    int* Device_MediumX = NULL;
    gpuErrchk(cudaMalloc((void**)&Device_MediumX, ArrayByteSize));
    gpuErrchk(cudaMemcpy(Device_MediumX, MediumX, ArrayByteSize, cudaMemcpyHostToDevice));

    int* Device_Data = NULL;
    gpuErrchk(cudaMalloc((void**)&Device_Data, ArrayByteSize));
    gpuErrchk(cudaMemcpy(Device_Data, Data, ArrayByteSize, cudaMemcpyHostToDevice));

    int* Device_Data_Added = NULL;
    gpuErrchk(cudaMalloc((void**)&Device_Data_Added, ArrayByteSize));

    // ---- Texture object over Device_Data (32-bit signed, linear memory) --
    cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeLinear;
    resDesc.res.linear.devPtr = Device_Data;
    resDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
    resDesc.res.linear.desc.x = 32; // bits per channel
    resDesc.res.linear.sizeInBytes = ArrayByteSize;

    cudaTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    texDesc.readMode = cudaReadModeElementType;

    cudaTextureObject_t tex = 0;
    gpuErrchk(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));

    // ---- Kernel launch ---------------------------------------------------
    dim3 block(1024);
    dim3 grid((N + block.x - 1) / block.x);  // ceil-div: no surplus block when N is a multiple of block.x
    add<<<grid, block>>>(Device_Data_Added, Device_Data, Device_MediumX, tex, N);
    gpuErrchk(cudaGetLastError());  // reports an invalid launch configuration

    // Blocking copy: also surfaces any error raised while the kernel ran.
    gpuErrchk(cudaMemcpy(Data_New, Device_Data_Added, ArrayByteSize, cudaMemcpyDeviceToHost));

    // ---- Cleanup -----------------------------------------------------------
    // The MEX file stays loaded between calls, so anything not released here
    // would accumulate on the device with every invocation.
    gpuErrchk(cudaDestroyTextureObject(tex));
    gpuErrchk(cudaFree(Device_Data_Added));
    gpuErrchk(cudaFree(Device_Data));
    gpuErrchk(cudaFree(Device_MediumX));
    gpuErrchk(cudaFree(MediumXF));
    if (cufftDestroy(plan) != CUFFT_SUCCESS) {
        mexWarnMsgIdAndTxt("test2_GPUArray:cufft", "cufftDestroy failed.");
    }
}
There is no filtering defined in this code for now, but I think I need to be able to get cufftPlan1d running first. Could you please let me know what is wrong? I already have "#include <cufft.h>" at the beginning of my code!

Accepted Answer

Linda Koletsou Soulti
Linda Koletsou Soulti on 8 Jul 2021
Hello Moein,
You will also need to link against the cuFFT library using -lcufft, in a similar way as NPP is used in the following example:

Sign in to comment.

More Answers (0)




Community Treasure Hunt

Find the treasures in MATLAB Central and discover how the community can help you!

Start Hunting!

Translated by