Why does using cuFFT cause a fatal error LNK1120 in MEX CUDA?
    7 visualizaciones (últimos 30 días)
  
       Mostrar comentarios más antiguos
    
    Moein Mozaffarzadeh
 el 6 de Jul. de 2021
  
    
    
    
    
    Comentada: Moein Mozaffarzadeh
 el 9 de Jul. de 2021
            Hi, 
I'm trying to write a MEX gateway function (in CUDA) to add two arrays on the GPU. I would like to filter one of the signals (MediumX in the following code) before I do the summation on the GPU. However, when I use "cufftPlan1d" to generate an FFT plan, I get an error during compilation in MATLAB. The error is:
Error using mex
   Creating library test2_GPUArray.lib and object test2_GPUArray.exp
test2_GPUArray.obj : error LNK2019: unresolved external symbol cufftPlan1d referenced in function mexFunction
test2_GPUArray.mexw64 : fatal error LNK1120: 1 unresolved externals
Error in mexcuda (line 168)
    [varargout{1:nargout}] = mex(mexArguments{:});
Error in test2_GPUArray_matlabRunner (line 3)
mexcuda('-v', 'test2_GPUArray.cu' , 'NVCCFLAGS=-gencode=arch=compute_50,code=sm_50  -Xptxas -dlcm=cg ');
Here is my MEX gateway code:
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <stdio.h>
#include "cuda.h"
#include <iostream>
#include <mex.h>
#include "gpu/mxGPUArray.h"
#include <cuComplex.h>
#include <cublas_v2.h>
#include <thrust/complex.h>
#include <cufft.h>
//#define NRANK 1 // signals are 1-dimensional (NX spatial points).
//typedef thrust::complex<float> fcomp;
//#define NX 256
//#define /*BATCH 10*/
// Wraps a CUDA runtime call and reports a failure together with the file and
// line of the call site. The do/while(0) wrapper makes the macro expand to a
// single statement, so `if (c) gpuErrchk(x); else ...` parses as intended.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

/*
 * Checks the status returned by a CUDA runtime call.
 *
 * code  - status value returned by the CUDA call being checked
 * file  - source file of the call site (supplied by gpuErrchk)
 * line  - source line of the call site (supplied by gpuErrchk)
 * abort - when true (the default), terminate the process with `code` as the
 *         exit status after printing the message
 *
 * Does nothing when code == cudaSuccess. Otherwise prints the CUDA error
 * string and the call site to stderr.
 */
inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
{
	if (code == cudaSuccess) return;

	fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
	// NOTE(review): exit() ends the whole host process; when this runs inside a
	// MEX file that presumably takes the MATLAB session down with it. Consider
	// reporting through the MEX error API instead -- confirm with the author.
	if (abort) exit(code);
}
/*
 * Element-wise integer addition on the device:
 *   Device_Data_Added[i] = Device_Data[i] + Device_MediumX[i]   for 0 <= i < N
 *
 * Device_Data_Added - output buffer, written for indices below N
 * Device_Data       - first operand
 * Device_MediumX    - second operand
 * tex               - texture object; accepted but not read by the current body
 *                     (the texture-fetch variant is kept below as a comment)
 * N                 - number of elements to process
 *
 * Indexing: the thread index is flattened over threadIdx.x/.y inside the block
 * and blocks are counted along grid x only. blockIdx.y does not enter the
 * index, so launching with gridDim.y > 1 makes every grid row repeat the same
 * elements. Threads whose index is >= N do nothing.
 */
__global__ void add(int* Device_Data_Added,int* Device_Data, int* Device_MediumX, cudaTextureObject_t tex, int N) {
	const int threadInBlock = threadIdx.y * blockDim.x + threadIdx.x;
	const int blockStart = blockDim.x * blockDim.y * blockIdx.x;
	const int gid = blockStart + threadInBlock;

	if (gid < N) {
		Device_Data_Added[gid] = Device_Data[gid] + Device_MediumX[gid];
		// Texture-based alternative, disabled:
		// Device_Data_Added[gid] = tex1Dfetch<int>(tex, gid + 1.0f) + Device_MediumX[gid];
	}
}
/*
 * MEX gateway: adds the first N (1024) elements of two int32 arrays on the GPU.
 *
 *   prhs[0]  MediumX - real int32 array with at least N elements
 *   prhs[1]  Data    - real int32 array with at least N elements
 *   plhs[0]  N-by-1 int32 column, element i = Data[i] + MediumX[i]
 *
 * A cuFFT real-to-complex plan and a scratch buffer (MediumXF) are created as
 * a placeholder for filtering MediumX; no transform is executed yet.
 *
 * Build note: cufftPlan1d/cufftDestroy are defined in the cuFFT library, so
 * the mexcuda command has to link it (add '-lcufft', plus the CUDA library
 * directory via '-L...'); including <cufft.h> alone leaves the symbol
 * unresolved and the link step fails with LNK2019/LNK1120.
 *
 * Invalid inputs and cuFFT plan failures are reported to MATLAB through
 * mexErrMsgIdAndTxt before any device memory is allocated. CUDA runtime
 * failures go through gpuErrchk, like the other checked calls in this file.
 */
void mexFunction(int nlhs, mxArray* plhs[],
	int nrhs, const mxArray* prhs[]) {
	const int N = 1024;      // number of elements processed
	const int BATCH = 1;     // number of transforms covered by the cuFFT plan
	const size_t ArrayByteSize = sizeof(int) * N;

	// --- Validate inputs: the host buffers are read as N 32-bit integers, so
	// anything else would be misinterpreted or read out of bounds.
	if (nrhs < 2) {
		mexErrMsgIdAndTxt("test2_GPUArray:nrhs",
			"Two inputs are required: MediumX and Data.");
	}
	for (int i = 0; i < 2; ++i) {
		if (!mxIsInt32(prhs[i]) || mxIsComplex(prhs[i]) ||
			mxGetNumberOfElements(prhs[i]) < (size_t)N) {
			mexErrMsgIdAndTxt("test2_GPUArray:input",
				"Input %d must be a real int32 array with at least %d elements.",
				i + 1, N);
		}
	}
	const int* MediumX = (const int*)mxGetData(prhs[0]);
	const int* Data = (const int*)mxGetData(prhs[1]);

	// Create the output first: if this fails MATLAB aborts the MEX call, and
	// no device resource has been acquired yet.
	plhs[0] = mxCreateNumericMatrix(N, 1, mxINT32_CLASS, mxREAL);
	int* Data_New = (int*)mxGetData(plhs[0]);

	// --- Filtering placeholder: FFT plan (not executed yet).
	cufftHandle plan;
	const cufftResult planStatus = cufftPlan1d(&plan, N, CUFFT_R2C, BATCH);
	if (planStatus != CUFFT_SUCCESS) {
		mexErrMsgIdAndTxt("test2_GPUArray:cufftPlan",
			"cufftPlan1d failed with status %d.", (int)planStatus);
	}

	// --- Device buffers.
	cufftReal* MediumXF = NULL;   // scratch buffer reserved for the FFT input
	gpuErrchk(cudaMalloc((void**)&MediumXF, sizeof(cufftReal) * N * BATCH));

	int* Device_MediumX = NULL;
	gpuErrchk(cudaMalloc((void**)&Device_MediumX, ArrayByteSize));
	gpuErrchk(cudaMemcpy(Device_MediumX, MediumX, ArrayByteSize, cudaMemcpyHostToDevice));

	int* Device_Data = NULL;
	gpuErrchk(cudaMalloc((void**)&Device_Data, ArrayByteSize));
	gpuErrchk(cudaMemcpy(Device_Data, Data, ArrayByteSize, cudaMemcpyHostToDevice));

	int* Device_Data_Added = NULL;
	gpuErrchk(cudaMalloc((void**)&Device_Data_Added, ArrayByteSize));

	// --- Texture object viewing Device_Data as linear signed 32-bit elements.
	cudaResourceDesc resDesc;
	memset(&resDesc, 0, sizeof(resDesc));
	resDesc.resType = cudaResourceTypeLinear;
	resDesc.res.linear.devPtr = Device_Data;
	resDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
	resDesc.res.linear.desc.x = 32; // bits per channel
	resDesc.res.linear.sizeInBytes = ArrayByteSize;

	cudaTextureDesc texDesc;
	memset(&texDesc, 0, sizeof(texDesc));
	texDesc.readMode = cudaReadModeElementType;

	cudaTextureObject_t tex = 0;
	gpuErrchk(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));

	// --- Launch: ceil-division so exactly enough blocks cover N elements.
	const dim3 block(1024);
	const dim3 grid((N + block.x - 1) / block.x);
	add<<<grid, block>>>(Device_Data_Added, Device_Data, Device_MediumX, tex, N);
	gpuErrchk(cudaGetLastError());   // surfaces launch-configuration errors

	// Blocking copy; also surfaces errors raised while the kernel executed.
	gpuErrchk(cudaMemcpy(Data_New, Device_Data_Added, ArrayByteSize, cudaMemcpyDeviceToHost));

	// --- Cleanup. The texture is destroyed before the buffer it refers to.
	gpuErrchk(cudaDestroyTextureObject(tex));
	gpuErrchk(cudaFree(Device_Data));
	gpuErrchk(cudaFree(Device_Data_Added));
	gpuErrchk(cudaFree(Device_MediumX));
	gpuErrchk(cudaFree(MediumXF));
	if (cufftDestroy(plan) != CUFFT_SUCCESS) {
		mexWarnMsgIdAndTxt("test2_GPUArray:cufftDestroy",
			"cufftDestroy failed; the FFT plan may not have been released.");
	}
}
There is no filtering defined in this code for now, but I think I need to be able to get cufftPlan1d running first. Could you please let me know what is wrong? I already have "#include <cufft.h>" at the beginning of my code!
Moein. 
0 comentarios
Respuesta aceptada
  Linda Koletsou Soulti
    
 el 8 de Jul. de 2021
        Hello Moein,
you will also need to link against the cuFFT library using -lcufft, in a similar way to how NPP is linked in the following example:
Cheers,
Linda
3 comentarios
  Linda Koletsou Soulti
    
 el 9 de Jul. de 2021
				Hello Moein,
the following command should provide linking against cuFFT:
mexcuda('-v','test2_GPUArray.cu', 'NVCC_FLAGS=-gencode=arch=compute_50,code=sm_50 -Xptxas -dlcm=cg', "-LC:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\lib\x64",'-lcufft');
Cheers,
Linda
Más respuestas (0)
Ver también
Categorías
				Más información sobre GPU CUDA and MEX Programming en Help Center y File Exchange.
			
	Community Treasure Hunt
Find the treasures in MATLAB Central and discover how the community can help you!
Start Hunting!

