mex cuda code for matrix multiplication
Mostrar comentarios más antiguos
Hello,
I made a simple mex cuda code to calculate multiplication of two matrices of size NxN but never get the same results as in matlab command
C = A*B except for B is a diagonal matrix. Just wondering if I was wrong at any point? The code is attached. Thanks.
#include "mex.h"
#include "cuda_runtime.h"
// CUDA kernel for matrix multiplication
__global__ void matrixMultiplication(const double* A, const double* B, double* C, int N) {
int row = blockIdx.y * blockDim.y + threadIdx.y;
int col = blockIdx.x * blockDim.x + threadIdx.x;
if (row < N && col < N) {
double sum = 0;
for (int i = 0; i < N; i++) {
sum += A[row * N + i] * B[i * N + col];
}
C[row * N + col] = sum;
}
}
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
/* Declare all variables */
const double* A;
const double* B;
double* C;
double* d_A = nullptr;
double* d_B = nullptr;
double* d_C = nullptr;
int N;
A = mxGetPr(prhs[0]);
B = mxGetPr(prhs[1]); // Use prhs[1] for matrix B
N = mxGetN(prhs[0]); // Get the number of columns of matrix A
/* Initialize output array */
plhs[0] = mxCreateDoubleMatrix((mwSize)N, (mwSize)N, mxREAL);
C = mxGetPr(plhs[0]);
// Allocate device memory for matrices A, B, and C
cudaMalloc((void**)&d_A, N * N * sizeof(double));
cudaMalloc((void**)&d_B, N * N * sizeof(double));
cudaMalloc((void**)&d_C, N * N * sizeof(double));
// Copy matrices A and B from host to device
cudaMemcpy(d_A, A, N * N * sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpy(d_B, B, N * N * sizeof(double), cudaMemcpyHostToDevice);
// Define grid and block dimensions
dim3 dimGrid(128, 128);
dim3 dimBlock(1, 1);
// dim3 dimGrid((N + 15) / 16, (N + 15) / 16); // Adjust block size as needed
// dim3 dimBlock(16, 16);
// Launch the CUDA kernel
matrixMultiplication << <dimGrid, dimBlock >> > (d_A, d_B, d_C, N);
cudaDeviceSynchronize();
// Copy the result matrix C from device to host
cudaMemcpy(C, d_C, N * N * sizeof(double), cudaMemcpyDeviceToHost);
// Free device memory
cudaFree(d_A);
cudaFree(d_B);
cudaFree(d_C);
}
Respuesta aceptada
Más respuestas (0)
Categorías
Más información sobre Matrix Indexing en Centro de ayuda y File Exchange.
Community Treasure Hunt
Find the treasures in MATLAB Central and discover how the community can help you!
Start Hunting!