Mex file c. Getting different results from matlab

8 visualizaciones (últimos 30 días)
Abdel Darwich
Abdel Darwich el 16 de Nov. de 2020
Comentada: Abdel Darwich el 18 de Nov. de 2020
Hi all
I have recently started to use Mex n c,
I have created a mex file using the auotgenrated function in MATLAB, however I started writing mine to make it more efficient as my code still expensive.
For some reason I do get a discrepancy between my c code and MATLAB function which can reach 20%. I do not think I have abog in the code. I do think is something to do with memory allocation and index handling It could be a bug also. I would appreciate any help. Thanks a lot.
I will add the matlab code and the c code.
========================== % .m code
function [Vxs,Vys,Vzs] = Induced_Velocity_Tret_double(X0,Y0,Z0 ,X,Y,Z,Gamma, dt)
% allocate meomery
[Nc,Mc] = size(X0);
[Ns,Ms] = size(X);
Vxs = zeros(Nc,Mc); Vys = zeros(Nc,Mc); Vzs = zeros(Nc,Mc);
cnst1 = 4 .* pi;
%% Correction terms
alpha = 1.25643;
visc = 1.802e-5; % static ari viscuicty at 15 deg
r02 = 0.0001;
a1 = 2e-4;
%% start calcauting induced veocity
for i0 = 1:Nc
for j0 = 1:Mc
for i = 1:Ns-1
t = double(i) .* dt ;
for j = 1:Ms-1
cnst2 = Gamma(i,j)./ cnst1;
% correction terms
delta = 1 + a1 .* sqrt(Gamma(i,j).^2./visc);
rc = sqrt(r02+(4.*alpha.*t.*visc.*delta));
%------ line 1
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i,j), Y(i,j), Z(i,j), X(i,j+1), Y(i,j+1), Z(i,j+1), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
%------ line 2
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i,j+1), Y(i,j+1), Z(i,j+1), X(i+1,j+1),Y(i+1,j+1),Z(i+1,j+1), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
%------ line 3
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i+1,j+1),Y(i+1,j+1),Z(i+1,j+1),X(i+1,j), Y(i+1,j), Z(i+1,j), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
%------ line4
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i+1,j), Y(i+1,j), Z(i+1,j), X(i,j), Y(i,j), Z(i,j), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
end
end
end
end
end
function [Vx,Vy,Vz] = Vind_LINE(x0,y0,z0,x1,y1,z1,x2,y2,z2,cnst, rc)
tol = 1e-8;
Vx = 0.;Vy = 0.;Vz = 0.;
rx1 = x0-x1; ry1 = y0-y1; rz1 = z0-z1;
rx2 = x0-x2; ry2 = y0-y2; rz2 = z0-z2;
crsr1r2x = (ry1 .* rz2) - (rz1 .* ry2);
crsr1r2y = (rz1 .* rx2) - (rx1 .* rz2);
crsr1r2z = (rx1 .* ry2) - (ry1 .* rx2);
magcrsr1r2 = (crsr1r2x.^2 + crsr1r2y.^2 + crsr1r2z.^2);
if magcrsr1r2 > tol
Frc1x = crsr1r2x ./ magcrsr1r2;
Frc1y = crsr1r2y ./ magcrsr1r2;
Frc1z = crsr1r2z ./ magcrsr1r2;
magr1 = sqrt(rx1.^2 + ry1.^2 + rz1.^2);
magr2 = sqrt(rx2.^2 + ry2.^2 + rz2.^2);
r1minusr2x = rx1-rx2;
r1minusr2y = ry1-ry2;
r1minusr2z = rz1-rz2;
Frc2 = r1minusr2x.* ((rx1./magr1)-(rx2./magr2)) + r1minusr2y.*((ry1./magr1)-(ry2./magr2)) + r1minusr2z.*((rz1./magr1)-(rz2./magr2)) ;
%% correction term
magr0 = sqrt(r1minusr2x.^2 + r1minusr2y.^2 + r1minusr2z.^2);
h = magcrsr1r2 ./ magr0;
k = h.^2./(rc.^2 +h.^2);
%%
Vx = cnst .* Frc1x .* Frc2 .* k;
Vy = cnst .* Frc1y .* Frc2 .* k;
Vz = cnst .* Frc1z .* Frc2 .* k;
end
end
========================== % .c mex code
#include <stdio.h>
#include <math.h>
#include "mex.h"
#include "matrix.h"
#include <omp.h>
static void Vind_LINE(double x0, double b_y0, double z0, double x1, double b_y1,
double z1, double x2, double y2, double z2, double cnst,
double rc, double *Vx, double *Vy, double *Vz)
{
double rx1; double ry1; double rz1; double rx2; double ry2; double rz2;
double crsr1r2x; double crsr1r2y; double crsr1r2z; double magcrsr1r2;
double magr1; double magr2; double r1minusr2x; double r1minusr2y; double r1minusr2z;
*Vx = 0.0;
*Vy = 0.0;
*Vz = 0.0;
/* Bounde vortex */
/* r0x = x2-x1; r0y = y2-y1; r0z = z2-z1; */
rx1 = x0 - x1;
ry1 = b_y0 - b_y1;
rz1 = z0 - z1;
rx2 = x0 - x2;
ry2 = b_y0 - y2;
rz2 = z0 - z2;
crsr1r2x = ry1 * rz2 - rz1 * ry2;
crsr1r2y = rz1 * rx2 - rx1 * rz2;
crsr1r2z = rx1 * ry2 - ry1 * rx2;
magcrsr1r2 = (crsr1r2x * crsr1r2x + crsr1r2y * crsr1r2y) + crsr1r2z * crsr1r2z;
if (magcrsr1r2 > 1.0E-8) {
magr1 = sqrt((rx1 * rx1 + ry1 * ry1) + rz1 * rz1);
magr2 = sqrt((rx2 * rx2 + ry2 * ry2) + rz2 * rz2);
r1minusr2x = rx1 - rx2;
r1minusr2y = ry1 - ry2;
r1minusr2z = rz1 - rz2;
magr2 = (r1minusr2x * (rx1 / magr1 - rx2 / magr2) + r1minusr2y * (ry1 /
magr1 - ry2 / magr2)) + r1minusr2z * (rz1 / magr1 - rz2 / magr2);
/* %% correction term */
magr1 = magcrsr1r2 / sqrt((r1minusr2x * r1minusr2x + r1minusr2y * r1minusr2y)
+ r1minusr2z * r1minusr2z);
magr1 *= magr1;
magr1 /= rc * rc + magr1;
/* %% */
*Vx = cnst * (crsr1r2x / magcrsr1r2) * magr2 * magr1;
*Vy = cnst * (crsr1r2y / magcrsr1r2) * magr2 * magr1;
*Vz = cnst * (crsr1r2z / magcrsr1r2) * magr2 * magr1;
}
}
/*
void mexFunction(int nlhs, mxArray *plhs[],
int nrhs, const mxArray *prhs[])
{
double *X0, *Y0, *Z0, *X, *Y, *Z, *Gamma;
double dt;
int Nc, Mc , Ns, Ms, NGamma, Nss, NV;
X0 = mxGetDoubles (prhs[0]);
Nc = mxGetM(prhs[0]);
Mc = mxGetN(prhs[0]);
Y0 = mxGetDoubles (prhs[1]);
Z0 = mxGetDoubles (prhs[2]);
X = mxGetDoubles (prhs[3]);
Ns = mxGetM(prhs[3]);
Ms = mxGetN(prhs[3]);
Y = mxGetDoubles (prhs[4]);
Z = mxGetDoubles (prhs[5]);
Gamma = mxGetDoubles (prhs[6]);
NGamma = mxGetM(prhs[6]);
dt = mxGetScalar (prhs[7]);
/* OUTPUTS */
plhs[0] = mxCreateDoubleMatrix(Nc, Mc, mxREAL);
plhs[1] = mxCreateDoubleMatrix(Nc, Mc, mxREAL);
plhs[2] = mxCreateDoubleMatrix(Nc, Mc, mxREAL);
//double Vxs[Nc][Mc], Vys[Nc][Mc], Vzs[Nc][Mc];
// double *Vxs, *Vys, *Vzs;
double* Vxs = (double*) mxGetDoubles(plhs[0]);
double* Vys = (double*) mxGetDoubles(plhs[1]);
double* Vzs = (double*) mxGetDoubles(plhs[2]);
int i0; int loop_ub; int i1; int b_j0; int i2; int i; double t; int i3; int j;
double cnst2; double rc; double Vx; double Vy; double Vz;
for (loop_ub = 0; loop_ub < Nc; loop_ub++) {
for (b_j0 = 0; b_j0 < Mc; b_j0++) {
for (i = 0; i <= Ns - 2; i++) {
i3 = Ms;
for (j = 0; j <= i3 - 2; j++) {
cnst2 = Gamma[i + NGamma * j] / 12.566370614359172;
/* correction terms */
rc = Gamma[i + NGamma * j] ;
rc = sqrt(0.0001 + 5.02572 * t * 1.802E-5 * (1.0 + 0.0002 * sqrt(rc *
rc / 1.802E-5)));
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[i + Ns * j], Y[i + Ns * j], Z[i + Ns * j],
X[i + Ns * (j + 1)], Y[i + Ns * (j + 1)], Z[i +Ns * (j + 1)],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[i + Ns * (j + 1)], Y[i + Ns *(j + 1)], Z[i + Ns * (j + 1)],
X[(i + Ns * (j + 1)) + 1], Y[(i + Ns * (j + 1)) + 1], Z[(i + Ns * (j + 1)) + 1],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[(i + Ns * (j + 1)) + 1], Y[(i + Ns * (j + 1)) + 1], Z[(i + Ns * (j +1)) + 1],
X[(i + Ns * j) + 1], Y[(i + Ns * j) + 1], Z[(i + Ns * j) + 1],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[(i +Ns * j) + 1], Y[(i+ Ns * j) + 1], Z[(i + Ns * j) + 1],
X[i + Ns * j], Y[i + Ns * j], Z[i + Ns * j],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
}
}
}
}
mxFree(Vxs);
mxFree(Vys);
mxFree(Vzs);
}
/*
* File trailer for Induced_Velocity_Tret3.c
*
* [EOF]
*/
  3 comentarios
Abdel Darwich
Abdel Darwich el 16 de Nov. de 2020
Editada: Abdel Darwich el 16 de Nov. de 2020
Hi James
I did what you said, the difference comes only in the output. Vxs, Vys and Vzs. I will edit my answer and make the code mre readable.
I did spend a lot of time trying to understand where discrepancy is coming from by printing the inputs and outputs. It can only be seen in
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
I have large matrrices which is around 60x200 so I am wondering if MATLAB is not string the data in double precesion.
Abdel Darwich
Abdel Darwich el 16 de Nov. de 2020
Hi James
I found out that my problem is the output indiuces, [loop_ub + Nc * b_j0] m the data is not stored in the order I intended to have it in.
My outout is an array nxm, I guess i am still unsure how mex deal with matlab indices.
I am wondering would I be able to use openmp in mex ?

Iniciar sesión para comentar.

Respuesta aceptada

James Tursa
James Tursa el 17 de Nov. de 2020
MATLAB stores matrices in column order. E.g., if A is a 2x3 matrix, and p is a pointer to the data area of A in a mex routine, then MATLAB will store the elements in memory as follows:
p[0] = A(1,1)
p[1] = A(2,1)
p[2] = A(1,2)
p[3] = A(2,2)
p[4] = A(1,3)
p[5] = A(2,3)
If real double full matrix A had generic dimensions of MxN, then accessing the elements linearly would be as follows with appropriate variable definitions:
M = mxGetM(prhs[0]);
N = mxGetN(prhs[0]);
p = mxGetDoubles(prhs[0]);
k = 0;
for( j=0; j<N; j++ ) {
for( i=0; i<M; i++ ) {
mexPrintf("p[%d] = A(%d,%d) = %g\n",k++,i+1,j+1,p[i+M*j]);
}
}
You can use OpenMP in mex routines, but you cannot do anything that invokes the MATLAB Memory Manager inside your parallel threads. So calling inspection routines such as the following would be OK inside these threads:
mxGetM
mxGetN
mxGetNumberOfDimensions
etc.
But doing any allocation/deallocation would not be OK inside the parallel threads:
mxCreateDoubleMatrix
mxDestroyArray
etc.
You must do all MATLAB API allocation/deallocation outside the parallel threads.

Más respuestas (0)

Categorías

Más información sobre Write C Functions Callable from MATLAB (MEX Files) en Help Center y File Exchange.

Etiquetas

Productos

Community Treasure Hunt

Find the treasures in MATLAB Central and discover how the community can help you!

Start Hunting!

Translated by