00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #if !defined escript_LocalOps_H
00016 #define escript_LocalOps_H
00017 #if defined(_WIN32) && defined(__INTEL_COMPILER)
00018 # include <mathimf.h>
00019 #else
00020 # include <math.h>
00021 #endif
00022 #ifndef M_PI
00023 # define M_PI 3.14159265358979323846
00024 #endif
00025
00026
00035 namespace escript {
00036
00041 inline
00042 bool nancheck(double d)
00043 {
00044
00045
00046
00047 #ifdef isnan
00048 return isnan(d);
00049 #elif defined _isnan
00050 return _isnan(d);
00051 #else
00052 return false;
00053 #endif
00054 }
00055
00060 inline
00061 double makeNaN()
00062 {
00063 #ifdef nan
00064 return nan("");
00065 #else
00066 return sqrt(-1.);
00067 #endif
00068
00069 }
00070
00071
00079 inline
00080 void eigenvalues1(const double A00,double* ev0) {
00081
00082 *ev0=A00;
00083
00084 }
00095 inline
00096 void eigenvalues2(const double A00,const double A01,const double A11,
00097 double* ev0, double* ev1) {
00098 const register double trA=(A00+A11)/2.;
00099 const register double A_00=A00-trA;
00100 const register double A_11=A11-trA;
00101 const register double s=sqrt(A01*A01-A_00*A_11);
00102 *ev0=trA-s;
00103 *ev1=trA+s;
00104 }
00119 inline
00120 void eigenvalues3(const double A00, const double A01, const double A02,
00121 const double A11, const double A12,
00122 const double A22,
00123 double* ev0, double* ev1,double* ev2) {
00124
00125 const register double trA=(A00+A11+A22)/3.;
00126 const register double A_00=A00-trA;
00127 const register double A_11=A11-trA;
00128 const register double A_22=A22-trA;
00129 const register double A01_2=A01*A01;
00130 const register double A02_2=A02*A02;
00131 const register double A12_2=A12*A12;
00132 const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
00133 if (p<=0.) {
00134 *ev2=trA;
00135 *ev1=trA;
00136 *ev0=trA;
00137
00138 } else {
00139 const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
00140 const register double sq_p=sqrt(p/3.);
00141 register double z=-q/(2*pow(sq_p,3));
00142 if (z<-1.) {
00143 z=-1.;
00144 } else if (z>1.) {
00145 z=1.;
00146 }
00147 const register double alpha_3=acos(z)/3.;
00148 *ev2=trA+2.*sq_p*cos(alpha_3);
00149 *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
00150 *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
00151 }
00152 }
00162 inline
00163 void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
00164 {
00165 eigenvalues1(A00,ev0);
00166 *V00=1.;
00167 return;
00168 }
00180 inline
00181 void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
00182 double* V0, double*V1)
00183 {
00184 register double absA00=fabs(A00);
00185 register double absA10=fabs(A10);
00186 register double absA01=fabs(A01);
00187 register double absA11=fabs(A11);
00188 register double m=absA11>absA10 ? absA11 : absA10;
00189 if (absA00>m || absA01>m) {
00190 *V0=-A01;
00191 *V1=A00;
00192 } else {
00193 if (m<=0) {
00194 *V0=1.;
00195 *V1=0.;
00196 } else {
00197 *V0=A11;
00198 *V1=-A10;
00199 }
00200 }
00201 }
00220 inline
00221 void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
00222 const double A01,const double A11,const double A21,
00223 const double A02,const double A12,const double A22,
00224 double* V0,double* V1,double* V2)
00225 {
00226 double TEMP0,TEMP1;
00227 register const double I00=1./A00;
00228 register const double IA10=I00*A10;
00229 register const double IA20=I00*A20;
00230 vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
00231 A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
00232 *V0=-(A10*TEMP0+A20*TEMP1);
00233 *V1=A00*TEMP0;
00234 *V2=A00*TEMP1;
00235 }
00236
00254 inline
00255 void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
00256 double* ev0, double* ev1,
00257 double* V00, double* V10, double* V01, double* V11,
00258 const double tol)
00259 {
00260 double TEMP0,TEMP1;
00261 eigenvalues2(A00,A01,A11,ev0,ev1);
00262 const register double absev0=fabs(*ev0);
00263 const register double absev1=fabs(*ev1);
00264 register double max_ev=absev0>absev1 ? absev0 : absev1;
00265 if (fabs((*ev0)-(*ev1))<tol*max_ev) {
00266 *V00=1.;
00267 *V10=0.;
00268 *V01=0.;
00269 *V11=1.;
00270 } else {
00271 vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
00272 const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
00273 if (TEMP0<0.) {
00274 *V00=-TEMP0*scale;
00275 *V10=-TEMP1*scale;
00276 if (TEMP1<0.) {
00277 *V01= *V10;
00278 *V11=-(*V00);
00279 } else {
00280 *V01=-(*V10);
00281 *V11= (*V00);
00282 }
00283 } else if (TEMP0>0.) {
00284 *V00=TEMP0*scale;
00285 *V10=TEMP1*scale;
00286 if (TEMP1<0.) {
00287 *V01=-(*V10);
00288 *V11= (*V00);
00289 } else {
00290 *V01= (*V10);
00291 *V11=-(*V00);
00292 }
00293 } else {
00294 *V00=0.;
00295 *V10=1;
00296 *V11=0.;
00297 *V01=1.;
00298 }
00299 }
00300 }
00309 inline
00310 void normalizeVector3(double* V0,double* V1,double* V2)
00311 {
00312 register double s;
00313 if (*V0>0) {
00314 s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
00315 *V0*=s;
00316 *V1*=s;
00317 *V2*=s;
00318 } else if (*V0<0) {
00319 s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
00320 *V0*=s;
00321 *V1*=s;
00322 *V2*=s;
00323 } else {
00324 if (*V1>0) {
00325 s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
00326 *V1*=s;
00327 *V2*=s;
00328 } else if (*V1<0) {
00329 s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
00330 *V1*=s;
00331 *V2*=s;
00332 } else {
00333 *V2=1.;
00334 }
00335 }
00336 }
00363 inline
00364 void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
00365 const double A11, const double A12, const double A22,
00366 double* ev0, double* ev1, double* ev2,
00367 double* V00, double* V10, double* V20,
00368 double* V01, double* V11, double* V21,
00369 double* V02, double* V12, double* V22,
00370 const double tol)
00371 {
00372 register const double absA01=fabs(A01);
00373 register const double absA02=fabs(A02);
00374 register const double m=absA01>absA02 ? absA01 : absA02;
00375 if (m<=0) {
00376 double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
00377 eigenvalues_and_eigenvectors2(A11,A12,A22,
00378 &TEMP_EV0,&TEMP_EV1,
00379 &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
00380 if (A00<=TEMP_EV0) {
00381 *V00=1.;
00382 *V10=0.;
00383 *V20=0.;
00384 *V01=0.;
00385 *V11=TEMP_V00;
00386 *V21=TEMP_V10;
00387 *V02=0.;
00388 *V12=TEMP_V01;
00389 *V22=TEMP_V11;
00390 *ev0=A00;
00391 *ev1=TEMP_EV0;
00392 *ev2=TEMP_EV1;
00393 } else if (A00>TEMP_EV1) {
00394 *V02=1.;
00395 *V12=0.;
00396 *V22=0.;
00397 *V00=0.;
00398 *V10=TEMP_V00;
00399 *V20=TEMP_V10;
00400 *V01=0.;
00401 *V11=TEMP_V01;
00402 *V21=TEMP_V11;
00403 *ev0=TEMP_EV0;
00404 *ev1=TEMP_EV1;
00405 *ev2=A00;
00406 } else {
00407 *V01=1.;
00408 *V11=0.;
00409 *V21=0.;
00410 *V00=0.;
00411 *V10=TEMP_V00;
00412 *V20=TEMP_V10;
00413 *V02=0.;
00414 *V12=TEMP_V01;
00415 *V22=TEMP_V11;
00416 *ev0=TEMP_EV0;
00417 *ev1=A00;
00418 *ev2=TEMP_EV1;
00419 }
00420 } else {
00421 eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
00422 const register double absev0=fabs(*ev0);
00423 const register double absev1=fabs(*ev1);
00424 const register double absev2=fabs(*ev2);
00425 register double max_ev=absev0>absev1 ? absev0 : absev1;
00426 max_ev=max_ev>absev2 ? max_ev : absev2;
00427 const register double d_01=fabs((*ev0)-(*ev1));
00428 const register double d_12=fabs((*ev1)-(*ev2));
00429 const register double max_d=d_01>d_12 ? d_01 : d_12;
00430 if (max_d<=tol*max_ev) {
00431 *V00=1.;
00432 *V10=0;
00433 *V20=0;
00434 *V01=0;
00435 *V11=1.;
00436 *V21=0;
00437 *V02=0;
00438 *V12=0;
00439 *V22=1.;
00440 } else {
00441 const register double S00=A00-(*ev0);
00442 const register double absS00=fabs(S00);
00443 if (absS00>m) {
00444 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
00445 } else if (absA02<m) {
00446 vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
00447 } else {
00448 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
00449 }
00450 normalizeVector3(V00,V10,V20);;
00451 const register double T00=A00-(*ev2);
00452 const register double absT00=fabs(T00);
00453 if (absT00>m) {
00454 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
00455 } else if (absA02<m) {
00456 vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
00457 } else {
00458 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
00459 }
00460 const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
00461 *V02-=dot*(*V00);
00462 *V12-=dot*(*V10);
00463 *V22-=dot*(*V20);
00464 normalizeVector3(V02,V12,V22);
00465 *V01=(*V10)*(*V22)-(*V12)*(*V20);
00466 *V11=(*V20)*(*V02)-(*V00)*(*V22);
00467 *V21=(*V00)*(*V12)-(*V02)*(*V10);
00468 normalizeVector3(V01,V11,V21);
00469 }
00470 }
00471 }
00472
00473
00474
00475 inline
00476 void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
00477 {
00478 if (transpose == 0) {
00479 for (int i=0; i<SL; i++) {
00480 for (int j=0; j<SR; j++) {
00481 double sum = 0.0;
00482 for (int l=0; l<SM; l++) {
00483 sum += A[i+SL*l] * B[l+SM*j];
00484 }
00485 C[i+SL*j] = sum;
00486 }
00487 }
00488 }
00489 else if (transpose == 1) {
00490 for (int i=0; i<SL; i++) {
00491 for (int j=0; j<SR; j++) {
00492 double sum = 0.0;
00493 for (int l=0; l<SM; l++) {
00494 sum += A[i*SM+l] * B[l+SM*j];
00495 }
00496 C[i+SL*j] = sum;
00497 }
00498 }
00499 }
00500 else if (transpose == 2) {
00501 for (int i=0; i<SL; i++) {
00502 for (int j=0; j<SR; j++) {
00503 double sum = 0.0;
00504 for (int l=0; l<SM; l++) {
00505 sum += A[i+SL*l] * B[l*SR+j];
00506 }
00507 C[i+SL*j] = sum;
00508 }
00509 }
00510 }
00511 }
00512
00513 template <typename UnaryFunction>
00514 inline void tensor_unary_operation(const int size,
00515 const double *arg1,
00516 double * argRes,
00517 UnaryFunction operation)
00518 {
00519 for (int i = 0; i < size; ++i) {
00520 argRes[i] = operation(arg1[i]);
00521 }
00522 return;
00523 }
00524
00525 template <typename BinaryFunction>
00526 inline void tensor_binary_operation(const int size,
00527 const double *arg1,
00528 const double *arg2,
00529 double * argRes,
00530 BinaryFunction operation)
00531 {
00532 for (int i = 0; i < size; ++i) {
00533 argRes[i] = operation(arg1[i], arg2[i]);
00534 }
00535 return;
00536 }
00537
00538 template <typename BinaryFunction>
00539 inline void tensor_binary_operation(const int size,
00540 double arg1,
00541 const double *arg2,
00542 double *argRes,
00543 BinaryFunction operation)
00544 {
00545 for (int i = 0; i < size; ++i) {
00546 argRes[i] = operation(arg1, arg2[i]);
00547 }
00548 return;
00549 }
00550
00551 template <typename BinaryFunction>
00552 inline void tensor_binary_operation(const int size,
00553 const double *arg1,
00554 double arg2,
00555 double *argRes,
00556 BinaryFunction operation)
00557 {
00558 for (int i = 0; i < size; ++i) {
00559 argRes[i] = operation(arg1[i], arg2);
00560 }
00561 return;
00562 }
00563
00564 }
00565 #endif