00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef INC_PASO_BLOCKOPS
00016 #define INC_PASO_BLOCKOPS
00017
00018 #include "Common.h"
00019 #include "Paso.h"
00020
00021 #ifdef USE_LAPACK
00022 #ifdef MKL_LAPACK
00023 #include <mkl_lapack.h>
00024 #include <mkl_cblas.h>
00025 #else
00026 #include <clapack.h>
00027 #include <cblas.h>
00028 #endif
00029 #endif
00030
00031 void Paso_BlockOps_solveAll(dim_t n_block,dim_t n,double* D,index_t* pivot,double* x);
00032
00033 #define PASO_MISSING_CLAPACK Esys_setError(TYPE_ERROR, "You need to install a CLAPACK version to run a block size > 3.")
00034
00035
00036
00037 #define Paso_BlockOps_Cpy_2(R, V) \
00038 {\
00039 *(R+0)=*(V+0);\
00040 *(R+1)=*(V+1);\
00041 }
00042
00043 #define Paso_BlockOps_Cpy_3(R, V) \
00044 {\
00045 *(R+0)=*(V+0);\
00046 *(R+1)=*(V+1);\
00047 *(R+2)=*(V+2);\
00048 }
00049
00050 #define Paso_BlockOps_Cpy_N(N,R,V) memcpy((void*)R, (void*)V, ( (size_t) N ) * sizeof(double) )
00051
00052
00053
00054
00055 #define Paso_BlockOps_SMV_2(R,MAT, V) \
00056 {\
00057 register double S1=*(V+0); \
00058 register double S2=*(V+1);\
00059 register double A11=*(MAT+0);\
00060 register double A12=*(MAT+2);\
00061 register double A21=*(MAT+1);\
00062 register double A22=*(MAT+3);\
00063 *(R+0)-=A11 * S1 + A12 * S2;\
00064 *(R+1)-=A21 * S1 + A22 * S2;\
00065 }
00066
00067 #define Paso_BlockOps_SMV_3(R, MAT, V) \
00068 {\
00069 register double S1=*(V+0);\
00070 register double S2=*(V+1);\
00071 register double S3=*(V+2);\
00072 register double A11=*(MAT+0);\
00073 register double A21=*(MAT+1);\
00074 register double A31=*(MAT+2);\
00075 register double A12=*(MAT+3);\
00076 register double A22=*(MAT+4);\
00077 register double A32=*(MAT+5);\
00078 register double A13=*(MAT+6);\
00079 register double A23=*(MAT+7);\
00080 register double A33=*(MAT+8);\
00081 *(R+0)-=A11 * S1 + A12 * S2 + A13 * S3; \
00082 *(R+1)-=A21 * S1 + A22 * S2 + A23 * S3;\
00083 *(R+2)-=A31 * S1 + A32 * S2 + A33 * S3;\
00084 }
00085
00086
00087
00088
00089 #ifdef USE_LAPACK
00090 #define Paso_BlockOps_SMV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, -1., _MAT_, _N_, _V_, 1, (1.), _R_, 1)
00091 #else
00092 #define Paso_BlockOps_SMV_N(N, R, MAT, V) PASO_MISSING_CLAPACK
00093 #endif
00094
00095 #ifdef USE_LAPACK
00096 #define Paso_BlockOps_MV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, 1., _MAT_, _N_, _V_, 1, (0.), _R_, 1)
00097 #else
00098 #define Paso_BlockOps_MV_N(N, R, MAT, V) PASO_MISSING_CLAPACK
00099 #endif
00100
00101 #define Paso_BlockOps_invM_2(invA, A, failed) \
00102 {\
00103 register double A11=*(A+0);\
00104 register double A12=*(A+2);\
00105 register double A21=*(A+1);\
00106 register double A22=*(A+3);\
00107 register double D = A11*A22-A12*A21; \
00108 if (ABS(D) > 0 ){\
00109 D=1./D;\
00110 *(invA)= A22*D;\
00111 *(invA+1)=-A21*D;\
00112 *(invA+2)=-A12*D;\
00113 *(invA+3)= A11*D;\
00114 } else {\
00115 *failed=1;\
00116 }\
00117 }
00118
00119 #define Paso_BlockOps_invM_3(invA, A, failed) \
00120 {\
00121 register double A11=*(A+0);\
00122 register double A21=*(A+1);\
00123 register double A31=*(A+2);\
00124 register double A12=*(A+3);\
00125 register double A22=*(A+4);\
00126 register double A32=*(A+5);\
00127 register double A13=*(A+6);\
00128 register double A23=*(A+7);\
00129 register double A33=*(A+8);\
00130 register double D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);\
00131 if (ABS(D) > 0 ){\
00132 D=1./D;\
00133 *(invA )=(A22*A33-A23*A32)*D;\
00134 *(invA+1)=(A31*A23-A21*A33)*D;\
00135 *(invA+2)=(A21*A32-A31*A22)*D;\
00136 *(invA+3)=(A13*A32-A12*A33)*D;\
00137 *(invA+4)=(A11*A33-A31*A13)*D;\
00138 *(invA+5)=(A12*A31-A11*A32)*D;\
00139 *(invA+6)=(A12*A23-A13*A22)*D;\
00140 *(invA+7)=(A13*A21-A11*A23)*D;\
00141 *(invA+8)=(A11*A22-A12*A21)*D;\
00142 } else {\
00143 *failed=1;\
00144 }\
00145 }
00146
00147 #ifdef USE_LAPACK
00148 #ifdef MKL_LAPACK
00149 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \
00150 {\
00151 int NN=N; \
00152 int res =0; \
00153 dgetrf(&NN,&NN,MAT,&NN,PIVOT,&res); \
00154 if (res!=0) *failed=1;\
00155 }
00156 #else
00157 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \
00158 {\
00159 int res=clapack_dgetrf(CblasColMajor, N,N,MAT,N, PIVOT); \
00160 if (res!=0) *failed=1;\
00161 }
00162 #endif
00163 #else
00164 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) PASO_MISSING_CLAPACK
00165 #endif
00166
00167 #ifdef USE_LAPACK
00168 #ifdef MKL_LAPACK
00169 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \
00170 {\
00171 int NN=N; \
00172 int res =0; \
00173 int ONE=1; \
00174 dgetrs("N", &NN, &ONE, MAT, &NN, PIVOT, X, &NN, &res); \
00175 if (res!=0) *failed=1;\
00176 }
00177 #else
00178 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \
00179 {\
00180 int res=clapack_dgetrs(CblasColMajor, CblasNoTrans, N,1,MAT,N, PIVOT, X, N); \
00181 if (res!=0) *failed=1;\
00182 }
00183 #endif
00184 #else
00185 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) PASO_MISSING_CLAPACK
00186 #endif
00187
00188
00189
00190
00191 #define Paso_BlockOps_MViP_2(MAT, V) \
00192 { \
00193 register double S1=*(V+0);\
00194 register double S2=*(V+1);\
00195 register double A11=*(MAT+0);\
00196 register double A12=*(MAT+2);\
00197 register double A21=*(MAT+1);\
00198 register double A22=*(MAT+3);\
00199 *(V+0) = A11 * S1 + A12 * S2;\
00200 *(V+1) = A21 * S1 + A22 * S2;\
00201 }
00202
00203
00204 #define Paso_BlockOps_MViP_3(MAT, V) \
00205 { \
00206 register double S1=*(V+0);\
00207 register double S2=*(V+1);\
00208 register double S3=*(V+2);\
00209 register double A11=*(MAT+0);\
00210 register double A21=*(MAT+1);\
00211 register double A31=*(MAT+2);\
00212 register double A12=*(MAT+3);\
00213 register double A22=*(MAT+4);\
00214 register double A32=*(MAT+5);\
00215 register double A13=*(MAT+6);\
00216 register double A23=*(MAT+7);\
00217 register double A33=*(MAT+8);\
00218 *(V+0)=A11 * S1 + A12 * S2 + A13 * S3; \
00219 *(V+1)=A21 * S1 + A22 * S2 + A23 * S3;\
00220 *(V+2)=A31 * S1 + A32 * S2 + A33 * S3;\
00221 }
00222
00223
00224 #endif