00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00040 #ifndef MM_KERNEL_OUTER_A_H
00041 #define MM_KERNEL_OUTER_A_H
00042 #include "common.h"
00043 #ifdef _OPENMP
00044 #include <omp.h>
00045 #endif
00046
00052 template<typename T_gemm_kernel, int T_M_block, int T_N_block>
00053 class MM_kernel_outer_A {
00054 template<int T_rows_block, int T_cols_block, typename T_ordering_block, typename T_pack_type_kernel>
00055 class Pack;
00056 public:
00057 static int const M_kernel = T_gemm_kernel::M;
00058 static int const N_kernel = T_gemm_kernel::N;
00059 static int const K_kernel = T_gemm_kernel::K;
00060 static int const M_block = T_M_block;
00061 static int const N_block = T_N_block;
00062 static int const K_block = 1;
00063 static int const M = M_kernel * M_block;
00064 static int const N = N_kernel * N_block;
00065 static int const K = K_kernel * K_block;
00066 typedef typename T_gemm_kernel::real real;
00068 typedef Ordering_col_wise Ordering_block_A;
00069 typedef Ordering_col_wise Ordering_block_B;
00070 typedef Ordering_col_wise Ordering_block_C;
00071
00072 typedef Pack< M_block, K_block, Ordering_block_A, typename T_gemm_kernel::Pack_type_A > Pack_type_A;
00073 typedef Pack< K_block, N_block, Ordering_block_B, typename T_gemm_kernel::Pack_type_B > Pack_type_B;
00074 typedef Pack< M_block, N_block, Ordering_block_C, typename T_gemm_kernel::Pack_type_C > Pack_type_C;
00078 static void exec( real const * const * const A,
00079 real const * const * const B,
00080 real * const C,
00081 int const i = 1);
00082
00083 };
00084
00085 template<typename T_gemm_kernel, int T_M_block, int T_N_block>
00086 void MM_kernel_outer_A<T_gemm_kernel, T_M_block, T_N_block>::exec( real const * const * const A,
00087 real const * const * const B,
00088 real * const C,
00089 int const n_mul ) {
00090 #if 1
00091 for ( int n = 0; n < N_block; ++n )
00092 for ( int m = 0; m < M_block; ++m ) {
00093 T_gemm_kernel::exec( A, B, C, n_mul,
00094 Ordering_block_A::get( m, 0, M_block, K_block ) * T_gemm_kernel::Pack_type_A::size_packed,
00095 Ordering_block_B::get( 0, n, K_block, N_block ) * T_gemm_kernel::Pack_type_B::size_packed,
00096 Ordering_block_C::get( m, n, M_block, N_block ) * T_gemm_kernel::Pack_type_C::size_packed );
00097 }
00098
00099 #else
00100 #if 1
00101
00102 T_gemm_kernel::template exec<Ordering_block_A::template Get<0, 0, M_block, K_block>::index * T_gemm_kernel::Pack_type_A::size_packed,
00103 Ordering_block_B::template Get<0, 0, K_block, N_block>::index * T_gemm_kernel::Pack_type_B::size_packed,
00104 Ordering_block_C::template Get<0, 0, M_block, N_block>::index * T_gemm_kernel::Pack_type_C::size_packed>( A, B, C, n_mul );
00105 T_gemm_kernel::template exec<Ordering_block_A::template Get<1, 0, M_block, K_block>::index * T_gemm_kernel::Pack_type_A::size_packed,
00106 Ordering_block_B::template Get<0, 0, K_block, N_block>::index * T_gemm_kernel::Pack_type_B::size_packed,
00107 Ordering_block_C::template Get<1, 0, M_block, N_block>::index * T_gemm_kernel::Pack_type_C::size_packed>( A, B, C, n_mul );
00108 #else
00109 T_gemm_kernel::exec( A, B, C, n_mul,
00110 Ordering_block_A::get( 0, 0, M_block, K_block ) * T_gemm_kernel::Pack_type_A::size_packed,
00111 Ordering_block_B::get( 0, 0, K_block, N_block ) * T_gemm_kernel::Pack_type_B::size_packed,
00112 Ordering_block_C::get( 0, 0, M_block, N_block ) * T_gemm_kernel::Pack_type_C::size_packed );
00113 #endif
00114 #endif
00115 }
00116
00117
00127 template<typename T_gemm_kernel, int T_M_block, int T_N_block>
00128 template<int T_rows_block, int T_cols_block, typename T_ordering_block, typename T_pack_type_kernel>
00129 class MM_kernel_outer_A<T_gemm_kernel, T_M_block, T_N_block>::Pack {
00130 static int const rows_kernel = T_pack_type_kernel::rows;
00131 static int const cols_kernel = T_pack_type_kernel::cols;
00132 public:
00133 static int const rows = rows_kernel * T_rows_block;
00134 static int const cols = cols_kernel * T_cols_block;
00138
00139 static unsigned int const size_packed = T_rows_block * T_cols_block * T_pack_type_kernel::size_packed;
00140
00141
00142 template<typename T_ordering_matrix>
00143 struct Assign_to_packed : public T_pack_type_kernel::template Assign_to_packed<T_ordering_matrix> {
00144 typedef T_ordering_matrix Ordering_matrix;
00145 };
00146 template<typename T_ordering_matrix>
00147 struct Extract_from_packed : public T_pack_type_kernel::template Extract_from_packed<T_ordering_matrix> {
00148 typedef T_ordering_matrix Ordering_matrix;
00149 };
00150
00151
00156 template<template<typename T_ordering> class T_assign, typename T_ordering_matrix>
00157 static void exec(typename T_assign<T_ordering_matrix>::PtrType X, typename T_assign<T_ordering_matrix>::PtrTypePacked X_packed,
00158 int const rows_total_matrix, int const cols_total_matrix) {
00159
00160 for ( int col_b = 0; col_b < T_cols_block; ++col_b ) {
00161
00162 for ( int row_b = 0; row_b < T_rows_block; ++row_b ) {
00163 T_pack_type_kernel::template exec< T_assign, T_ordering_matrix >
00164 ( &X[ T_assign<T_ordering_matrix>::Ordering_matrix::get( row_b * rows_kernel, col_b * cols_kernel,
00165 rows_total_matrix, cols_total_matrix ) ],
00166 &X_packed[ T_ordering_block::get( row_b, col_b, T_rows_block, T_cols_block ) *
00167 T_pack_type_kernel::size_packed ],
00168 rows_total_matrix, cols_total_matrix );
00169
00170
00171
00172 }
00173 }
00174 }
00175
00180 template<typename T_ordering_matrix>
00181 inline static void pack(real const * const X, real * X_packed,
00182 int const rows_total_matrix, int const cols_total_matrix) {
00183 exec< Assign_to_packed, T_ordering_matrix >(X, X_packed, rows_total_matrix, cols_total_matrix);
00184 }
00189 template<typename T_ordering_matrix>
00190 inline static void unpack(real * X, real const * const X_packed,
00191 int const rows_total_matrix, int const cols_total_matrix) {
00192 exec< Extract_from_packed, T_ordering_matrix >(X, X_packed, rows_total_matrix, cols_total_matrix);
00193 }
00194
00195
00196 };
00197 #endif