Template for matrix-matrix multiplication that wraps around a kernel given as template argument. More...
#include <mm_kernel_outer_A.h>
Classes | |
class | Pack |
Template for translations between unpacked and packed matrix storage. More... | |
Public Types | |
typedef T_gemm_kernel::real | real |
Real number type (usually float or double). | |
typedef Ordering_col_wise | Ordering_block_A |
typedef Ordering_col_wise | Ordering_block_B |
typedef Ordering_col_wise | Ordering_block_C |
typedef Pack< M_block, K_block, Ordering_block_A, typename T_gemm_kernel::Pack_type_A > | Pack_type_A |
typedef Pack< K_block, N_block, Ordering_block_B, typename T_gemm_kernel::Pack_type_B > | Pack_type_B |
typedef Pack< M_block, N_block, Ordering_block_C, typename T_gemm_kernel::Pack_type_C > | Pack_type_C |
Static Public Member Functions | |
static void | exec (real const *const *const A, real const *const *const B, real *const C, int const i=1) |
Executes the matrix-matrix multiply C += A B with the three matrices A, B, and C stored using the packing types of this class. | |
Static Public Attributes | |
static int const | M_kernel = T_gemm_kernel::M |
Number of rows of A and C kernels. | |
static int const | N_kernel = T_gemm_kernel::N |
Number of columns of B and C kernels. | |
static int const | K_kernel = T_gemm_kernel::K |
Number of columns of A kernels and rows of B kernels. | |
static int const | M_block = T_M_block |
Number of rows of A and C (blocks). | |
static int const | N_block = T_N_block |
Number of columns of B and C (blocks). | |
static int const | K_block = 1 |
Number of columns of A and rows of B (blocks). | |
static int const | M = M_kernel * M_block |
Number of rows of A and C. | |
static int const | N = N_kernel * N_block |
Number of columns of B and C. | |
static int const | K = K_kernel * K_block |
Number of columns of A and rows of B. |
Template for matrix-matrix multiplication that wraps around a kernel given as template argument.
The idea is that the inner kernel should be fully unrolled and blocked for registers.
typedef Ordering_col_wise MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Ordering_block_A |
typedef Ordering_col_wise MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Ordering_block_B |
typedef Ordering_col_wise MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Ordering_block_C |
typedef Pack< M_block, K_block, Ordering_block_A, typename T_gemm_kernel::Pack_type_A > MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Pack_type_A |
typedef Pack< K_block, N_block, Ordering_block_B, typename T_gemm_kernel::Pack_type_B > MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Pack_type_B |
typedef Pack< M_block, N_block, Ordering_block_C, typename T_gemm_kernel::Pack_type_C > MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Pack_type_C |
typedef T_gemm_kernel::real MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::real |
Real number type (usually float or double).
void MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::exec | ( | real const *const *const | A, | |
real const *const *const | B, | |||
real *const | C, | |||
int const | i = 1 | |||
) | [inline, static] |
Executes the matrix-matrix multiply C += A B with the three matrices A, B, and C stored using the packing types of this class.
References Ordering_col_wise::get(), MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::K_block, MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::M_block, and MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::N_block.
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::K = K_kernel * K_block [static] |
Number of columns of A and rows of B.
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::K_block = 1 [static] |
Number of columns of A and rows of B (blocks).
Referenced by MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::exec().
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::K_kernel = T_gemm_kernel::K [static] |
Number of columns of A kernels and rows of B kernels.
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::M = M_kernel * M_block [static] |
Number of rows of A and C.
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::M_block = T_M_block [static] |
Number of rows of A and C (blocks).
Referenced by MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::exec().
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::M_kernel = T_gemm_kernel::M [static] |
Number of rows of A and C kernels.
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::N = N_kernel * N_block [static] |
Number of columns of B and C.
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::N_block = T_N_block [static] |
Number of columns of B and C (blocks).
Referenced by MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::exec().
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::N_kernel = T_gemm_kernel::N [static] |
Number of columns of B and C kernels.