MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block > Class Template Reference

Template for matrix-matrix multiplication that wraps around a kernel given as a template argument. More...

#include <mm_kernel_outer_A.h>

List of all members.

Classes

class  Pack
 Template for translations between unpacked and packed matrix storage. More...

Public Types

typedef T_gemm_kernel::real real
 Real number type (usually float or double).
typedef Ordering_col_wise Ordering_block_A
typedef Ordering_col_wise Ordering_block_B
typedef Ordering_col_wise Ordering_block_C
typedef Pack< M_block, K_block,
Ordering_block_A, typename
T_gemm_kernel::Pack_type_A > 
Pack_type_A
typedef Pack< K_block, N_block,
Ordering_block_B, typename
T_gemm_kernel::Pack_type_B > 
Pack_type_B
typedef Pack< M_block, N_block,
Ordering_block_C, typename
T_gemm_kernel::Pack_type_C > 
Pack_type_C

Static Public Member Functions

static void exec (real const *const *const A, real const *const *const B, real *const C, int const i=1)
 Executes the matrix-matrix multiply C += A B with the three matrices A, B, and C stored using the packing types of this class.

Static Public Attributes

static int const M_kernel = T_gemm_kernel::M
 Number of rows of A and C kernels.
static int const N_kernel = T_gemm_kernel::N
 Number of columns of B and C kernels.
static int const K_kernel = T_gemm_kernel::K
 Number of columns of A kernels and rows of B kernels.
static int const M_block = T_M_block
 Number of rows of A and C (blocks).
static int const N_block = T_N_block
 Number of columns of B and C (blocks).
static int const K_block = 1
 Number of columns of A and rows of B (blocks).
static int const M = M_kernel * M_block
 Number of rows of A and C.
static int const N = N_kernel * N_block
 Number of columns of B and C.
static int const K = K_kernel * K_block
 Number of columns of A and rows of B.

Detailed Description

template<typename T_gemm_kernel, int T_M_block, int T_N_block>
class MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >

Template for matrix-matrix multiplication that wraps around a kernel given as a template argument.

The idea is that the inner kernel should be fully unrolled and blocked for registers.


Member Typedef Documentation

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
typedef Ordering_col_wise MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Ordering_block_A
template<typename T_gemm_kernel , int T_M_block, int T_N_block>
typedef Ordering_col_wise MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Ordering_block_B
template<typename T_gemm_kernel , int T_M_block, int T_N_block>
typedef Ordering_col_wise MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Ordering_block_C
template<typename T_gemm_kernel , int T_M_block, int T_N_block>
typedef Pack< M_block, K_block, Ordering_block_A, typename T_gemm_kernel::Pack_type_A > MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Pack_type_A
template<typename T_gemm_kernel , int T_M_block, int T_N_block>
typedef Pack< K_block, N_block, Ordering_block_B, typename T_gemm_kernel::Pack_type_B > MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Pack_type_B
template<typename T_gemm_kernel , int T_M_block, int T_N_block>
typedef Pack< M_block, N_block, Ordering_block_C, typename T_gemm_kernel::Pack_type_C > MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Pack_type_C
template<typename T_gemm_kernel , int T_M_block, int T_N_block>
typedef T_gemm_kernel::real MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::real

Real number type (usually float or double).


Member Function Documentation

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
void MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::exec ( real const *const *const   A,
real const *const *const   B,
real *const   C,
int const   i = 1 
) [inline, static]

Executes the matrix-matrix multiply C += A B with the three matrices A, B, and C stored using the packing types of this class.

References Ordering_col_wise::get(), MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::K_block, MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::M_block, and MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::N_block.


Member Data Documentation

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::K = K_kernel * K_block [static]

Number of columns of A and rows of B.

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::K_block = 1 [static]

Number of columns of A and rows of B (blocks).

Referenced by MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::exec().

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::K_kernel = T_gemm_kernel::K [static]

Number of columns of A kernels and rows of B kernels.

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::M = M_kernel * M_block [static]

Number of rows of A and C.

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::M_block = T_M_block [static]

Number of rows of A and C (blocks).

Referenced by MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::exec().

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::M_kernel = T_gemm_kernel::M [static]

Number of rows of A and C kernels.

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::N = N_kernel * N_block [static]

Number of columns of B and C.

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::N_block = T_N_block [static]

Number of columns of B and C (blocks).

Referenced by MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::exec().

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::N_kernel = T_gemm_kernel::N [static]

Number of columns of B and C kernels.


The documentation for this class was generated from the following file:

mm_kernel_outer_A.h

Generated on 10 Jul 2018 for ergo by  doxygen 1.6.1