GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize > Struct Template Reference
ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize > Struct Template Reference
#include <gridwise_welford_second_half_layernorm2d.hpp>
Public Types | |
| using | ThreadClusterLengths_M_N = Sequence<MThreadClusterSize, NThreadClusterSize> |
| using | ThreadBufferDimAccessOrder = Sequence<0, 1> |
| using | ThreadClusterArrangeOrder = Sequence<0, 1> |
| using | ThreadBufferLengths_M_N = Sequence<MThreadSliceSize, NThreadSliceSize> |
| using | ThreadBufferLengths_M_1 = Sequence<MThreadSliceSize, 1> |
| using | ThreadBufferLengths_N = Sequence<NThreadSliceSize> |
| using | ThreadWelfordSrcDesc_M_1 = decltype(thread_buffer_desc_m_1) |
| using | ThreadWelfordDstDesc_M |
| using | ThreadwiseWelford |
| using | BlockwiseWelford |
Static Public Member Functions | |
| static __device__ void | Run (const EMeanVarDataType *__restrict__ p_e_grid, const EMeanVarDataType *__restrict__ p_in_welford_mean_grid, const EMeanVarDataType *__restrict__ p_in_welford_var_grid, const int32_t *__restrict__ p_in_welford_count_grid, const GammaDataType *__restrict__ p_gamma_grid, const BetaDataType *__restrict__ p_beta_grid, HDataType *__restrict__ p_h_grid, const EHGridDesc_M_N &e_grid_desc_m_n, const EHGridDesc_M_N &h_grid_desc_m_n, const MeanVarGridDesc_M_NBlock &mean_var_grid_desc_m_nblock, const CountGridDesc_M_NBlock &count_grid_desc_m_nblock, const GammaBetaGridDesc_N &gamma_grid_desc_n, const GammaBetaGridDesc_N &beta_grid_desc_n, index_t numMeanVarCountBlockTileIteration_N, index_t NBlockClusterLength, ComputeDataType epsilon, HElementwiseOperation h_element_op) |
Static Public Attributes | |
| static constexpr auto | thread_cluster_desc_m_n |
| static constexpr auto | thread_buffer_desc_m_n |
| static constexpr auto | thread_buffer_desc_m_1 |
| static constexpr auto | thread_buffer_desc_n |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr index_t | M_BlockTileSize = MThreadClusterSize * MThreadSliceSize |
| static constexpr index_t | N_BlockTileSize = NThreadClusterSize * NThreadSliceSize |
Member Typedef Documentation
◆ BlockwiseWelford
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
| using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::BlockwiseWelford |
Initial value:
BlockwiseWelford<ComputeDataType,
BlockSize,
ThreadClusterLengths_M_N,
ThreadClusterArrangeOrder>
BlockwiseWelford< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, false > BlockwiseWelford
Definition gridwise_multiblock_welford_first_half.hpp:82
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type ThreadClusterArrangeOrder
Definition gridwise_multiblock_welford_first_half.hpp:68
Sequence< MThreadClusterSize, NThreadClusterSize > ThreadClusterLengths_M_N
Definition gridwise_welford_second_half_layernorm2d.hpp:51
◆ ThreadBufferDimAccessOrder
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
| using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadBufferDimAccessOrder = Sequence<0, 1> |
◆ ThreadBufferLengths_M_1
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
| using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadBufferLengths_M_1 = Sequence<MThreadSliceSize, 1> |
◆ ThreadBufferLengths_M_N
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
| using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadBufferLengths_M_N = Sequence<MThreadSliceSize, NThreadSliceSize> |
◆ ThreadBufferLengths_N
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
| using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadBufferLengths_N = Sequence<NThreadSliceSize> |
◆ ThreadClusterArrangeOrder
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
| using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadClusterArrangeOrder = Sequence<0, 1> |
◆ ThreadClusterLengths_M_N
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
| using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadClusterLengths_M_N = Sequence<MThreadClusterSize, NThreadClusterSize> |
◆ ThreadWelfordDstDesc_M
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
| using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadWelfordDstDesc_M |
Initial value:
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
◆ ThreadWelfordSrcDesc_M_1
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
| using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadWelfordSrcDesc_M_1 = decltype(thread_buffer_desc_m_1) |
◆ ThreadwiseWelford
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
| using ck::GridwiseWelfordSecondHalfLayernorm2d< EMeanVarDataType, HDataType, GammaDataType, BetaDataType, ComputeDataType, EHGridDesc_M_N, MeanVarGridDesc_M_NBlock, CountGridDesc_M_NBlock, GammaBetaGridDesc_N, HElementwiseOperation, BlockSize, MThreadClusterSize, NThreadClusterSize, MThreadSliceSize, NThreadSliceSize, ESrcVectorSize, HDstVectorSize, GammaSrcVectorSize, BetaSrcVectorSize >::ThreadwiseWelford |
Initial value:
Definition threadwise_welford.hpp:83
Member Function Documentation
◆ Run()
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
|
inline static |
Member Data Documentation
◆ I0
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
|
static constexpr |
◆ I1
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
|
static constexpr |
◆ M_BlockTileSize
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
|
static constexpr |
◆ N_BlockTileSize
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
|
static constexpr |
◆ thread_buffer_desc_m_1
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
|
static constexpr |
Initial value:
◆ thread_buffer_desc_m_n
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
|
static constexpr |
Initial value:
◆ thread_buffer_desc_n
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
|
static constexpr |
Initial value:
◆ thread_cluster_desc_m_n
template<typename EMeanVarDataType, typename HDataType, typename GammaDataType, typename BetaDataType, typename ComputeDataType, typename EHGridDesc_M_N, typename MeanVarGridDesc_M_NBlock, typename CountGridDesc_M_NBlock, typename GammaBetaGridDesc_N, typename HElementwiseOperation, index_t BlockSize, index_t MThreadClusterSize, index_t NThreadClusterSize, index_t MThreadSliceSize, index_t NThreadSliceSize, index_t ESrcVectorSize, index_t HDstVectorSize, index_t GammaSrcVectorSize, index_t BetaSrcVectorSize>
|
static constexpr |
Initial value:
= make_cluster_descriptor(ThreadClusterLengths_M_N{}, ThreadClusterArrangeOrder{})
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
Sequence< 0, 1 > ThreadClusterArrangeOrder
Definition gridwise_welford_second_half_layernorm2d.hpp:53
The documentation for this struct was generated from the following file: