Data Transfers
From memory:
load values from memory (optionally masked) |
|
load values from aligned memory |
|
load values from unaligned memory |
|
load values, forcing a type conversion |
From a scalar:
broadcasting a value to all slots |
|
broadcasting a value, forcing a type conversion |
To memory:
store values to memory (optionally masked) |
|
store values to aligned memory |
|
store values to unaligned memory |
|
store values, forcing a type conversion |
In place:
rearrange slots within the batch |
|
bitwise shift the whole batch to the left |
|
bitwise shift the whole batch to the right |
|
bitwise rotate the whole batch to the left |
|
bitwise rotate the whole batch to the right |
|
modify a single batch slot |
|
pack elements according to a mask |
|
select contiguous elements from the batch |
Between batches:
transpose a matrix as an array of batches |
|
interleave low halves of two batches |
|
interleave high halves of two batches |
-
template<class T, class A = default_arch>
inline kernel::detail::broadcaster<T, A>::return_type broadcast(T v) noexcept Creates a batch from the single value
v. If
v is a boolean, this function returns a batch_bool<uint8_t>. If you need another type of batch_bool, please use broadcast_as instead.
- Parameters:
v – the value used to initialize the batch
- Returns:
a new batch instance
-
template<class To, class A = default_arch, class From>
inline simd_return_type<From, To, A> broadcast_as(From v) noexcept Creates a batch from the single value
v and the specified batch value type To.
- Parameters:
v – the value used to initialize the batch
- Returns:
a new batch instance
-
template<class T, class A>
inline batch<T, A> compress(batch<T, A> const &x, batch_bool<T, A> const &mask) noexcept Pick elements from
x selected by mask, and append them to the resulting vector, zeroing the remaining slots.
-
template<class T, class A>
inline batch<T, A> expand(batch<T, A> const &x, batch_bool<T, A> const &mask) noexcept Load contiguous elements from
x and place them in slots selected by mask, zeroing the other slots.
-
template<class T, class A, size_t I>
inline batch<T, A> insert(batch<T, A> const &x, T val, index<I> pos) noexcept Create a new batch equivalent to
x but with element val set at position pos.
- Parameters:
x – batch
val – value to set
pos – index of the updated slot
- Returns:
copy of
x with position pos set to val
-
template<class To, class A = default_arch, class From>
inline simd_return_type<From, To, A> load_as(From const *ptr, aligned_mode) noexcept Creates a batch from the buffer
ptr and the specified batch value type To. The memory needs to be aligned.
- Parameters:
ptr – the memory buffer to read
- Returns:
a new batch instance
-
template<class To, class A = default_arch, class From>
inline simd_return_type<From, To, A> load_as(From const *ptr, unaligned_mode) noexcept Creates a batch from the buffer
ptr and the specified batch value type To. The memory does not need to be aligned.
- Parameters:
ptr – the memory buffer to read
- Returns:
a new batch instance
-
template<class A = default_arch, class From>
inline batch<From, A> load(From const *ptr, aligned_mode = {}) noexcept Creates a batch from the buffer
ptr. The memory needs to be aligned.
- Parameters:
ptr – the memory buffer to read
- Returns:
a new batch instance
-
template<class A = default_arch, class From>
inline batch<From, A> load(From const *ptr, unaligned_mode) noexcept Creates a batch from the buffer
ptr. The memory does not need to be aligned.
- Parameters:
ptr – the memory buffer to read
- Returns:
a new batch instance
-
template<class T, class A = default_arch, bool... Values, class From>
inline batch<T, A> load(From const *ptr, batch_bool_constant<T, A, Values...> const &mask, aligned_mode = {}) noexcept Creates a batch from the buffer
ptr using a mask. Elements corresponding to
false in the mask are not accessed in memory and are zero-initialized in the resulting batch.
- Parameters:
ptr – the memory buffer to read
mask – selection mask for the elements to load
- Returns:
a new batch instance
-
template<class T, class A = default_arch, bool... Values, class From>
inline batch<T, A> load(From const *ptr, batch_bool_constant<T, A, Values...> const &mask, unaligned_mode) noexcept Creates a batch from the buffer
ptr using a mask. Elements corresponding to
false in the mask are not accessed in memory and are zero-initialized in the resulting batch.
- Parameters:
ptr – the memory buffer to read. The buffer does not need to be aligned.
mask – selection mask for the elements to load
- Returns:
a new batch instance
-
template<class A = default_arch, class From>
inline batch<From, A> load_aligned(From const *ptr) noexcept Creates a batch from the buffer
ptr. The memory needs to be aligned.
- Parameters:
ptr – the memory buffer to read
- Returns:
a new batch instance
-
template<class A = default_arch, class From>
inline batch<From, A> load_unaligned(From const *ptr) noexcept Creates a batch from the buffer
ptr. The memory does not need to be aligned.
- Parameters:
ptr – the memory buffer to read
- Returns:
a new batch instance
-
template<size_t N, class T, class A>
inline batch<T, A> rotate_left(batch<T, A> const &x) noexcept Slide the whole batch to the left by
N elements, and reintroduce the slid-out elements from the right. This is different from
rotl, which rotates each batch element to the left.
- Template Parameters:
N – Amount of elements to rotate to the left.
- Parameters:
x – batch of integer values.
- Returns:
rotated batch.
-
template<size_t N, class T, class A>
inline batch<T, A> rotate_right(batch<T, A> const &x) noexcept Slide the whole batch to the right by
N elements, and reintroduce the slid-out elements from the left. This is different from
rotr, which rotates each batch element to the right.
- Template Parameters:
N – Amount of elements to rotate to the right.
- Parameters:
x – batch of integer values.
- Returns:
rotated batch.
-
template<class T, class A, class Vt, Vt... Values>
inline std::enable_if_t<std::is_arithmetic<T>::value, batch<T, A>> shuffle(batch<T, A> const &x, batch<T, A> const &y, batch_constant<Vt, A, Values...> mask) noexcept Combine elements from
x and y according to selector mask.
- Parameters:
x – batch
y – batch
mask – constant batch mask of integer elements of the same size as the elements of
x and y. Each element of the mask indexes the vector that would be formed by the concatenation of x and y. For instance, batch_constant<uint32_t, sse2, 0, 4, 3, 7>
picks x[0], y[0], x[3], y[3].
- Returns:
combined batch
-
template<size_t N, class T, class A>
inline batch<T, A> slide_left(batch<T, A> const &x) noexcept Slide the whole batch to the left by
N bytes. This is different from
bitwise_lshift, which shifts each batch element to the left.
Warning
The behavior of this function is platform-dependent on big endian architectures.
- Template Parameters:
N – Amount of bytes to slide to the left.
- Parameters:
x – batch of integer values.
- Returns:
slided batch.
-
template<size_t N, class T, class A>
inline batch<T, A> slide_right(batch<T, A> const &x) noexcept Slide the whole batch to the right by
N bytes. This is different from
bitwise_rshift, which shifts each batch element to the right.
Warning
The behavior of this function is platform-dependent on big endian architectures.
- Template Parameters:
N – Amount of bytes to slide to the right.
- Parameters:
x – batch of integer values.
- Returns:
slided batch.
-
template<class To, class A = default_arch, class From>
inline void store_as(To *dst, batch<From, A> const &src, aligned_mode) noexcept Copy content of batch
src to the buffer dst. The memory needs to be aligned.
- Parameters:
dst – the memory buffer to write to
src – the batch to copy
-
template<class To, class A = default_arch, class From>
inline void store_as(To *dst, batch<From, A> const &src, unaligned_mode) noexcept Copy content of batch
src to the buffer dst. The memory does not need to be aligned.
- Parameters:
dst – the memory buffer to write to
src – the batch to copy
-
template<class A, class T>
inline void store(T *mem, batch<T, A> const &val, aligned_mode = {}) noexcept Copy content of batch
val to the buffer mem. The memory needs to be aligned.
- Parameters:
mem – the memory buffer to write to
val – the batch to copy from
-
template<class A, class T>
inline void store(T *mem, batch<T, A> const &val, unaligned_mode) noexcept Copy content of batch
val to the buffer mem. The memory does not need to be aligned.
- Parameters:
mem – the memory buffer to write to
val – the batch to copy from
-
template<class T, class A = default_arch, bool... Values>
inline void store(T *mem, batch<T, A> const &val, batch_bool_constant<T, A, Values...> const &mask, aligned_mode = {}) noexcept Copy selected elements of batch
val to the buffer mem using a mask. Elements corresponding to
false in the mask are not written to memory.
- Parameters:
mem – the memory buffer to write to
val – the batch to copy from
mask – selection mask for the elements to store
-
template<class T, class A = default_arch, bool... Values>
inline void store(T *mem, batch<T, A> const &val, batch_bool_constant<T, A, Values...> const &mask, unaligned_mode) noexcept Copy selected elements of batch
val to the buffer mem using a mask. Elements corresponding to
false in the mask are not written to memory.
- Parameters:
mem – the memory buffer to write to. The buffer does not need to be aligned.
val – the batch to copy from
mask – selection mask for the elements to store
-
template<class A, class T>
inline void store_aligned(T *mem, batch<T, A> const &val) noexcept Copy content of batch
val to the buffer mem. The memory needs to be aligned.
- Parameters:
mem – the memory buffer to write to
val – the batch to copy from
-
template<class A, class T>
inline void store_unaligned(T *mem, batch<T, A> const &val) noexcept Copy content of batch
val to the buffer mem. The memory does not need to be aligned.
- Parameters:
mem – the memory buffer to write to
val – the batch to copy
-
template<class T, class A, class Vt, Vt... Values>
inline std::enable_if_t<std::is_arithmetic<T>::value, batch<T, A>> swizzle(batch<T, A> const &x, batch_constant<Vt, A, Values...> mask) noexcept Rearrange elements from
x according to constant mask mask.
- Parameters:
x – batch
mask – constant batch mask of integer elements of the same size as element of
x
- Returns:
swizzled batch
-
template<class T, class A, class Vt>
inline std::enable_if_t<std::is_arithmetic<T>::value, batch<T, A>> swizzle(batch<T, A> const &x, batch<Vt, A> mask) noexcept Rearrange elements from
x according to mask mask.
- Parameters:
x – batch
mask – batch mask of integer elements of the same size as element of
x
- Returns:
swizzled batch
-
template<class T, class A>
inline void transpose(batch<T, A> *matrix_begin, batch<T, A> *matrix_end) noexcept Transposes in place the matrix whose lines are the batches passed as argument.
- Parameters:
matrix_begin – pointer to the first line of the matrix to transpose
matrix_end – pointer to one element after the last line of the matrix to transpose
-
template<class T, class A>
inline batch<T, A> zip_hi(batch<T, A> const &x, batch<T, A> const &y) noexcept Unpack and interleave data from the HIGH half of batches
x and y. Store the results in the return value.
- Parameters:
x – a batch of integer or floating point or double precision values.
y – a batch of integer or floating point or double precision values.
- Returns:
a batch of the high part of shuffled values.
-
template<class T, class A>
inline batch<T, A> zip_lo(batch<T, A> const &x, batch<T, A> const &y) noexcept Unpack and interleave data from the LOW half of batches
x and y. Store the results in the return value.
- Parameters:
x – a batch of integer or floating point or double precision values.
y – a batch of integer or floating point or double precision values.
- Returns:
a batch of the low part of shuffled values.
The following empty types are used for tag dispatching:
-
struct aligned_mode
tag for load and store of aligned memory.
-
struct unaligned_mode
tag for load and store of unaligned memory.