Grok  10.0.3
ojph_arch.h
Go to the documentation of this file.
1 //***************************************************************************/
2 // This software is released under the 2-Clause BSD license, included
3 // below.
4 //
5 // Copyright (c) 2019, Aous Naman
6 // Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7 // Copyright (c) 2019, The University of New South Wales, Australia
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
12 //
13 // 1. Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
15 //
16 // 2. Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //***************************************************************************/
32 // This file is part of the OpenJPH software implementation.
33 // File: ojph_arch.h
34 // Author: Aous Naman
35 // Date: 28 August 2019
36 //***************************************************************************/
37 
38 
39 #ifndef OJPH_ARCH_H
40 #define OJPH_ARCH_H
41 
42 #include <cstdio>
43 #include <cstdint>
44 #include <cmath>
45 
46 #include "ojph_defs.h"
47 
48 
50 // preprocessor directives for compiler
52 #ifdef _MSC_VER
53 #define OJPH_COMPILER_MSVC
54 #elif (defined __GNUC__)
55 #define OJPH_COMPILER_GNUC
56 #endif
57 
58 #ifdef __EMSCRIPTEN__
59 #define OJPH_EMSCRIPTEN
60 #endif
61 
62 #ifdef OJPH_COMPILER_MSVC
63 #include <intrin.h>
64 #endif
65 
66 namespace ojph {
67 
69  // OS detection definitions
71 #if (defined WIN32) || (defined _WIN32) || (defined _WIN64)
72 #define OJPH_OS_WINDOWS
73 #elif (defined __APPLE__)
74 #define OJPH_OS_APPLE
75 #elif (defined __linux)
76 #define OJPH_OS_LINUX
77 #endif
78 
80  // defines for dll
82 #if defined(OJPH_OS_WINDOWS) && defined(OJPH_BUILD_SHARED_LIBRARY)
83 #define OJPH_EXPORT __declspec(dllexport)
84 #else
85 #define OJPH_EXPORT
86 #endif
87 
89  // cpu features
93 
// x86 instruction-set extension levels, in ascending order.
// The enumerator list is restored in the order in which the symbol
// cross-reference of this listing places the definitions (lines 95-106);
// values are assigned by position, starting at 0.
// NOTE(review): presumably these are the values reported by
// get_cpu_ext_level() -- confirm against its definition.
enum : int {
  X86_CPU_EXT_LEVEL_GENERIC = 0,
  X86_CPU_EXT_LEVEL_MMX,
  X86_CPU_EXT_LEVEL_SSE,
  X86_CPU_EXT_LEVEL_SSE2,
  X86_CPU_EXT_LEVEL_SSE3,
  X86_CPU_EXT_LEVEL_SSSE3,
  X86_CPU_EXT_LEVEL_SSE41,
  X86_CPU_EXT_LEVEL_SSE42,
  X86_CPU_EXT_LEVEL_AVX,
  X86_CPU_EXT_LEVEL_AVX2,
  X86_CPU_EXT_LEVEL_AVX2FMA,
  X86_CPU_EXT_LEVEL_AVX512
};
108 
110  static inline ui32 population_count(ui32 val)
111  {
112  #ifdef OJPH_COMPILER_MSVC
113  return (ui32)__popcnt(val);
114  #elif (defined OJPH_COMPILER_GNUC)
115  return (ui32)__builtin_popcount(val);
116  #else
117  val -= ((val >> 1) & 0x55555555);
118  val = (((val >> 2) & 0x33333333) + (val & 0x33333333));
119  val = (((val >> 4) + val) & 0x0f0f0f0f);
120  val += (val >> 8);
121  val += (val >> 16);
122  return (int)(val & 0x0000003f);
123  #endif
124  }
125 
127 #ifdef OJPH_COMPILER_MSVC
128  #pragma intrinsic(_BitScanReverse)
129 #endif
130  static inline ui32 count_leading_zeros(ui32 val)
131  {
132  #ifdef OJPH_COMPILER_MSVC
133  unsigned long result = 0;
134  _BitScanReverse(&result, val);
135  return 31 ^ (ui32)result;
136  #elif (defined OJPH_COMPILER_GNUC)
137  return (ui32)__builtin_clz(val);
138  #else
139  val |= (val >> 1);
140  val |= (val >> 2);
141  val |= (val >> 4);
142  val |= (val >> 8);
143  val |= (val >> 16);
144  return 32 - population_count(val);
145  #endif
146  }
147 
149 #ifdef OJPH_COMPILER_MSVC
150  #pragma intrinsic(_BitScanForward)
151 #endif
152  static inline ui32 count_trailing_zeros(ui32 val)
153  {
154  #ifdef OJPH_COMPILER_MSVC
155  unsigned long result = 0;
156  _BitScanForward(&result, val);
157  return (ui32)result;
158  #elif (defined OJPH_COMPILER_GNUC)
159  return (ui32)__builtin_ctz(val);
160  #else
161  val |= (val << 1);
162  val |= (val << 2);
163  val |= (val << 4);
164  val |= (val << 8);
165  val |= (val << 16);
166  return 32 - population_count(val);
167  #endif
168  }
169 
171  static inline si32 ojph_round(float val)
172  {
173  #ifdef OJPH_COMPILER_MSVC
174  return (si32)(val + (val >= 0.0f ? 0.5f : -0.5f));
175  #elif (defined OJPH_COMPILER_GNUC)
176  return (si32)(val + (val >= 0.0f ? 0.5f : -0.5f));
177  #else
178  return (si32)round(val);
179  #endif
180  }
181 
183  static inline si32 ojph_trunc(float val)
184  {
185  #ifdef OJPH_COMPILER_MSVC
186  return (si32)(val);
187  #elif (defined OJPH_COMPILER_GNUC)
188  return (si32)(val);
189  #else
190  return (si32)trunc(val);
191  #endif
192  }
193 
195  // constants
197  const ui32 byte_alignment = 32; // alignment expressed in bytes: 32 bytes == 256 bits
200 
202  // templates for alignment
204 
206  // finds the size such that it is a multiple of byte_alignment
207  template <typename T, int N>
208  size_t calc_aligned_size(size_t size) {
209  size = size * sizeof(T) + N - 1;
210  size &= ~((1ULL << (31 - count_leading_zeros(N))) - 1);
211  size >>= (31 - count_leading_zeros(sizeof(T)));
212  return size;
213  }
214 
216  // moves the pointer to first address that is a multiple of byte_alignment
217  template <typename T, int N>
218  inline T *align_ptr(T *ptr) {
219  intptr_t p = reinterpret_cast<intptr_t>(ptr);
220  p += N - 1;
221  p &= ~((1ULL << (31 - count_leading_zeros(N))) - 1);
222  return reinterpret_cast<T *>(p);
223  }
224 
225 }
226 
227 #endif // !OJPH_ARCH_H
N
Definition: rvv-inl.h:1742
Definition: ojph_block_common.cpp:48
const ui32 object_alignment
Definition: ojph_arch.h:199
const ui32 byte_alignment
Definition: ojph_arch.h:197
@ X86_CPU_EXT_LEVEL_AVX2
Definition: ojph_arch.h:104
@ X86_CPU_EXT_LEVEL_AVX
Definition: ojph_arch.h:103
@ X86_CPU_EXT_LEVEL_AVX512
Definition: ojph_arch.h:106
@ X86_CPU_EXT_LEVEL_GENERIC
Definition: ojph_arch.h:95
@ X86_CPU_EXT_LEVEL_SSE2
Definition: ojph_arch.h:98
@ X86_CPU_EXT_LEVEL_SSE41
Definition: ojph_arch.h:101
@ X86_CPU_EXT_LEVEL_SSE
Definition: ojph_arch.h:97
@ X86_CPU_EXT_LEVEL_MMX
Definition: ojph_arch.h:96
@ X86_CPU_EXT_LEVEL_SSE42
Definition: ojph_arch.h:102
@ X86_CPU_EXT_LEVEL_SSSE3
Definition: ojph_arch.h:100
@ X86_CPU_EXT_LEVEL_SSE3
Definition: ojph_arch.h:99
@ X86_CPU_EXT_LEVEL_AVX2FMA
Definition: ojph_arch.h:105
static si32 ojph_round(float val)
Definition: ojph_arch.h:171
size_t calc_aligned_size(size_t size)
Definition: ojph_arch.h:208
OJPH_EXPORT int get_cpu_ext_level()
static ui32 population_count(ui32 val)
Definition: ojph_arch.h:110
static si32 ojph_trunc(float val)
Definition: ojph_arch.h:183
T * align_ptr(T *ptr)
Definition: ojph_arch.h:218
static ui32 count_trailing_zeros(ui32 val)
Definition: ojph_arch.h:152
static ui32 count_leading_zeros(ui32 val)
Definition: ojph_arch.h:130
int32_t si32
Definition: ojph_defs.h:55
const ui32 log_byte_alignment
Definition: ojph_arch.h:198
uint32_t ui32
Definition: ojph_defs.h:54
#define OJPH_EXPORT
Definition: ojph_arch.h:85