75 |
75 |
76 /* Enable runtime BMI2 dispatch based on the CPU. |
76 /* Enable runtime BMI2 dispatch based on the CPU. |
77 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. |
77 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. |
78 */ |
78 */ |
/* DYNAMIC_BMI2 selection.
 * A value supplied before this point is kept: everything below is guarded by
 * #ifndef DYNAMIC_BMI2.
 * With the fully parenthesized condition, the macro becomes 1 when all of the
 * following hold, and 0 as soon as one of them fails:
 *   - the compiler is clang with __has_attribute(__target__) true, or it is
 *     GCC 4.8 or later;
 *   - __x86_64__ or _M_X86 is defined;
 *   - __BMI2__ is not defined.
 * Two spellings of the #if condition are present (both numbered 80-82). Only
 * the one opening with "((" and closing with "))))" groups the whole compiler
 * test before the architecture and __BMI2__ tests. In the other spelling &&
 * binds tighter than ||, so a clang match alone satisfies the condition.
 * NOTE(review): _M_X86 is not among the architecture macros MSVC documents
 * (_M_IX86, _M_X64) -- confirm which one was intended.
 * NOTE(review): __has_attribute is used here without a visible fallback
 * definition -- presumably provided earlier in the file; verify.
 */
79 #ifndef DYNAMIC_BMI2 |
79 #ifndef DYNAMIC_BMI2 |
80 #if (defined(__clang__) && __has_attribute(__target__)) \ |
80 #if ((defined(__clang__) && __has_attribute(__target__)) \ |
81 || (defined(__GNUC__) \ |
81 || (defined(__GNUC__) \ |
82 && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \ |
82 && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ |
83 && (defined(__x86_64__) || defined(_M_X86)) \ |
83 && (defined(__x86_64__) || defined(_M_X86)) \ |
84 && !defined(__BMI2__) |
84 && !defined(__BMI2__) |
85 # define DYNAMIC_BMI2 1 |
85 # define DYNAMIC_BMI2 1 |
86 #else |
86 #else |
87 # define DYNAMIC_BMI2 0 |
87 # define DYNAMIC_BMI2 0 |
88 #endif |
88 #endif |
89 #endif |
89 #endif |
90 |
90 |
/* PREFETCH(ptr) selection.
 * In the variant numbered 99-110:
 *   - NO_PREFETCH defined: PREFETCH(ptr) expands to (void)(ptr);
 *   - _MSC_VER with _M_X64 or _M_I86 defined: <mmintrin.h> is included and
 *     PREFETCH(ptr) expands to _mm_prefetch((const char*)(ptr), _MM_HINT_T1);
 *   - GCC 3.1 or later: PREFETCH(ptr) expands to
 *     __builtin_prefetch((ptr), 0, 2), i.e. a read prefetch with locality 2;
 *   - anything else: PREFETCH(ptr) expands to (void)(ptr).
 * The variant numbered 92-99 has no NO_PREFETCH switch, accepts any __GNUC__
 * version, passes _MM_HINT_T0 and locality 0, leaves ptr unparenthesized, and
 * expands to nothing at all when prefetching is unavailable.
 * NOTE(review): MSVC documents _M_IX86 as the 32-bit x86 macro; _M_I86 may
 * never match there -- confirm which one was intended.
 */
91 /* prefetch */ |
91 /* prefetch |
92 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ |
92 * can be disabled, by declaring NO_PREFETCH macro |
93 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ |
93 * All prefetch invocations use a single default locality 2, |
94 # define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) |
94 * generating instruction prefetcht1, |
95 #elif defined(__GNUC__) |
95 * which, according to Intel, means "load data into L2 cache". |
96 # define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) |
96 * This is a good enough "middle ground" for the time being, |

97 * though in theory, it would be better to specialize locality depending on data being prefetched. |

98 * Tests could not determine any sensible difference based on locality value. */ |

99 #if defined(NO_PREFETCH) |

100 # define PREFETCH(ptr) (void)(ptr) /* disabled */ |
97 #else |
101 #else |
98 # define PREFETCH(ptr) /* disabled */ |
102 # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ |
99 #endif |
103 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ |

104 # define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) |

105 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) |

106 # define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) |

107 # else |

108 # define PREFETCH(ptr) (void)(ptr) /* disabled */ |

109 # endif |

110 #endif /* NO_PREFETCH */ |
|
111 |
|
/* Byte stride between successive PREFETCH calls issued by PREFETCH_AREA.
 * NOTE(review): presumably the cache-line size of the intended targets; the
 * value is hard-coded here, not detected -- confirm it suits every platform. */
112 #define CACHELINE_SIZE 64 |
|
113 |
|
/* PREFETCH_AREA(p, s):
 * calls PREFETCH on p, p + CACHELINE_SIZE, p + 2*CACHELINE_SIZE, ... for every
 * byte offset strictly below s; nothing is prefetched when s is 0.
 * p is converted to const char* and s to size_t, and each argument is
 * evaluated exactly once.
 * The expansion is a bare { } block rather than do { } while (0): a semicolon
 * written after the invocation is a separate empty statement, so an unbraced
 * "if (c) PREFETCH_AREA(p, s); else ..." does not compile.
 * _ptr, _size and _pos are locals of that block; an argument expression that
 * mentions one of these names would refer to the macro's local instead of the
 * caller's variable.
 */
114 #define PREFETCH_AREA(p, s) { \ |

115 const char* const _ptr = (const char*)(p); \ |

116 size_t const _size = (size_t)(s); \ |

117 size_t _pos; \ |

118 for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ |

119 PREFETCH(_ptr + _pos); \ |

120 } \ |

121 } |
100 |
122 |
101 /* disable warnings */ |
123 /* disable warnings */ |
102 #ifdef _MSC_VER /* Visual Studio */ |
124 #ifdef _MSC_VER /* Visual Studio */ |
103 # include <intrin.h> /* For Visual 2005 */ |
125 # include <intrin.h> /* For Visual 2005 */ |
104 # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ |
126 # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ |