--- a/contrib/python-zstandard/zstd/common/compiler.h Tue Sep 25 20:55:03 2018 +0900
+++ b/contrib/python-zstandard/zstd/common/compiler.h Mon Oct 08 16:27:40 2018 -0700
@@ -77,9 +77,9 @@
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
*/
#ifndef DYNAMIC_BMI2
- #if (defined(__clang__) && __has_attribute(__target__)) \
+ #if ((defined(__clang__) && __has_attribute(__target__)) \
|| (defined(__GNUC__) \
- && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \
+ && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
&& (defined(__x86_64__) || defined(_M_X86)) \
&& !defined(__BMI2__)
# define DYNAMIC_BMI2 1
@@ -88,15 +88,37 @@
#endif
#endif
-/* prefetch */
-#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
-# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
-# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
-#elif defined(__GNUC__)
-# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
+/* prefetch
+ * can be disabled, by declaring NO_PREFETCH macro
+ * All prefetch invocations use a single default locality 2,
+ * generating instruction prefetcht1,
+ * which, according to Intel, means "load data into L2 cache".
+ * This is a good enough "middle ground" for the time being,
+ * though in theory, it would be better to specialize locality depending on data being prefetched.
+ * Tests could not determine any sensible difference based on locality value. */
+#if defined(NO_PREFETCH)
+# define PREFETCH(ptr) (void)(ptr) /* disabled */
#else
-# define PREFETCH(ptr) /* disabled */
-#endif
+# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
+# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+# define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
+# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+# define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+# else
+# define PREFETCH(ptr) (void)(ptr) /* disabled */
+# endif
+#endif /* NO_PREFETCH */
+
+#define CACHELINE_SIZE 64
+
+#define PREFETCH_AREA(p, s) { \
+ const char* const _ptr = (const char*)(p); \
+ size_t const _size = (size_t)(s); \
+ size_t _pos; \
+ for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
+ PREFETCH(_ptr + _pos); \
+ } \
+}
/* disable warnings */
#ifdef _MSC_VER /* Visual Studio */