diff -r 89742f1fa6cb -r 73fef626dae3 contrib/python-zstandard/zstd/common/compiler.h --- a/contrib/python-zstandard/zstd/common/compiler.h Tue Sep 25 20:55:03 2018 +0900 +++ b/contrib/python-zstandard/zstd/common/compiler.h Mon Oct 08 16:27:40 2018 -0700 @@ -77,9 +77,9 @@ * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. */ #ifndef DYNAMIC_BMI2 - #if (defined(__clang__) && __has_attribute(__target__)) \ + #if ((defined(__clang__) && __has_attribute(__target__)) \ || (defined(__GNUC__) \ - && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ && (defined(__x86_64__) || defined(_M_X86)) \ && !defined(__BMI2__) # define DYNAMIC_BMI2 1 @@ -88,15 +88,37 @@ #endif #endif -/* prefetch */ -#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ -# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ -# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) -#elif defined(__GNUC__) -# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) +/* prefetch + * can be disabled, by declaring NO_PREFETCH macro + * All prefetch invocations use a single default locality 2, + * generating instruction prefetcht1, + * which, according to Intel, means "load data into L2 cache". + * This is a good enough "middle ground" for the time being, + * though in theory, it would be better to specialize locality depending on data being prefetched. + * Tests could not determine any sensible difference based on locality value. 
*/ +#if defined(NO_PREFETCH) +# define PREFETCH(ptr) (void)(ptr) /* disabled */ #else -# define PREFETCH(ptr) /* disabled */ -#endif +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +# else +# define PREFETCH(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* NO_PREFETCH */ + +#define CACHELINE_SIZE 64 + +#define PREFETCH_AREA(p, s) { \ + const char* const _ptr = (const char*)(p); \ + size_t const _size = (size_t)(s); \ + size_t _pos; \ + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ + PREFETCH(_ptr + _pos); \ + } \ +} /* disable warnings */ #ifdef _MSC_VER /* Visual Studio */