diff --git a/.github/workflows/dynamic_arch.yml b/.github/workflows/dynamic_arch.yml index 22f018ab2c..ad3a380784 100644 --- a/.github/workflows/dynamic_arch.yml +++ b/.github/workflows/dynamic_arch.yml @@ -349,6 +349,8 @@ jobs: check-name: "linux_thread_stress (openmp)" - backend: tsan check-name: linux_thread_sanitizer + - backend: tsan-openmp + check-name: linux_thread_sanitizer_openmp steps: - name: Checkout repository @@ -362,8 +364,11 @@ jobs: EOF sudo apt-get update sudo apt-get install -y ccache cmake ninja-build - if [ "${{ matrix.backend }}" = "tsan" ]; then - sudo apt-get install -y clang llvm + case "${{ matrix.backend }}" in + tsan|tsan-openmp) sudo apt-get install -y clang llvm ;; + esac + if [ "${{ matrix.backend }}" = "tsan-openmp" ]; then + sudo apt-get install -y libomp-dev fi - name: Compilation cache @@ -398,7 +403,7 @@ jobs: sanitizer_flags= if [ "${{ matrix.backend }}" = "openmp" ]; then use_openmp=ON - elif [ "${{ matrix.backend }}" = "tsan" ]; then + elif [ "${{ matrix.backend }}" = "tsan" ] || [ "${{ matrix.backend }}" = "tsan-openmp" ]; then build_type=RelWithDebInfo c_compiler=clang cxx_compiler=clang++ @@ -408,6 +413,10 @@ jobs: dgemm_mixed_args="131072;8;10" dgemv_args="64;4;1" sanitizer_flags="-fsanitize=thread -g -O1 -fno-omit-frame-pointer" + if [ "${{ matrix.backend }}" = "tsan-openmp" ]; then + use_openmp=ON + cpp_thread_safety_use_openmp=ON + fi fi cmake_args=( -G Ninja @@ -436,7 +445,7 @@ jobs: -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache ) - if [ "${{ matrix.backend }}" = "tsan" ]; then + if [ "${{ matrix.backend }}" = "tsan" ] || [ "${{ matrix.backend }}" = "tsan-openmp" ]; then cmake_args+=( "-DCMAKE_C_FLAGS=$sanitizer_flags" "-DCMAKE_CXX_FLAGS=$sanitizer_flags" @@ -459,10 +468,24 @@ jobs: timeout-minutes: 30 run: | cd build - if [ "${{ matrix.backend }}" = "tsan" ]; then + if [ "${{ matrix.backend }}" = "tsan" ] || [ "${{ matrix.backend }}" = "tsan-openmp" ]; then export 
LLVM_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer export TSAN_OPTIONS=halt_on_error=1:exitcode=66:second_deadlock_stack=1 - else + fi + if [ "${{ matrix.backend }}" = "tsan-openmp" ]; then + # Load Archer (the OMPT tool from Ubuntu's libomp-dev) so TSan + # understands libomp's barriers instead of flagging them as false positives. The OpenMP runtime will load this tool automatically if OMP_TOOL_LIBRARIES is set. + archer=$(clang -print-file-name=libarcher.so) + [ -f "$archer" ] || archer=$(find /usr/lib/llvm-* -name libarcher.so 2>/dev/null | head -1) + if [ ! -f "$archer" ]; then + echo "::error::libarcher.so not found (expected from libomp-dev)" + exit 1 + fi + echo "Using distro Archer: $archer" + export OMP_TOOL_LIBRARIES="$archer" + export TSAN_OPTIONS="$TSAN_OPTIONS:ignore_noninstrumented_modules=1" + fi + if [ "${{ matrix.backend }}" != "tsan" ]; then export OMP_NUM_THREADS=16 fi export OPENBLAS_NUM_THREADS=8 diff --git a/common.h b/common.h index 4746633c18..a611c9cb07 100644 --- a/common.h +++ b/common.h @@ -79,6 +79,22 @@ extern "C" { #endif #endif +#ifndef ASSEMBLER +#ifdef HAVE_C11 +#if defined(C_GCC) && ( __GNUC__ < 7) +// workaround for GCC bug 65467 +#ifndef _Atomic +#define _Atomic volatile +#endif +#endif +#include <stdatomic.h> +#else +#ifndef _Atomic +#define _Atomic volatile +#endif +#endif +#endif + #if !defined(NOINCLUDE) && !defined(ASSEMBLER) #include #include @@ -430,6 +446,12 @@ please https://github.com/xianyi/OpenBLAS/issues/246 #include "common_quad.h" #endif +#ifndef ASSEMBLER +#ifdef HAVE_C11 +#define BLAS_LOCK_DEFINED +#endif +#endif + #ifdef ARCH_ALPHA #include "common_alpha.h" #endif @@ -557,6 +579,27 @@ static __inline void blas_lock(volatile BLASULONG *address){ #define BLAS_LOCK_DEFINED #endif +#ifdef HAVE_C11 +static __inline void blas_lock(volatile BLASULONG *address) { + BLASULONG expected = 0; + while (!atomic_compare_exchange_strong((volatile _Atomic BLASULONG *)address, + &expected, (BLASULONG)1)) { + expected = 0; + YIELDING; + } +} 
+#endif + +static __inline void blas_unlock(volatile BLASULONG *address){ +#ifdef HAVE_C11 + atomic_store((volatile _Atomic BLASULONG *)address, (BLASULONG)0); +#else + MB; + *address = 0; +#endif +} + + #ifndef RPCC_DEFINED #error "rpcc() implementation is missing for your platform" #endif @@ -740,19 +783,6 @@ __declspec(dllimport) int __cdecl omp_in_parallel(void); __declspec(dllimport) int __cdecl omp_get_num_procs(void); #endif -#ifdef HAVE_C11 -#if defined(C_GCC) && ( __GNUC__ < 7) -// workaround for GCC bug 65467 -#ifndef _Atomic -#define _Atomic volatile -#endif -#endif -#include <stdatomic.h> -#else -#ifndef _Atomic -#define _Atomic volatile -#endif -#endif #else #ifdef __ELF__ @@ -761,10 +791,6 @@ int omp_get_num_procs(void) __attribute__ ((weak)); #endif #endif -static __inline void blas_unlock(volatile BLASULONG *address){ - MB; - *address = 0; -} #ifdef OS_WINDOWSSTORE static __inline int readenv_atoi(char *env) { diff --git a/common_alpha.h b/common_alpha.h index e5380454aa..2a415efa4e 100644 --- a/common_alpha.h +++ b/common_alpha.h @@ -45,6 +45,7 @@ #define WMB asm("wmb") #define RMB asm("mb") +#ifndef BLAS_LOCK_DEFINED static __inline void blas_lock(unsigned long *address){ #ifndef __DECC unsigned long tmp1, tmp2; @@ -78,6 +79,7 @@ static __inline void blas_lock(unsigned long *address){ #endif } #define BLAS_LOCK_DEFINED +#endif static __inline unsigned int rpcc(void){ diff --git a/common_arm.h b/common_arm.h index d0d2451434..85af7f09bc 100644 --- a/common_arm.h +++ b/common_arm.h @@ -53,6 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#if defined(ARMV6) || defined(ARMV7) || defined(ARMV8) +#ifndef BLAS_LOCK_DEFINED static __inline void blas_lock(volatile BLASULONG *address){ int register ret; @@ -75,6 +76,7 @@ static __inline void blas_lock(volatile BLASULONG *address){ #define BLAS_LOCK_DEFINED #endif +#endif static inline int blas_quickdivide(blasint x, blasint y){ return x / y; diff --git a/common_arm64.h b/common_arm64.h index 2002de90fd..d37a4a9cba 100644 --- a/common_arm64.h +++ b/common_arm64.h @@ -68,6 +68,7 @@ static __inline int WhereAmI(void){ } #endif +#ifndef BLAS_LOCK_DEFINED static __inline void blas_lock(volatile BLASULONG *address){ BLASULONG ret; @@ -101,6 +102,7 @@ static __inline void blas_lock(volatile BLASULONG *address){ } #define BLAS_LOCK_DEFINED +#endif #if !defined(OS_DARWIN) && !defined (OS_ANDROID) static __inline BLASULONG rpcc(void){ diff --git a/common_ia64.h b/common_ia64.h index 59aefbd6da..57bf5ca55e 100644 --- a/common_ia64.h +++ b/common_ia64.h @@ -56,6 +56,7 @@ #define RPCC64BIT #ifndef __ECC +#ifndef BLAS_LOCK_DEFINED static __inline void blas_lock(volatile unsigned long *address){ unsigned long ret; @@ -70,6 +71,7 @@ static __inline void blas_lock(volatile unsigned long *address){ } while (ret); } #define BLAS_LOCK_DEFINED +#endif static __inline unsigned long rpcc(void) { unsigned long clocks; @@ -98,11 +100,13 @@ static __inline void ldmxcsr(unsigned long fp) { #else +#ifndef BLAS_LOCK_DEFINED static __inline void blas_lock(volatile unsigned long *address){ while (*address || _InterlockedCompareExchange((volatile int *) address,1,0)) ; } #define BLAS_LOCK_DEFINED +#endif static __inline unsigned int rpcc(void) { return __getReg(_IA64_REG_AR_ITC); diff --git a/common_power.h b/common_power.h index ded76ad519..63863fb2fe 100644 --- a/common_power.h +++ b/common_power.h @@ -89,6 +89,7 @@ void *qalloc(int flags, size_t bytes); +#ifndef BLAS_LOCK_DEFINED static inline void blas_lock(volatile unsigned long *address){ long int ret, val = 1; @@ -123,6 +124,7 @@ 
static inline void blas_lock(volatile unsigned long *address){ } while (ret); } #define BLAS_LOCK_DEFINED +#endif static inline unsigned long rpcc(void){ unsigned long ret; diff --git a/common_sparc.h b/common_sparc.h index 4b9e7840a1..2b1898764d 100644 --- a/common_sparc.h +++ b/common_sparc.h @@ -45,6 +45,7 @@ #ifndef ASSEMBLER +#ifndef BLAS_LOCK_DEFINED static __inline void blas_lock(volatile unsigned long *address){ long int ret = 1; @@ -60,6 +61,7 @@ static __inline void blas_lock(volatile unsigned long *address){ } while (ret); } #define BLAS_LOCK_DEFINED +#endif static __inline unsigned long rpcc(void){ unsigned long clocks; diff --git a/common_thread.h b/common_thread.h index 633d5516d1..da4cdf638f 100644 --- a/common_thread.h +++ b/common_thread.h @@ -51,7 +51,7 @@ extern void goto_set_num_threads(int nthreads); #undef TIMING_DEBUG /* Global Parameter */ -extern int blas_cpu_number; +extern _Atomic int blas_cpu_number; extern int blas_num_threads; extern int blas_omp_linked; diff --git a/common_x86.h b/common_x86.h index 65fb9a4600..2462f76145 100644 --- a/common_x86.h +++ b/common_x86.h @@ -54,6 +54,7 @@ #define __volatile__ #endif +#ifndef BLAS_LOCK_DEFINED static __inline void blas_lock(volatile BLASULONG *address){ int ret; @@ -83,6 +84,7 @@ static __inline void blas_lock(volatile BLASULONG *address){ } #define BLAS_LOCK_DEFINED +#endif static __inline unsigned long long rpcc(void){ #if defined(_MSC_VER) && !defined(__clang__) diff --git a/common_x86_64.h b/common_x86_64.h index 143e188a79..119f9c4cf8 100644 --- a/common_x86_64.h +++ b/common_x86_64.h @@ -70,6 +70,7 @@ #define RMB #endif +#ifndef BLAS_LOCK_DEFINED static __inline void blas_lock(volatile BLASULONG *address){ @@ -95,6 +96,7 @@ static __inline void blas_lock(volatile BLASULONG *address){ } #define BLAS_LOCK_DEFINED +#endif static __inline BLASULONG rpcc(void){ #ifdef C_MSVC diff --git a/driver/level3/level3_thread_lock.c b/driver/level3/level3_thread_lock.c index 1153f3f3ee..95477c116b 
100644 --- a/driver/level3/level3_thread_lock.c +++ b/driver/level3/level3_thread_lock.c @@ -33,7 +33,7 @@ static omp_lock_t level3_lock, critical_section_lock; static volatile BLASULONG init_lock = 0; -static volatile BLASULONG omp_lock_initialized = 0; +static _Atomic BLASULONG omp_lock_initialized = 0; static volatile BLASULONG parallel_section_left = MAX_PARALLEL_NUMBER; static void blas_level3_thread_lock_init(void) diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index 38b48fc842..d430f549b8 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -73,12 +73,8 @@ int blas_omp_threads_local = 1; extern int openblas_omp_adaptive_env(void); -static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER]; -#ifdef HAVE_C11 -static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; -#else -static _Bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; -#endif +static _Atomic(void *) blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER]; +static _Atomic _Bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; static void adjust_thread_buffers(void) { diff --git a/driver/others/memory.c b/driver/others/memory.c index 17dfa60d3b..d7df981e66 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -416,7 +416,7 @@ void set_stack_limit(int limitMB){ OpenBLAS uses the numbers of CPU cores in multithreading. It can be set by openblas_set_num_threads(int num_threads); */ -int blas_cpu_number = 0; +_Atomic int blas_cpu_number = 0; /* The numbers of threads in the thread pool. This value is equal or large than blas_cpu_number. This means some threads are sleep. 
@@ -1320,11 +1320,11 @@ UNLOCK_COMMAND(&alloc_lock); #ifdef USE_OPENMP printf("with a larger NUM_THREADS value or set the environment variable OMP_NUM_THREADS to\n"); #else - printf("with a larger NUM_THREADS value or set the environment variable OPENBLAS_NUM_THREADS to\n"); + printf("with a larger NUM_THREADS value or set the environment variable OPENBLAS_NUM_THREADS to\n"); #endif printf("a sufficiently small number. This error typically occurs when the software that relies on\n"); printf("OpenBLAS calls BLAS functions from many threads in parallel, or when your computer has more\n"); - printf("cpu cores than what OpenBLAS was configured to handle.\n"); + printf("cpu cores than what OpenBLAS was configured to handle.\n"); return NULL; } @@ -1811,7 +1811,7 @@ int get_num_procs(void) { static int nums = 0; int ret; - + #if defined(__GLIBC_PREREQ) cpu_set_t cpuset,*cpusetp; size_t size; @@ -1841,7 +1841,7 @@ int get_num_procs(void) { #if !defined(OS_LINUX) return (nums > 0 ? nums :2); #endif - + #if !defined(__GLIBC_PREREQ) return (nums > 0 ? nums :2); #else @@ -1993,7 +1993,7 @@ void set_stack_limit(int limitMB){ OpenBLAS uses the numbers of CPU cores in multithreading. It can be set by openblas_set_num_threads(int num_threads); */ -int blas_cpu_number = 0; +_Atomic int blas_cpu_number = 0; /* The numbers of threads in the thread pool. This value is equal or large than blas_cpu_number. This means some threads are sleep. 
@@ -2103,7 +2103,7 @@ int hugetlb_allocated = 0; static struct release_t release_info[NUM_BUFFERS]; static struct release_t *new_release_info; -static int release_pos = 0; +static _Atomic int release_pos = 0; #if defined(OS_LINUX) && !defined(NO_WARMUP) static int hot_alloc = 0; @@ -2153,14 +2153,14 @@ static void *alloc_mmap(void *address){ #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); #endif - if (likely(release_pos < NUM_BUFFERS)) { - release_info[release_pos].address = map_address; - release_info[release_pos].func = alloc_mmap_free; + int rpos = release_pos++; + if (likely(rpos < NUM_BUFFERS)) { + release_info[rpos].address = map_address; + release_info[rpos].func = alloc_mmap_free; } else { - new_release_info[release_pos-NUM_BUFFERS].address = map_address; - new_release_info[release_pos-NUM_BUFFERS].func = alloc_mmap_free; + new_release_info[rpos-NUM_BUFFERS].address = map_address; + new_release_info[rpos-NUM_BUFFERS].func = alloc_mmap_free; } - release_pos ++; #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #endif @@ -2264,7 +2264,7 @@ static void *alloc_mmap(void *address){ #endif #endif -#ifdef BUILD_DOUBLE +#ifdef BUILD_DOUBLE allocsize = DGEMM_P * DGEMM_Q * sizeof(double); #elif defined(BUILD_COMPLEX16) allocsize = ZGEMM_P * ZGEMM_Q * sizeof(double); @@ -2322,14 +2322,14 @@ static void *alloc_mmap(void *address){ #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); #endif - if (likely(release_pos < NUM_BUFFERS)) { - release_info[release_pos].address = map_address; - release_info[release_pos].func = alloc_mmap_free; + int rpos = release_pos++; + if (likely(rpos < NUM_BUFFERS)) { + release_info[rpos].address = map_address; + release_info[rpos].func = alloc_mmap_free; } else { - new_release_info[release_pos-NUM_BUFFERS].address = map_address; - new_release_info[release_pos-NUM_BUFFERS].func = alloc_mmap_free; + 
new_release_info[rpos-NUM_BUFFERS].address = map_address; + new_release_info[rpos-NUM_BUFFERS].func = alloc_mmap_free; } - release_pos ++; #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #endif @@ -2360,14 +2360,14 @@ static void *alloc_malloc(void *address){ if (map_address == (void *)NULL) map_address = (void *)-1; if (map_address != (void *)-1) { - if (likely(release_pos < NUM_BUFFERS)) { - release_info[release_pos].address = map_address; - release_info[release_pos].func = alloc_malloc_free; + int rpos = release_pos++; + if (likely(rpos < NUM_BUFFERS)) { + release_info[rpos].address = map_address; + release_info[rpos].func = alloc_malloc_free; } else { - new_release_info[release_pos-NUM_BUFFERS].address = map_address; - new_release_info[release_pos-NUM_BUFFERS].func = alloc_malloc_free; + new_release_info[rpos-NUM_BUFFERS].address = map_address; + new_release_info[rpos-NUM_BUFFERS].func = alloc_malloc_free; } - release_pos ++; } return map_address; @@ -2399,14 +2399,14 @@ static void *alloc_qalloc(void *address){ if (map_address == (void *)NULL) map_address = (void *)-1; if (map_address != (void *)-1) { - if (likely(release_pos < NUM_BUFFERS)) { - release_info[release_pos].address = map_address; - release_info[release_pos].func = alloc_qalloc_free; + int rpos = release_pos++; + if (likely(rpos < NUM_BUFFERS)) { + release_info[rpos].address = map_address; + release_info[rpos].func = alloc_qalloc_free; } else { - new_release_info[release_pos-NUM_BUFFERS].address = map_address; - new_release_info[release_pos-NUM_BUFFERS].func = alloc_qalloc_free; + new_release_info[rpos-NUM_BUFFERS].address = map_address; + new_release_info[rpos-NUM_BUFFERS].func = alloc_qalloc_free; } - release_pos ++; } return (void *)(((BLASULONG)map_address + FIXED_PAGESIZE - 1) & ~(FIXED_PAGESIZE - 1)); @@ -2433,14 +2433,14 @@ static void *alloc_windows(void *address){ if (map_address == (void *)NULL) map_address = (void *)-1; if (map_address != 
(void *)-1) { - if (likely(release_pos < NUM_BUFFERS)) { - release_info[release_pos].address = map_address; - release_info[release_pos].func = alloc_windows_free; + int rpos = release_pos++; + if (likely(rpos < NUM_BUFFERS)) { + release_info[rpos].address = map_address; + release_info[rpos].func = alloc_windows_free; } else { - new_release_info[release_pos-NUM_BUFFERS].address = map_address; - new_release_info[release_pos-NUM_BUFFERS].func = alloc_windows_free; + new_release_info[rpos-NUM_BUFFERS].address = map_address; + new_release_info[rpos-NUM_BUFFERS].func = alloc_windows_free; } - release_pos ++; } return map_address; @@ -2482,16 +2482,16 @@ static void *alloc_devicedirver(void *address){ fd, 0); if (map_address != (void *)-1) { - if (likely(release_pos < NUM_BUFFERS)) { - release_info[release_pos].address = map_address; - release_info[release_pos].attr = fd; - release_info[release_pos].func = alloc_devicedirver_free; + int rpos = release_pos++; + if (likely(rpos < NUM_BUFFERS)) { + release_info[rpos].address = map_address; + release_info[rpos].attr = fd; + release_info[rpos].func = alloc_devicedirver_free; } else { - new_release_info[release_pos-NUM_BUFFERS].address = map_address; - new_release_info[release_pos-NUM_BUFFERS].attr = fd; - new_release_info[release_pos-NUM_BUFFERS].func = alloc_devicedirver_free; + new_release_info[rpos-NUM_BUFFERS].address = map_address; + new_release_info[rpos-NUM_BUFFERS].attr = fd; + new_release_info[rpos-NUM_BUFFERS].func = alloc_devicedirver_free; } - release_pos ++; } return map_address; @@ -2526,16 +2526,16 @@ static void *alloc_shm(void *address){ shmctl(shmid, IPC_RMID, 0); - if (likely(release_pos < NUM_BUFFERS)) { - release_info[release_pos].address = map_address; - release_info[release_pos].attr = shmid; - release_info[release_pos].func = alloc_shm_free; + int rpos = release_pos++; + if (likely(rpos < NUM_BUFFERS)) { + release_info[rpos].address = map_address; + release_info[rpos].attr = shmid; + 
release_info[rpos].func = alloc_shm_free; } else { - new_release_info[release_pos-NUM_BUFFERS].address = map_address; - new_release_info[release_pos-NUM_BUFFERS].attr = shmid; - new_release_info[release_pos-NUM_BUFFERS].func = alloc_shm_free; + new_release_info[rpos-NUM_BUFFERS].address = map_address; + new_release_info[rpos-NUM_BUFFERS].attr = shmid; + new_release_info[rpos-NUM_BUFFERS].func = alloc_shm_free; } - release_pos ++; } return map_address; @@ -2643,14 +2643,14 @@ fprintf(stderr,"alloc_hugetlb got called\n"); #endif if (map_address != (void *)-1){ - if (likely(release_pos < NUM_BUFFERS)) { - release_info[release_pos].address = map_address; - release_info[release_pos].func = alloc_hugetlb_free; + int rpos = release_pos++; + if (likely(rpos < NUM_BUFFERS)) { + release_info[rpos].address = map_address; + release_info[rpos].func = alloc_hugetlb_free; } else { - new_release_info[release_pos-NUM_BUFFERS].address = map_address; - new_release_info[release_pos-NUM_BUFFERS].func = alloc_hugetlb_free; + new_release_info[rpos-NUM_BUFFERS].address = map_address; + new_release_info[rpos-NUM_BUFFERS].func = alloc_hugetlb_free; } - release_pos ++; } return map_address; @@ -2695,16 +2695,16 @@ static void *alloc_hugetlbfile(void *address){ fd, 0); if (map_address != (void *)-1) { - if (likely(release_pos < NUM_BUFFERS)) { - release_info[release_pos].address = map_address; - release_info[release_pos].attr = fd; - release_info[release_pos].func = alloc_hugetlbfile_free; + int rpos = release_pos++; + if (likely(rpos < NUM_BUFFERS)) { + release_info[rpos].address = map_address; + release_info[rpos].attr = fd; + release_info[rpos].func = alloc_hugetlbfile_free; } else { - new_release_info[release_pos-NUM_BUFFERS].address = map_address; - new_release_info[release_pos-NUM_BUFFERS].attr = fd; - new_release_info[release_pos-NUM_BUFFERS].func = alloc_hugetlbfile_free; + new_release_info[rpos-NUM_BUFFERS].address = map_address; + new_release_info[rpos-NUM_BUFFERS].attr = fd; + 
new_release_info[rpos-NUM_BUFFERS].func = alloc_hugetlbfile_free; } - release_pos ++; } return map_address; @@ -2719,12 +2719,12 @@ static BLASULONG base_address = BASE_ADDRESS; #endif static volatile struct { - BLASULONG lock; - void *addr; + _Atomic BLASULONG lock; + void * _Atomic addr; #if defined(WHEREAMI) && !defined(USE_OPENMP) int pos; #endif - int used; + _Atomic int used; #ifndef __64BIT__ char dummy[48]; #else @@ -2733,14 +2733,14 @@ static volatile struct { } memory[NUM_BUFFERS]; -struct newmemstruct +struct newmemstruct { - BLASULONG lock; - void *addr; + _Atomic BLASULONG lock; + void * _Atomic addr; #if defined(WHEREAMI) && !defined(USE_OPENMP) int pos; #endif - int used; + _Atomic int used; #ifndef __64BIT__ char dummy[48]; #else @@ -2761,7 +2761,7 @@ static int memory_overflowed = 0; void *blas_memory_alloc(int procpos){ int i; - + int position; #if defined(WHEREAMI) && !defined(USE_OPENMP) int mypos = 0; @@ -2852,13 +2852,13 @@ void *blas_memory_alloc(int procpos){ #if defined(SMP) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); #else - blas_lock(&memory[position].lock); + blas_lock((BLASULONG *)&memory[position].lock); #endif if (!memory[position].used) goto allocation; #if defined(SMP) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #else - blas_unlock(&memory[position].lock); + blas_unlock((BLASULONG *)&memory[position].lock); #endif } @@ -2878,12 +2878,12 @@ void *blas_memory_alloc(int procpos){ RMB; #if defined(USE_OPENMP) if (!memory[position].used) { - blas_lock(&memory[position].lock); + blas_lock((BLASULONG *)&memory[position].lock); #endif if (!memory[position].used) goto allocation; #if defined(USE_OPENMP) - blas_unlock(&memory[position].lock); + blas_unlock((BLASULONG *)&memory[position].lock); } #endif position ++; @@ -2896,12 +2896,12 @@ void *blas_memory_alloc(int procpos){ RMB; #if defined(USE_OPENMP) if (!newmemory[position-NUM_BUFFERS].used) { - blas_lock(&newmemory[position-NUM_BUFFERS].lock); + blas_lock((BLASULONG 
*)&newmemory[position-NUM_BUFFERS].lock); #endif if (!newmemory[position-NUM_BUFFERS].used) goto allocation2; #if defined(USE_OPENMP) - blas_unlock(&newmemory[position-NUM_BUFFERS].lock); + blas_unlock((BLASULONG *)&newmemory[position-NUM_BUFFERS].lock); } #endif position ++; @@ -2923,7 +2923,7 @@ void *blas_memory_alloc(int procpos){ #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #else - blas_unlock(&memory[position].lock); + blas_unlock((BLASULONG *)&memory[position].lock); #endif if (!memory[position].addr) { int failcount = 0; @@ -3043,7 +3043,7 @@ void *blas_memory_alloc(int procpos){ #endif if (memory_overflowed) goto terminate; fprintf(stderr,"OpenBLAS warning: precompiled NUM_THREADS exceeded, adding auxiliary array for thread metadata.\n"); - fprintf(stderr,"Note that your application may still crash, if it is calling OpenBLAS from multiple threads in parallel\n"); + fprintf(stderr,"Note that your application may still crash, if it is calling OpenBLAS from multiple threads in parallel\n"); fprintf(stderr,"To avoid this warning, please rebuild your copy of OpenBLAS with a larger NUM_THREADS setting\n"); #ifdef USE_OPENMP fprintf(stderr,"or set the environment variable OMP_NUM_THREADS to %d or lower\n", MAX_CPU_NUMBER); @@ -3062,13 +3062,13 @@ void *blas_memory_alloc(int procpos){ newmemory[i].used = 0; newmemory[i].lock = 0; } - + allocation2: newmemory[position-NUM_BUFFERS].used = 1; #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #else - blas_unlock(&newmemory[position-NUM_BUFFERS].lock); + blas_unlock((BLASULONG *)&newmemory[position-NUM_BUFFERS].lock); #endif do { #ifdef DEBUG @@ -3154,11 +3154,11 @@ void *blas_memory_alloc(int procpos){ #ifdef USE_OPENMP printf("with a larger NUM_THREADS value or set the environment variable OMP_NUM_THREADS to\n"); #else - printf("with a larger NUM_THREADS value or set the environment variable OPENBLAS_NUM_THREADS to\n"); + 
printf("with a larger NUM_THREADS value or set the environment variable OPENBLAS_NUM_THREADS to\n"); #endif printf("a sufficiently small number. This error typically occurs when the software that relies on\n"); printf("OpenBLAS calls BLAS functions from many threads in parallel, or when your computer has more\n"); - printf("cpu cores than what OpenBLAS was configured to handle.\n"); + printf("cpu cores than what OpenBLAS was configured to handle.\n"); return NULL; } @@ -3278,7 +3278,7 @@ void blas_shutdown(void){ } free((void*)newmemory); newmemory = NULL; - memory_overflowed = 0; + memory_overflowed = 0; } UNLOCK_COMMAND(&alloc_lock); diff --git a/driver/others/memory_qalloc.c b/driver/others/memory_qalloc.c index a2593e01f7..eea897acc1 100644 --- a/driver/others/memory_qalloc.c +++ b/driver/others/memory_qalloc.c @@ -277,7 +277,7 @@ void set_stack_limit(int limitMB){ OpenBLAS uses the numbers of CPU cores in multithreading. It can be set by openblas_set_num_threads(int num_threads); */ -int blas_cpu_number = 0; +_Atomic int blas_cpu_number = 0; /* The numbers of threads in the thread pool. This value is equal or large than blas_cpu_number. This means some threads are sleep.