Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 29 additions & 6 deletions .github/workflows/dynamic_arch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,8 @@ jobs:
check-name: "linux_thread_stress (openmp)"
- backend: tsan
check-name: linux_thread_sanitizer
- backend: tsan-openmp
check-name: linux_thread_sanitizer_openmp

steps:
- name: Checkout repository
Expand All @@ -362,8 +364,11 @@ jobs:
EOF
sudo apt-get update
sudo apt-get install -y ccache cmake ninja-build
if [ "${{ matrix.backend }}" = "tsan" ]; then
sudo apt-get install -y clang llvm
case "${{ matrix.backend }}" in
tsan|tsan-openmp) sudo apt-get install -y clang llvm ;;
esac
if [ "${{ matrix.backend }}" = "tsan-openmp" ]; then
sudo apt-get install -y libomp-dev
fi

- name: Compilation cache
Expand Down Expand Up @@ -398,7 +403,7 @@ jobs:
sanitizer_flags=
if [ "${{ matrix.backend }}" = "openmp" ]; then
use_openmp=ON
elif [ "${{ matrix.backend }}" = "tsan" ]; then
elif [ "${{ matrix.backend }}" = "tsan" ] || [ "${{ matrix.backend }}" = "tsan-openmp" ]; then
build_type=RelWithDebInfo
c_compiler=clang
cxx_compiler=clang++
Expand All @@ -408,6 +413,10 @@ jobs:
dgemm_mixed_args="131072;8;10"
dgemv_args="64;4;1"
sanitizer_flags="-fsanitize=thread -g -O1 -fno-omit-frame-pointer"
if [ "${{ matrix.backend }}" = "tsan-openmp" ]; then
use_openmp=ON
cpp_thread_safety_use_openmp=ON
fi
fi
cmake_args=(
-G Ninja
Expand Down Expand Up @@ -436,7 +445,7 @@ jobs:
-DCMAKE_C_COMPILER_LAUNCHER=ccache
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
)
if [ "${{ matrix.backend }}" = "tsan" ]; then
if [ "${{ matrix.backend }}" = "tsan" ] || [ "${{ matrix.backend }}" = "tsan-openmp" ]; then
cmake_args+=(
"-DCMAKE_C_FLAGS=$sanitizer_flags"
"-DCMAKE_CXX_FLAGS=$sanitizer_flags"
Expand All @@ -459,10 +468,24 @@ jobs:
timeout-minutes: 30
run: |
cd build
if [ "${{ matrix.backend }}" = "tsan" ]; then
if [ "${{ matrix.backend }}" = "tsan" ] || [ "${{ matrix.backend }}" = "tsan-openmp" ]; then
export LLVM_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer
export TSAN_OPTIONS=halt_on_error=1:exitcode=66:second_deadlock_stack=1
else
fi
if [ "${{ matrix.backend }}" = "tsan-openmp" ]; then
# Load Archer (the OMPT tool from Ubuntu's libomp-dev) so TSan
# understands libomp's barriers instead of reporting false-positive
# races around them. The OpenMP runtime loads this tool automatically
# when OMP_TOOL_LIBRARIES is set.
archer=$(clang -print-file-name=libarcher.so)
[ -f "$archer" ] || archer=$(find /usr/lib/llvm-* -name libarcher.so 2>/dev/null | head -1)
if [ ! -f "$archer" ]; then
echo "::error::libarcher.so not found (expected from libomp-dev)"
exit 1
fi
echo "Using distro Archer: $archer"
export OMP_TOOL_LIBRARIES="$archer"
export TSAN_OPTIONS="$TSAN_OPTIONS:ignore_noninstrumented_modules=1"
fi
if [ "${{ matrix.backend }}" != "tsan" ]; then
export OMP_NUM_THREADS=16
fi
export OPENBLAS_NUM_THREADS=8
Expand Down
60 changes: 43 additions & 17 deletions common.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,22 @@ extern "C" {
#endif
#endif

#ifndef ASSEMBLER
#ifdef HAVE_C11
#if defined(C_GCC) && ( __GNUC__ < 7)
// workaround for GCC bug 65467
#ifndef _Atomic
#define _Atomic volatile
#endif
#endif
#include <stdatomic.h>
#else
#ifndef _Atomic
#define _Atomic volatile
#endif
#endif
#endif

#if !defined(NOINCLUDE) && !defined(ASSEMBLER)
#include <stdio.h>
#include <stdlib.h>
Expand Down Expand Up @@ -430,6 +446,12 @@ please https://github.com/xianyi/OpenBLAS/issues/246
#include "common_quad.h"
#endif

#ifndef ASSEMBLER
#ifdef HAVE_C11
#define BLAS_LOCK_DEFINED
#endif
#endif

#ifdef ARCH_ALPHA
#include "common_alpha.h"
#endif
Expand Down Expand Up @@ -557,6 +579,27 @@ static __inline void blas_lock(volatile BLASULONG *address){
#define BLAS_LOCK_DEFINED
#endif

#ifdef HAVE_C11
/* Acquire the spin lock stored at `address`.
 *
 * Repeatedly attempts an atomic compare-and-swap of the lock word from 0 to 1
 * (default, sequentially consistent ordering) and executes YIELDING after
 * every failed attempt. Returns only once the swap has succeeded. */
static __inline void blas_lock(volatile BLASULONG *address) {
  volatile _Atomic BLASULONG *lock_word = (volatile _Atomic BLASULONG *)address;

  for (;;) {
    /* A failed CAS overwrites its "expected" operand with the value it
     * observed, so every attempt starts from a fresh 0. */
    BLASULONG unlocked = 0;
    if (atomic_compare_exchange_strong(lock_word, &unlocked, (BLASULONG)1)) {
      return;
    }
    YIELDING;
  }
}
#endif

/* Release the lock word at `address` by resetting it to 0.
 *
 * Without HAVE_C11: execute the MB macro, then perform a plain store through
 * the volatile pointer.
 * With HAVE_C11: a single atomic_store (default, sequentially consistent
 * ordering). */
static __inline void blas_unlock(volatile BLASULONG *address){
#ifndef HAVE_C11
  MB;
  *address = 0;
#else
  atomic_store((volatile _Atomic BLASULONG *)address, (BLASULONG)0);
#endif
}


#ifndef RPCC_DEFINED
#error "rpcc() implementation is missing for your platform"
#endif
Expand Down Expand Up @@ -740,19 +783,6 @@ __declspec(dllimport) int __cdecl omp_in_parallel(void);
__declspec(dllimport) int __cdecl omp_get_num_procs(void);
#endif

#ifdef HAVE_C11
#if defined(C_GCC) && ( __GNUC__ < 7)
// workaround for GCC bug 65467
#ifndef _Atomic
#define _Atomic volatile
#endif
#endif
#include <stdatomic.h>
#else
#ifndef _Atomic
#define _Atomic volatile
#endif
#endif

#else
#ifdef __ELF__
Expand All @@ -761,10 +791,6 @@ int omp_get_num_procs(void) __attribute__ ((weak));
#endif
#endif

/* Release the lock word at `address`: execute MB, then store 0 through the
 * volatile pointer. The order matters -- MB must come before the store.
 * NOTE(review): MB is defined elsewhere; presumably a full memory barrier on
 * each architecture -- confirm. */
static __inline void blas_unlock(volatile BLASULONG *address){
MB;
*address = 0;
}

#ifdef OS_WINDOWSSTORE
static __inline int readenv_atoi(char *env) {
Expand Down
2 changes: 2 additions & 0 deletions common_alpha.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#define WMB asm("wmb")
#define RMB asm("mb")

#ifndef BLAS_LOCK_DEFINED
static __inline void blas_lock(unsigned long *address){
#ifndef __DECC
unsigned long tmp1, tmp2;
Expand Down Expand Up @@ -78,6 +79,7 @@ static __inline void blas_lock(unsigned long *address){
#endif
}
#define BLAS_LOCK_DEFINED
#endif

static __inline unsigned int rpcc(void){

Expand Down
2 changes: 2 additions & 0 deletions common_arm.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(ARMV6) || defined(ARMV7) || defined(ARMV8)

#ifndef BLAS_LOCK_DEFINED
static __inline void blas_lock(volatile BLASULONG *address){

int register ret;
Expand All @@ -75,6 +76,7 @@ static __inline void blas_lock(volatile BLASULONG *address){

#define BLAS_LOCK_DEFINED
#endif
#endif

static inline int blas_quickdivide(blasint x, blasint y){
return x / y;
Expand Down
2 changes: 2 additions & 0 deletions common_arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ static __inline int WhereAmI(void){
}
#endif

#ifndef BLAS_LOCK_DEFINED
static __inline void blas_lock(volatile BLASULONG *address){

BLASULONG ret;
Expand Down Expand Up @@ -101,6 +102,7 @@ static __inline void blas_lock(volatile BLASULONG *address){
}

#define BLAS_LOCK_DEFINED
#endif

#if !defined(OS_DARWIN) && !defined (OS_ANDROID)
static __inline BLASULONG rpcc(void){
Expand Down
4 changes: 4 additions & 0 deletions common_ia64.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
#define RPCC64BIT

#ifndef __ECC
#ifndef BLAS_LOCK_DEFINED
static __inline void blas_lock(volatile unsigned long *address){

unsigned long ret;
Expand All @@ -70,6 +71,7 @@ static __inline void blas_lock(volatile unsigned long *address){
} while (ret);
}
#define BLAS_LOCK_DEFINED
#endif

static __inline unsigned long rpcc(void) {
unsigned long clocks;
Expand Down Expand Up @@ -98,11 +100,13 @@ static __inline void ldmxcsr(unsigned long fp) {

#else

#ifndef BLAS_LOCK_DEFINED
/* Spin until the lock word at `address` has been acquired.
 *
 * Each iteration first does a plain read of *address; the interlocked
 * compare-exchange (comparand 0, new value 1) is attempted only when that
 * read sees 0. The loop ends when the intrinsic returns 0.
 * NOTE(review): the intrinsic operates on the word through a
 * `volatile int *` cast of the `unsigned long` lock -- confirm the width is
 * intended on this target. */
static __inline void blas_lock(volatile unsigned long *address){
  for (;;) {
    if (*address) {
      continue;
    }
    if (_InterlockedCompareExchange((volatile int *) address,1,0) == 0) {
      return;
    }
  }
}
#define BLAS_LOCK_DEFINED
#endif

static __inline unsigned int rpcc(void) {
return __getReg(_IA64_REG_AR_ITC);
Expand Down
2 changes: 2 additions & 0 deletions common_power.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@

void *qalloc(int flags, size_t bytes);

#ifndef BLAS_LOCK_DEFINED
static inline void blas_lock(volatile unsigned long *address){

long int ret, val = 1;
Expand Down Expand Up @@ -123,6 +124,7 @@ static inline void blas_lock(volatile unsigned long *address){
} while (ret);
}
#define BLAS_LOCK_DEFINED
#endif

static inline unsigned long rpcc(void){
unsigned long ret;
Expand Down
2 changes: 2 additions & 0 deletions common_sparc.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@

#ifndef ASSEMBLER

#ifndef BLAS_LOCK_DEFINED
static __inline void blas_lock(volatile unsigned long *address){

long int ret = 1;
Expand All @@ -60,6 +61,7 @@ static __inline void blas_lock(volatile unsigned long *address){
} while (ret);
}
#define BLAS_LOCK_DEFINED
#endif

static __inline unsigned long rpcc(void){
unsigned long clocks;
Expand Down
2 changes: 1 addition & 1 deletion common_thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ extern void goto_set_num_threads(int nthreads);
#undef TIMING_DEBUG

/* Global Parameter */
extern int blas_cpu_number;
extern _Atomic int blas_cpu_number;
extern int blas_num_threads;
extern int blas_omp_linked;

Expand Down
2 changes: 2 additions & 0 deletions common_x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
#define __volatile__
#endif

#ifndef BLAS_LOCK_DEFINED
static __inline void blas_lock(volatile BLASULONG *address){

int ret;
Expand Down Expand Up @@ -83,6 +84,7 @@ static __inline void blas_lock(volatile BLASULONG *address){

}
#define BLAS_LOCK_DEFINED
#endif

static __inline unsigned long long rpcc(void){
#if defined(_MSC_VER) && !defined(__clang__)
Expand Down
2 changes: 2 additions & 0 deletions common_x86_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
#define RMB
#endif

#ifndef BLAS_LOCK_DEFINED
static __inline void blas_lock(volatile BLASULONG *address){


Expand All @@ -95,6 +96,7 @@ static __inline void blas_lock(volatile BLASULONG *address){

}
#define BLAS_LOCK_DEFINED
#endif

static __inline BLASULONG rpcc(void){
#ifdef C_MSVC
Expand Down
2 changes: 1 addition & 1 deletion driver/level3/level3_thread_lock.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

static omp_lock_t level3_lock, critical_section_lock;
static volatile BLASULONG init_lock = 0;
static volatile BLASULONG omp_lock_initialized = 0;
static _Atomic BLASULONG omp_lock_initialized = 0;
static volatile BLASULONG parallel_section_left = MAX_PARALLEL_NUMBER;

static void blas_level3_thread_lock_init(void)
Expand Down
8 changes: 2 additions & 6 deletions driver/others/blas_server_omp.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,8 @@ int blas_omp_threads_local = 1;

extern int openblas_omp_adaptive_env(void);

static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
#ifdef HAVE_C11
static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
#else
static _Bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
#endif
/* Shared buffer-pointer table and its companion flag array, both accessed
 * atomically. (Presumably blas_buffer_inuse[i] marks row i of
 * blas_thread_buffer as taken -- confirm against the users below.)
 *
 * The pointer table uses the qualifier spelling `void * _Atomic`, not the
 * specifier spelling `_Atomic(void *)`. common.h falls back to
 * `#define _Atomic volatile` when C11 atomics are unavailable (and for
 * GCC < 7); with that macro the specifier form expands to the ill-formed
 * `volatile(void *)`, whereas the qualifier form becomes the valid
 * `void * volatile`. With real C11 atomics both spellings declare an atomic
 * pointer to void. */
static void * _Atomic blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
static _Atomic _Bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];

static void adjust_thread_buffers(void) {

Expand Down
Loading
Loading