@@ -134,40 +134,9 @@ inline __SYCL_GLOBAL__ RawShadow *MemToShadow(uptr addr, uint32_t as) {
134134 return shadow_ptr;
135135}
136136
137- // For CPU device, each work group is a thread
138- inline Sid GetCurrentSid_CPU () {
139- // work-group linear id
140- const auto wg_lid =
141- __spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y *
142- __spirv_BuiltInNumWorkgroups.z +
143- __spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z +
144- __spirv_BuiltInWorkgroupId.z ;
145- return wg_lid;
146- }
147-
148- // For GPU device, each work item is a thread
149- inline Sid GetCurrentSid_GPU () {
150- // sub-group linear id
151- const auto lid = __spirv_BuiltInGlobalLinearId;
152- return lid;
153- }
154-
155137inline Sid GetCurrentSid () {
156- #if defined(__LIBDEVICE_CPU__)
157- return GetCurrentSid_CPU ();
158- #elif defined(__LIBDEVICE_PVC__)
159- return GetCurrentSid_GPU ();
160- #else
161- if (TsanLaunchInfo->DeviceTy == DeviceType::CPU) {
162- return GetCurrentSid_CPU ();
163- } else if (TsanLaunchInfo->DeviceTy != DeviceType::UNKNOWN) {
164- return GetCurrentSid_GPU ();
165- } else {
166- TSAN_DEBUG (__spirv_ocl_printf (__tsan_print_unsupport_device_type,
167- (int )TsanLaunchInfo->DeviceTy ));
168- return 0 ;
169- }
170- #endif
138+ const auto lid = __spirv_BuiltInGlobalLinearId;
139+ return lid % kThreadSlotCount ;
171140}
172141
173142inline RawShadow LoadShadow (const __SYCL_GLOBAL__ RawShadow *p) {
@@ -466,10 +435,6 @@ DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_private(uptr addr, uint32_t size) {
466435
467436DEVICE_EXTERN_C_INLINE void __tsan_device_barrier () {
468437 Sid sid = GetCurrentSid ();
469- __spirv_ControlBarrier (__spv::Scope::Device, __spv::Scope::Device,
470- __spv::MemorySemanticsMask::SequentiallyConsistent |
471- __spv::MemorySemanticsMask::CrossWorkgroupMemory |
472- __spv::MemorySemanticsMask::WorkgroupMemory);
473438
474439 // sync current thread clock to global state
475440 TsanLaunchInfo->Clock [kThreadSlotCount ].clk_ [sid] =
@@ -484,19 +449,10 @@ DEVICE_EXTERN_C_INLINE void __tsan_device_barrier() {
484449 for (uptr i = 0 ; i < kThreadSlotCount ; i++)
485450 TsanLaunchInfo->Clock [sid].clk_ [i] =
486451 TsanLaunchInfo->Clock [kThreadSlotCount ].clk_ [i];
487-
488- __spirv_ControlBarrier (__spv::Scope::Device, __spv::Scope::Device,
489- __spv::MemorySemanticsMask::SequentiallyConsistent |
490- __spv::MemorySemanticsMask::CrossWorkgroupMemory |
491- __spv::MemorySemanticsMask::WorkgroupMemory);
492452}
493453
494- static inline void __tsan_group_barrier_impl () {
454+ DEVICE_EXTERN_C_INLINE void __tsan_group_barrier () {
495455 Sid sid = GetCurrentSid ();
496- __spirv_ControlBarrier (__spv::Scope::Workgroup, __spv::Scope::Workgroup,
497- __spv::MemorySemanticsMask::SequentiallyConsistent |
498- __spv::MemorySemanticsMask::CrossWorkgroupMemory |
499- __spv::MemorySemanticsMask::WorkgroupMemory);
500456
501457 // sync current thread clock to global state
502458 TsanLaunchInfo->Clock [kThreadSlotCount ].clk_ [sid] =
@@ -511,23 +467,6 @@ static inline void __tsan_group_barrier_impl() {
511467 for (uptr i = 0 ; i < kThreadSlotCount ; i++)
512468 TsanLaunchInfo->Clock [sid].clk_ [i] =
513469 TsanLaunchInfo->Clock [kThreadSlotCount ].clk_ [i];
514-
515- __spirv_ControlBarrier (__spv::Scope::Workgroup, __spv::Scope::Workgroup,
516- __spv::MemorySemanticsMask::SequentiallyConsistent |
517- __spv::MemorySemanticsMask::CrossWorkgroupMemory |
518- __spv::MemorySemanticsMask::WorkgroupMemory);
519- }
520-
521- DEVICE_EXTERN_C_INLINE void __tsan_group_barrier () {
522- #if defined(__LIBDEVICE_CPU__)
523- return ;
524- #elif defined(__LIBDEVICE_PVC__)
525- __tsan_group_barrier_impl ();
526- #else
527- if (TsanLaunchInfo->DeviceTy == DeviceType::CPU)
528- return ;
529- __tsan_group_barrier_impl ();
530- #endif
531470}
532471
533472#endif // __SPIR__ || __SPIRV__
0 commit comments