You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Describe the bug
When I run a model with custom CPU ops using DeepRec, it works. But when I enable BladeDISC to optimize performance, the model run hangs.
Thread 1 (Thread 0x7f44a9d05740 (LWP 29790)):
#0 syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
#1 0x00007f44075617f4 in nsync::nsync_mu_semaphore_p_with_deadline(nsync::nsync_semaphore_s_*, timespec) () from /opt/venv_disc/lib/python3.8/site-packages/tensorflow_core/python/_pywrap_tensorflow_internal.so
#2 0x00007f4407560e09 in nsync::nsync_sem_wait_with_cancel_(nsync::waiter*, timespec, nsync::nsync_note_s_*) () from /opt/venv_disc/lib/python3.8/site-packages/tensorflow_core/python/_pywrap_tensorflow_internal.so
#3 0x00007f440755e183 in nsync::nsync_cv_wait_with_deadline_generic(nsync::nsync_cv_s_*, void*, void (*)(void*), void (*)(void*), timespec, nsync::nsync_note_s_*) () from /opt/venv_disc/lib/python3.8/site-packages/tensorflow_core/python/_pywrap_tensorflow_internal.so
#4 0x00007f440755e667 in nsync::nsync_cv_wait_with_deadline(nsync::nsync_cv_s_*, nsync::nsync_mu_s_*, timespec, nsync::nsync_note_s_*) () from /opt/venv_disc/lib/python3.8/site-packages/tensorflow_core/python/_pywrap_tensorflow_internal.so
#5 0x00007f440bc88bbc in tensorflow::DirectSession::WaitForNotification(tensorflow::Notification*, long long) () from /opt/venv_disc/lib/python3.8/site-packages/tensorflow_core/python/_pywrap_tensorflow_internal.so
#6 0x00007f440bc88c8f in tensorflow::DirectSession::WaitForNotification(tensorflow::DirectSession::RunState*, tensorflow::CancellationManager*, long long) () from /opt/venv_disc/lib/python3.8/site-packages/tensorflow_core/python/_pywrap_tensorflow_internal.so
#7 0x00007f440bc95971 in tensorflow::DirectSession::RunInternal(long long, tensorflow::RunOptions const&, tensorflow::CallFrameInterface*, tensorflow::DirectSession::ExecutorsAndKeys*, tensorflow::RunMetadata*, tensorflow::thread::ThreadPoolOptions const&) () from /opt/venv_disc/lib/python3.8/site-packages/tensorflow_core/python/_pywrap_tensorflow_internal.so
#8 0x00007f440bca9cfc in tensorflow::DirectSession::Run(tensorflow::RunOptions const&, std::vector<std::pair<std::string, tensorflow::Tensor>, std::allocator<std::pair<std::string, tensorflow::Tensor> > > const&, std::vector<std::string, std::allocator<std::string> > const&, std::vector<std::string, std::allocator<std::string> > const&, std::vector<tensorflow::Tensor, std::allocator<tensorflow::Tensor> >*, tensorflow::RunMetadata*) () from /opt/venv_disc/lib/python3.8/site-packages/tensorflow_core/python/_pywrap_tensorflow_internal.so
#9 0x00007f4403c2dfb5 in tensorflow::SessionRef::Run(tensorflow::RunOptions const&, std::vector<std::pair<std::string, tensorflow::Tensor>, std::allocator<std::pair<std::string, tensorflow::Tensor> > > const&,
Other threads (all idle in the OpenBLAS thread server):
Thread 6 (Thread 0x7f449ec98700 (LWP 29799)):
#0 futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x7f44a8ca8d60 <thread_status+608>) at ../sysdeps/nptl/futex-internal.h:183
#1 __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x7f44a8ca8d10 <thread_status+528>, cond=0x7f44a8ca8d38 <thread_status+568>) at pthread_cond_wait.c:508
#2 __pthread_cond_wait (cond=0x7f44a8ca8d38 <thread_status+568>, mutex=0x7f44a8ca8d10 <thread_status+528>) at pthread_cond_wait.c:647
#3 0x00007f44a72aa60b in blas_thread_server () from /opt/venv_disc/lib/python3.8/site-packages/numpy/core/../../numpy.libs/libopenblasp-r0-34a18dc3.3.7.so
#4 0x00007f44a9eb6609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5 0x00007f44a9ff0133 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 5 (Thread 0x7f44a1499700 (LWP 29798)):
#0 futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x7f44a8ca8ce0 <thread_status+480>) at ../sysdeps/nptl/futex-internal.h:183
#1 __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x7f44a8ca8c90 <thread_status+400>, cond=0x7f44a8ca8cb8 <thread_status+440>) at pthread_cond_wait.c:508
#2 __pthread_cond_wait (cond=0x7f44a8ca8cb8 <thread_status+440>, mutex=0x7f44a8ca8c90 <thread_status+400>) at pthread_cond_wait.c:647
#3 0x00007f44a72aa60b in blas_thread_server () from /opt/venv_disc/lib/python3.8/site-packages/numpy/core/../../numpy.libs/libopenblasp-r0-34a18dc3.3.7.so
#4 0x00007f44a9eb6609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5 0x00007f44a9ff0133 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 4 (Thread 0x7f44a3c9a700 (LWP 29797)):
#0 futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x7f44a8ca8c60 <thread_status+352>) at ../sysdeps/nptl/futex-internal.h:183
#1 __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x7f44a8ca8c10 <thread_status+272>, cond=0x7f44a8ca8c38 <thread_status+312>) at pthread_cond_wait.c:508
#2 __pthread_cond_wait (cond=0x7f44a8ca8c38 <thread_status+312>, mutex=0x7f44a8ca8c10 <thread_status+272>) at pthread_cond_wait.c:647
#3 0x00007f44a72aa60b in blas_thread_server () from /opt/venv_disc/lib/python3.8/site-packages/numpy/core/../../numpy.libs/libopenblasp-r0-34a18dc3.3.7.so
#4 0x00007f44a9eb6609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5 0x00007f44a9ff0133 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 3 (Thread 0x7f44a649b700 (LWP 29796)):
#0 futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x7f44a8ca8be0 <thread_status+224>) at ../sysdeps/nptl/futex-internal.h:183
#1 __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x7f44a8ca8b90 <thread_status+144>, cond=0x7f44a8ca8bb8 <thread_status+184>) at pthread_cond_wait.c:508
#2 __pthread_cond_wait (cond=0x7f44a8ca8bb8 <thread_status+184>, mutex=0x7f44a8ca8b90 <thread_status+144>) at pthread_cond_wait.c:647
#3 0x00007f44a72aa60b in blas_thread_server () from /opt/venv_disc/lib/python3.8/site-packages/numpy/core/../../numpy.libs/libopenblasp-r0-34a18dc3.3.7.so
#4 0x00007f44a9eb6609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5 0x00007f44a9ff0133 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 2 (Thread 0x7f44a6c9c700 (LWP 29795)):
#0 futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x7f44a8ca8b60 <thread_status+96>) at ../sysdeps/nptl/futex-internal.h:183
#1 __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x7f44a8ca8b10 <thread_status+16>, cond=0x7f44a8ca8b38 <thread_status+56>) at pthread_cond_wait.c:508
#2 __pthread_cond_wait (cond=0x7f44a8ca8b38 <thread_status+56>, mutex=0x7f44a8ca8b10 <thread_status+16>) at pthread_cond_wait.c:647
#3 0x00007f44a72aa60b in blas_thread_server () from /opt/venv_disc/lib/python3.8/site-packages/numpy/core/../../numpy.libs/libopenblasp-r0-34a18dc3.3.7.so
#4 0x00007f44a9eb6609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5 0x00007f44a9ff0133 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
To Reproduce
import blade_disc_tf as disc
disc.enable()
I only added these two lines to my code, and after that the model run hangs.
Describe the bug
When I run a model with custom CPU ops using DeepRec, it works. But when I enable BladeDISC to optimize performance, the model run hangs.
To Reproduce
I only added these two lines to my code, and after that the model run hangs.
BladeDISC branch: features/deeprec2208-cu114
DeepRec: commit be62ec312595b51b74260f96a6c0872ce5f1540c (HEAD -> main, origin/main, origin/HEAD)
The text was updated successfully, but these errors were encountered: