You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
This is another out-of-memory exception. We used eight 32 GB GPUs for training. If you want to decrease the memory consumption during training, you'll have to decrease either the sequence length or the frame resolution.
Kindly help me fix this error.
mot17_fulltrain |#### | [2462/17757]|Tot: 0:03:39 |ETA: 0:20:00 |tot 0.077s (0.089s) |load 0.000s (0.000s) |pre 0.001s (0.001s) |net 0.068s (0.082s) |dec 0.002s (0.003s) |post mot17_fulltrain |#### | [2463/17757]|Tot: 0:03:39 |ETA: 0:20:00 |tot 0.079s (0.089s) |load 0.000s (0.000s) |pre 0.002s (0.001s) |net 0.071s (0.082s) |dec 0.002s (0.003s) |post mot17_fulltrain |#### | [2464/17757]|Tot: 0:03:39 |ETA: 0:20:00 |tot 0.077s (0.089s) |load 0.000s (0.000s) |pre 0.002s (0.001s) |net 0.069s (0.082s) |dec 0.002s (0.003s) |post mot17_fulltrain |#### | [2465/17757]|Tot: 0:03:39 |ETA: 0:20:00 |tot 0.077s (0.089s) |load 0.000s (0.000s) |pre 0.001s (0.001s) |net 0.069s (0.082s) |dec 0.002s (0.003s) |post mot17_fulltrain |#### | [2466/17757]|Tot: 0:03:39 |ETA: 0:20:00 |tot 0.078s (0.089s) |load 0.000s (0.000s) |pre 0.001s (0.001s) |net 0.070s (0.082s) |dec 0.002s (0.003s) |post mot17_fulltrain |#### | [2467/17757]|Tot: 0:03:39 |ETA: 0:19:46 |tot 0.077s (0.089s) |load 0.000s (0.000s) |pre 0.001s (0.001s) |net 0.068s (0.082s) |dec 0.002s (0.003s) |post mot17_fulltrain |#### | [2468/17757]|Tot: 0:03:39 |ETA: 0:19:46 |tot 0.077s (0.089s) |load 0.000s (0.000s) |pre 0.001s (0.001s) |net 0.069s (0.082s) |dec 0.002s (0.003s) |post 0.003s (0.002s) |merge 0.000s (0.000s) |track 0.001s (0.001s) Traceback (most recent call last):
File "test.py", line 195, in <module>
prefetch_test(opt)
File "test.py", line 109, in prefetch_test
ret = detector.run(pre_processed_images)
File "/home/mca/Downloads/CenterTrackP/src/lib/detector.py", line 144, in run
images, self.pre_images, self.pre_hms, pre_inds, return_time=True, original_batch=pre_processed_images)
File "/home/mca/Downloads/CenterTrackP/src/lib/detector.py", line 414, in process
output, self.h = self.model.step(batch_list, self.h)
File "/home/mca/Downloads/CenterTrackP/src/lib/model/networks/base_model.py", line 117, in step
feats = self.imgpre2feats(x, None, torch.zeros(1))
File "/home/mca/Downloads/CenterTrackP/src/lib/model/networks/dla.py", line 678, in imgpre2feats
y = self.do_tensor_pass(x, pre_img, pre_hm)
File "/home/mca/Downloads/CenterTrackP/src/lib/model/networks/dla.py", line 633, in do_tensor_pass
x = self.dla_up(x)
File "/home/mca/anaconda3/envs/CenterTrack_new/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "/home/mca/Downloads/CenterTrackP/src/lib/model/networks/dla.py", line 574, in forward
ida(layers, len(layers) -i - 2, len(layers))
File "/home/mca/anaconda3/envs/CenterTrack_new/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "/home/mca/Downloads/CenterTrackP/src/lib/model/networks/dla.py", line 547, in forward
layers[i] = node(layers[i] + layers[i - 1])
File "/home/mca/anaconda3/envs/CenterTrack_new/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "/home/mca/Downloads/CenterTrackP/src/lib/model/networks/dla.py", line 518, in forward
x = self.conv(x)
File "/home/mca/anaconda3/envs/CenterTrack_new/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "/home/mca/Downloads/CenterTrackP/src/lib/model/networks/DCNv2/dcn_v2.py", line 128, in forward
self.deformable_groups)
File "/home/mca/Downloads/CenterTrackP/src/lib/model/networks/DCNv2/dcn_v2.py", line 31, in forward
ctx.deformable_groups)
RuntimeError: CUDA out of memory. Tried to allocate 72.00 MiB (GPU 0; 7.93 GiB total capacity; 6.72 GiB already allocated; 19.31 MiB free; 6.99 GiB reserved in total by PyTorch) (malloc at /pytorch/c10/cuda/CUDACachingAllocator.cpp:289)
frame #0: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x33 (0x7fb5db859193 in /home/mca/anaconda3/envs/CenterTrack_new/lib/python3.6/site-packages/torch/lib/libc10.so)
frame #1: + 0x1bccc (0x7fb5dba9accc in /home/mca/anaconda3/envs/CenterTrack_new/lib/python3.6/site-packages/torch/lib/libc10_cuda.so)
frame #2: + 0x1cd5e (0x7fb5dba9bd5e in /home/mca/anaconda3/envs/CenterTrack_new/lib/python3.6/site-packages/torch/lib/libc10_cuda.so)
frame #3: at::native::empty_cuda(c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat) + 0x284 (0x7fb5e19226b4 in /home/mca/anaconda3/envs/CenterTrack_new/lib/python3.6/site-packages/torch/lib/libtorch.so)
frame #4: + 0x45bd7d8 (0x7fb5e02697d8 in /home/mca/anaconda3/envs/CenterTrack_new/lib/python3.6/site-packages/torch/lib/libtorch.so)
frame #5: + 0x1f4fb37 (0x7fb5ddbfbb37 in /home/mca/anaconda3/envs/CenterTrack_new/lib/python3.6/site-packages/torch/lib/libtorch.so)
frame #6: + 0x3f0f795 (0x7fb5dfbbb795 in /home/mca/anaconda3/envs/CenterTrack_new/lib/python3.6/site-packages/torch/lib/libtorch.so)
frame #7: + 0x1f4fb37 (0x7fb5ddbfbb37 in /home/mca/anaconda3/envs/CenterTrack_new/lib/python3.6/site-packages/torch/lib/libtorch.so)
frame #8: std::result_of<c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat >(c10::OperatorHandle const&, c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat) const::{lambda(c10::DispatchTable const&)#1} (c10::DispatchTable const&)>::type c10::LeftRightc10::DispatchTable::read<c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat >(c10::OperatorHandle const&, c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat) const::{lambda(c10::DispatchTable const&)#1}>(c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat >(c10::OperatorHandle const&, c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat) const::{lambda(c10::DispatchTable const&)#1}&&) const + 0x18c (0x7fb5d8d1d81c in /home/mca/Downloads/CenterTrackP/src/lib/model/networks/DCNv2/_ext.cpython-36m-x86_64-linux-gnu.so)
frame #9: c10::guts::infer_function_traits<c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat >(c10::OperatorHandle const&, c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat) const::{lambda(c10::DispatchTable const&)#1}>::type::return_type c10::impl::OperatorEntry::readDispatchTable<c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat >(c10::OperatorHandle const&, c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat) const::{lambda(c10::DispatchTable const&)#1}>(c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat >(c10::OperatorHandle const&, c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat) const::{lambda(c10::DispatchTable const&)#1}&&) const + 0x4e (0x7fb5d8d2b53c in /home/mca/Downloads/CenterTrackP/src/lib/model/networks/DCNv2/_ext.cpython-36m-x86_64-linux-gnu.so)
frame #10: at::Tensor c10::Dispatcher::callUnboxedOnly<at::Tensor, c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat >(c10::OperatorHandle const&, c10::ArrayRef, c10::TensorOptions const&, c10::optionalc10::MemoryFormat) const + 0x9d (0x7fb5d8d28b1b in /home/mca/Downloads/CenterTrackP/src/lib/model/networks/DCNv2/_ext.cpython-36m-x86_64-linux-gnu.so)
frame #11: + 0x5912d (0x7fb5d8d2012d in /home/mca/Downloads/CenterTrackP/src/lib/model/networks/DCNv2/_ext.cpython-36m-x86_64-linux-gnu.so)
frame #12: dcn_v2_cuda_forward(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, int, int, int, int, int, int, int, int, int) + 0xa59 (0x7fb5d8d2108a in /home/mca/Downloads/CenterTrackP/src/lib/model/networks/DCNv2/_ext.cpython-36m-x86_64-linux-gnu.so)
frame #13: dcn_v2_forward(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, int, int, int, int, int, int, int, int, int) + 0x143 (0x7fb5d8cfa463 in /home/mca/Downloads/CenterTrackP/src/lib/model/networks/DCNv2/_ext.cpython-36m-x86_64-linux-gnu.so)
frame #14: + 0x3ffff (0x7fb5d8d06fff in /home/mca/Downloads/CenterTrackP/src/lib/model/networks/DCNv2/_ext.cpython-36m-x86_64-linux-gnu.so)
frame #15: + 0x3d6ae (0x7fb5d8d046ae in /home/mca/Downloads/CenterTrackP/src/lib/model/networks/DCNv2/_ext.cpython-36m-x86_64-linux-gnu.so)
frame #21: THPFunction_apply(_object, _object) + 0xa8f (0x7fb6268b382f in /home/mca/anaconda3/envs/CenterTrack_new/lib/python3.6/site-packages/torch/lib/libtorch_python.so)
The text was updated successfully, but these errors were encountered: