Skip to content

Commit

Permalink
Fixed error handling in bench_latency.py (#904)
Browse files — browse the repository at this point in the history
  • Loading branch information
min-xu-et authored Aug 4, 2024
1 parent 947402c commit 7dd8a7e
Showing 1 changed file with 11 additions and 8 deletions.
19 changes: 11 additions & 8 deletions python/sglang/srt/managers/schedule_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,13 +380,15 @@ def prepare_for_extend(self, vocab_size: int, int_token_logit_bias: torch.Tensor
extend_num_tokens = seq_lens.sum() - prefix_lens.sum()
out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens)
if out_cache_loc is None:
self.tree_cache.evict(extend_num_tokens, self.token_to_kv_pool.free)
out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens)
if self.tree_cache is not None:
self.tree_cache.evict(extend_num_tokens, self.token_to_kv_pool.free)
out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens)

if out_cache_loc is None:
logger.error("Prefill out of memory. This should never happen.")
self.tree_cache.pretty_print()
exit()
logger.error("Prefill out of memory. Try to lower your batch size.")
if self.tree_cache is not None:
self.tree_cache.pretty_print()
exit(1)

pt = 0
for i in range(bs):
Expand Down Expand Up @@ -637,9 +639,10 @@ def prepare_for_decode(self, input_ids=None):
self.out_cache_loc = self.token_to_kv_pool.alloc(bs)

if self.out_cache_loc is None:
logger.error("Decode out of memory. This should never happen.")
self.tree_cache.pretty_print()
exit()
logger.error("Decode out of memory. Try to lower your batch size.")
if self.tree_cache is not None:
self.tree_cache.pretty_print()
exit(1)

self.req_to_token_pool.req_to_token[
self.req_pool_indices, self.seq_lens - 1
Expand Down

0 comments on commit 7dd8a7e

Please sign in to comment.