Skip to content

Commit

Permalink
Tweaks.
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurentMazare committed Sep 25, 2024
1 parent 46acac5 commit 43a8cbe
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions candle-core/src/quantized/cuda.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,13 @@ fn ceil_div(p: usize, q: usize) -> usize {
}

/// Pads `p` to a whole number of `q`-sized groups.
///
/// The group count is taken from `ceil_div(p, q)` and then scaled back up by `q`.
fn pad(p: usize, q: usize) -> usize {
    let groups = ceil_div(p, q);
    groups * q
}

fn pad_for_alloc(p: usize) -> usize {
// Overallocate by q rather than just padding by q as this should pad the last row
// and we don't have enough information here to know how many elements to add :(
// ceil_div(p, q) * q
p + q
p + MATRIX_ROW_PADDING
}

fn quantize_q8_1(
Expand Down Expand Up @@ -442,7 +445,7 @@ impl QCudaStorage {
}
_ => crate::bail!("only f32 can be quantized"),
};
let src_len = pad(src.len(), MATRIX_ROW_PADDING);
let src_len = pad_for_alloc(src.len());
let src = crate::Storage::Cpu(crate::CpuStorage::F32(src));
let mut qcpu_storage = crate::Device::Cpu.qzeros(src_len, self.dtype)?;
qcpu_storage.quantize(&src)?;
Expand Down

0 comments on commit 43a8cbe

Please sign in to comment.