Commit cc9dfb6

minor

RaymondWang0 committed Jan 20, 2024
1 parent 7e8eb4d commit cc9dfb6

Showing 3 changed files with 18 additions and 22 deletions.
24 changes: 4 additions & 20 deletions llm/src/GPTBigCodeGenerate.cc
@@ -150,33 +150,17 @@ std::string GPTBigCodeGenerate(std::string param_path, void *model_ptr, int mode
         }
     }
 
-        if (id == 2) {
-            break_cnt--;
-            continue;
-        }  // eos
-        else if (id == 1)
-            continue;
-        break_cnt = 2;
-
-        bool skip = false;
-        if (id == 2277 && !previous_two_hash) {
-            previous_two_hash = true;
-            skip = true;
-        } else if (previous_two_hash && id == 29937) {  // token = #
-            break_cnt = 0;
-            skip = true;
-        } else {
-            if (previous_two_hash) std::cout << "##" << std::endl;
-            previous_two_hash = false;
-        }
+        if (id == 0) {
+            break;
+        }  // endoftext
 
         last_n_tokens.erase(last_n_tokens.begin());
         last_n_tokens.push_back(id);
         embd.push_back(id);
         generate_ids.push_back(id);
         input_ids = std::vector<int>{id};
 
-        if (interactive && !skip) {
+        if (interactive) {
            output += starcoder_id_to_token(vocab, id);
            std::cout << starcoder_id_to_token(vocab, id) << std::flush;
         }
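Worth spelling out: the deleted branch reused LLaMA-vocabulary special tokens (eos id 2, plus the two-token "##" suppression for ids 2277/29937), which do not match StarCoder's vocabulary; GPT-BigCode ends generation at <|endoftext|>, token id 0, so a single check suffices and the skip flag disappears. A minimal sketch of the resulting loop shape, with hypothetical sample_token/decode stubs standing in for the engine's real sampler and starcoder_id_to_token():

```cpp
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-ins for the engine's sampler and detokenizer;
// the real code runs the model forward pass and starcoder_id_to_token().
static int sample_token(const std::vector<int>& /*input_ids*/) { return 0; }
static std::string decode(int id) { return "<tok" + std::to_string(id) + ">"; }

constexpr int kEndOfText = 0;  // GPT-BigCode <|endoftext|> token id

int main() {
    std::string output;
    std::vector<int> input_ids = {1, 2, 3};  // toy prompt
    for (int i = 0; i < 32; ++i) {
        int id = sample_token(input_ids);
        // One endoftext check replaces the old LLaMA-style eos/"##"
        // bookkeeping, whose token ids came from a different vocabulary.
        if (id == kEndOfText) break;
        input_ids = {id};  // feed only the newest token back in
        output += decode(id);
        std::cout << decode(id) << std::flush;
    }
    std::cout << std::endl;
    return 0;
}
```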
4 changes: 2 additions & 2 deletions llm/src/nn_modules/Fp32GPTBigCodeAttention.cc
@@ -71,11 +71,11 @@ Fp32GPTBigCodeAttention::Fp32GPTBigCodeAttention(std::string param_path, const s

     float *c_attn_weight, *c_proj_weight;
     // allocate_aligned_memory(c_attn_weight, (config.embed_dim * (config.embed_dim + 2 * this->kv_dim) * sizeof(float)));
-    allocate_aligned_memory(c_attn_weight, (config.embed_dim * config.embed_dim * 3 * sizeof(float)) / 2);
+    allocate_aligned_memory(c_attn_weight, (config.embed_dim * config.embed_dim * 3 * sizeof(float)));
     allocate_aligned_memory(c_proj_weight, (config.embed_dim * config.embed_dim * sizeof(float)));
     float *c_attn_bias, *c_proj_bias;
     // allocate_aligned_memory(c_attn_bias, ((config.embed_dim + 2 * this->kv_dim) * sizeof(float)));
-    allocate_aligned_memory(c_attn_bias, (config.embed_dim * 3 * sizeof(float)) / 2);
+    allocate_aligned_memory(c_attn_bias, (config.embed_dim * 3 * sizeof(float)));
     allocate_aligned_memory(c_proj_bias, (config.embed_dim * sizeof(float)));
 
     // this->c_attn =
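For scale: with StarCoder-15.5B shapes (assuming embed_dim = 6144 and a single 128-wide KV head, since GPT-BigCode uses multi-query attention), the commented-out MQA-exact size is about 150 MiB, the old 1.5 × embed_dim² allocation about 216 MiB, and the new 3 × embed_dim² about 432 MiB. The full 3× size is what a standard multi-head fused QKV weight would occupy, presumably matching the exported checkpoint layout — an assumption, not stated in the commit. A quick sketch of the arithmetic:

```cpp
#include <cstdio>
#include <cstddef>

int main() {
    // Assumed StarCoder-15.5B shapes: hidden size 6144, multi-query
    // attention with a single KV head of dimension 128.
    const size_t embed_dim = 6144;
    const size_t kv_dim = 128;

    // MQA-exact fused QKV weight: Q is embed_dim wide, K and V are kv_dim.
    const size_t mqa_exact = embed_dim * (embed_dim + 2 * kv_dim) * sizeof(float);
    // Size allocated before this commit: 1.5 * embed_dim^2 floats.
    const size_t before = embed_dim * embed_dim * 3 * sizeof(float) / 2;
    // Size allocated after this commit: full multi-head-style 3 * embed_dim^2.
    const size_t after = embed_dim * embed_dim * 3 * sizeof(float);

    std::printf("MQA-exact: %zu bytes (%.2f MiB)\n", mqa_exact, mqa_exact / 1048576.0);
    std::printf("before:    %zu bytes (%.2f MiB)\n", before, before / 1048576.0);
    std::printf("after:     %zu bytes (%.2f MiB)\n", after, after / 1048576.0);
    return 0;
}
```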
12 changes: 12 additions & 0 deletions llm/tools/download_model.py
@@ -50,6 +50,10 @@
"url": "https://www.dropbox.com/scl/fi/zvmdw8cdf7j0j3a99q8sx/OPT_125m.zip?rlkey=qehxgfs21m36wvm7ratwy1r5d&dl=1",
"md5sum": "816958aed84120b763942ba83c1b010f",
},
"StarCoder_15.5B_fp32": {
"url": "https://www.dropbox.com/scl/fi/vc1956by8v275t0ol6vw5/StarCoder_15.5B.zip?rlkey=aydnpd9w9jhgtlfqo5krkd0yx&dl=1",
"md5sum": "e3e9301866f47ab84817b46467ac49f6",
},
}

Qmodels = {
Expand Down Expand Up @@ -86,6 +90,10 @@
"url": "https://www.dropbox.com/scl/fi/7lu8rz8z5npe2nccfr66n/OPT_6.7B.zip?rlkey=5dtie29ncqscifs2g4ylpwnz7&dl=1", # noqa: E501
"md5sum": "6d061dc64ccc60864391f484b5e564d0",
},
"StarCoder_15.5B_awq_int4": {
"url": "https://www.dropbox.com/scl/fi/fe4dkrnzc25bt166w6bby/StarCoder_15.5B.zip?rlkey=ml1x96uep2k03z78ci7s1c0yb&dl=1",
"md5sum": "0f16236c0aec0b32b553248cc78b8caf",
},
},
"QM_x86": {
"LLaMA_7B_awq_int4": {
Expand Down Expand Up @@ -120,6 +128,10 @@
"url": "https://www.dropbox.com/scl/fi/uj4z3kp5wd3cvaaiyppvs/OPT_6.7B.zip?rlkey=yw5dxd18ajsc20g3mr2rqvnnt&dl=1", # noqa: E501
"md5sum": "4aba1bee864029d06d1fec67f4d95a22",
},
"StarCoder_15.5B_awq_int4": {
"url": "https://www.dropbox.com/scl/fi/86o2cblncmfd3xvuyyaqc/StarCoder_15.5B.zip?rlkey=2gswnyq9xihencaduddylpb2k&dl=1",
"md5sum": "48383ce0bf01b137069e3612cab8525f",
},
},
"QM_CUDA": {
"LLaMA2_7B_chat_awq_int4": {
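Each new entry pairs a Dropbox URL with an md5sum that the download tool checks after fetching the archive. To re-verify a downloaded zip by hand, a minimal C++ sketch using OpenSSL's EVP digest API (assumed filename StarCoder_15.5B.zip in the current directory; link with -lcrypto):

```cpp
#include <openssl/evp.h>
#include <cstdio>
#include <string>

// Compute the MD5 of a file incrementally (these archives are multi-GB,
// so the whole file is never held in memory).
static std::string md5_file(const char* path) {
    FILE* f = std::fopen(path, "rb");
    if (!f) return "";
    EVP_MD_CTX* ctx = EVP_MD_CTX_new();
    EVP_DigestInit_ex(ctx, EVP_md5(), nullptr);
    unsigned char buf[1 << 16];
    size_t n;
    while ((n = std::fread(buf, 1, sizeof(buf), f)) > 0)
        EVP_DigestUpdate(ctx, buf, n);
    std::fclose(f);
    unsigned char digest[EVP_MAX_MD_SIZE];
    unsigned int len = 0;
    EVP_DigestFinal_ex(ctx, digest, &len);
    EVP_MD_CTX_free(ctx);
    char hex[2 * EVP_MAX_MD_SIZE + 1];
    for (unsigned int i = 0; i < len; ++i)
        std::snprintf(hex + 2 * i, 3, "%02x", digest[i]);
    return std::string(hex, 2 * len);
}

int main() {
    // Expected checksum from the StarCoder_15.5B_fp32 entry above.
    const std::string expected = "e3e9301866f47ab84817b46467ac49f6";
    const std::string actual = md5_file("StarCoder_15.5B.zip");
    std::printf("%s\n", actual == expected ? "OK" : "MISMATCH");
    return 0;
}
```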
