diff --git a/llm/src/GPTBigCodeGenerate.cc b/llm/src/GPTBigCodeGenerate.cc index 37e1f045..65b84190 100644 --- a/llm/src/GPTBigCodeGenerate.cc +++ b/llm/src/GPTBigCodeGenerate.cc @@ -150,25 +150,9 @@ std::string GPTBigCodeGenerate(std::string param_path, void *model_ptr, int mode } } - if (id == 2) { - break_cnt--; - continue; - } // eos - else if (id == 1) - continue; - break_cnt = 2; - - bool skip = false; - if (id == 2277 && !previous_two_hash) { - previous_two_hash = true; - skip = true; - } else if (previous_two_hash && id == 29937) { // token = # - break_cnt = 0; - skip = true; - } else { - if (previous_two_hash) std::cout << "##" << std::endl; - previous_two_hash = false; - } + if (id == 0) { + break; + } // endoftext last_n_tokens.erase(last_n_tokens.begin()); last_n_tokens.push_back(id); @@ -176,7 +160,7 @@ std::string GPTBigCodeGenerate(std::string param_path, void *model_ptr, int mode generate_ids.push_back(id); input_ids = std::vector{id}; - if (interactive && !skip) { + if (interactive) { output += starcoder_id_to_token(vocab, id); std::cout << starcoder_id_to_token(vocab, id) << std::flush; } diff --git a/llm/src/nn_modules/Fp32GPTBigCodeAttention.cc b/llm/src/nn_modules/Fp32GPTBigCodeAttention.cc index 9e5b783f..36731021 100644 --- a/llm/src/nn_modules/Fp32GPTBigCodeAttention.cc +++ b/llm/src/nn_modules/Fp32GPTBigCodeAttention.cc @@ -71,11 +71,11 @@ Fp32GPTBigCodeAttention::Fp32GPTBigCodeAttention(std::string param_path, const s float *c_attn_weight, *c_proj_weight; // allocate_aligned_memory(c_attn_weight, (config.embed_dim * (config.embed_dim + 2 * this->kv_dim) * sizeof(float))); - allocate_aligned_memory(c_attn_weight, (config.embed_dim * config.embed_dim * 3 * sizeof(float)) / 2); + allocate_aligned_memory(c_attn_weight, (config.embed_dim * config.embed_dim * 3 * sizeof(float))); allocate_aligned_memory(c_proj_weight, (config.embed_dim * config.embed_dim * sizeof(float))); float *c_attn_bias, *c_proj_bias; // allocate_aligned_memory(c_attn_bias, ((config.embed_dim + 2 * this->kv_dim) * sizeof(float))); - allocate_aligned_memory(c_attn_bias, (config.embed_dim * 3 * sizeof(float)) / 2); + allocate_aligned_memory(c_attn_bias, (config.embed_dim * 3 * sizeof(float))); allocate_aligned_memory(c_proj_bias, (config.embed_dim * sizeof(float))); // this->c_attn = diff --git a/llm/tools/download_model.py b/llm/tools/download_model.py index 4b4b9638..a530e3d0 100644 --- a/llm/tools/download_model.py +++ b/llm/tools/download_model.py @@ -50,6 +50,10 @@ "url": "https://www.dropbox.com/scl/fi/zvmdw8cdf7j0j3a99q8sx/OPT_125m.zip?rlkey=qehxgfs21m36wvm7ratwy1r5d&dl=1", "md5sum": "816958aed84120b763942ba83c1b010f", }, + "StarCoder_15.5B_fp32": { + "url": "https://www.dropbox.com/scl/fi/vc1956by8v275t0ol6vw5/StarCoder_15.5B.zip?rlkey=aydnpd9w9jhgtlfqo5krkd0yx&dl=1", + "md5sum": "e3e9301866f47ab84817b46467ac49f6", + }, } Qmodels = { @@ -86,6 +90,10 @@ "url": "https://www.dropbox.com/scl/fi/7lu8rz8z5npe2nccfr66n/OPT_6.7B.zip?rlkey=5dtie29ncqscifs2g4ylpwnz7&dl=1", # noqa: E501 "md5sum": "6d061dc64ccc60864391f484b5e564d0", }, + "StarCoder_15.5B_awq_int4": { + "url": "https://www.dropbox.com/scl/fi/fe4dkrnzc25bt166w6bby/StarCoder_15.5B.zip?rlkey=ml1x96uep2k03z78ci7s1c0yb&dl=1", + "md5sum": "0f16236c0aec0b32b553248cc78b8caf", + }, }, "QM_x86": { "LLaMA_7B_awq_int4": { @@ -120,6 +128,10 @@ "url": "https://www.dropbox.com/scl/fi/uj4z3kp5wd3cvaaiyppvs/OPT_6.7B.zip?rlkey=yw5dxd18ajsc20g3mr2rqvnnt&dl=1", # noqa: E501 "md5sum": "4aba1bee864029d06d1fec67f4d95a22", }, + "StarCoder_15.5B_awq_int4": { + "url": "https://www.dropbox.com/scl/fi/86o2cblncmfd3xvuyyaqc/StarCoder_15.5B.zip?rlkey=2gswnyq9xihencaduddylpb2k&dl=1", + "md5sum": "48383ce0bf01b137069e3612cab8525f", + }, }, "QM_CUDA": { "LLaMA2_7B_chat_awq_int4": {