From fe45f1c02fe99d26ee8e56162ae5b5b34442ad4f Mon Sep 17 00:00:00 2001
From: lerogo
Date: Mon, 21 Oct 2024 19:56:49 +0800
Subject: [PATCH 1/2] Fix the error related to the model path in Pixtral.

---
 vlmeval/vlm/pixtral.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/vlmeval/vlm/pixtral.py b/vlmeval/vlm/pixtral.py
index bb249dee..b26fe30a 100644
--- a/vlmeval/vlm/pixtral.py
+++ b/vlmeval/vlm/pixtral.py
@@ -21,9 +21,12 @@ def __init__(self, model_path='mistralai/Pixtral-12B-2409', **kwargs):
             logging.critical('Please install `mistral-inference` and `mistral_common`')
             raise err
 
-        if get_cache_path(model_path) is None:
-            snapshot_download(repo_id=model_path)
-        cache_path = get_cache_path(self.model_path)
+        if os.path.exists(model_path):
+            cache_path = model_path
+        else:
+            if get_cache_path(model_path) is None:
+                snapshot_download(repo_id=model_path)
+            cache_path = get_cache_path(self.model_path)
 
         self.tokenizer = MistralTokenizer.from_file(f'{cache_path}/tekken.json')
         model = Transformer.from_folder(cache_path, device='cpu')

From 8c0df855a5277524d8350e410c7fde94999836b9 Mon Sep 17 00:00:00 2001
From: lerogo
Date: Wed, 23 Oct 2024 14:43:12 +0800
Subject: [PATCH 2/2] Fix some typos

---
 vlmeval/config.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vlmeval/config.py b/vlmeval/config.py
index b4a6af75..db1946b0 100644
--- a/vlmeval/config.py
+++ b/vlmeval/config.py
@@ -46,7 +46,7 @@
     'Parrot': partial(Parrot, model_path='AIDC-AI/Parrot-7B'),
     'OmChat': partial(OmChat, model_path='omlab/omchat-v2.0-13B-single-beta_hf'),
     'RBDash_72b': partial(RBDash, model_path='RBDash-Team/RBDash-v1.2-72b', root=RBDash_ROOT),
-    'Pixtral-12B': partial(Pixtral, model_path="mistralai/Pixtral-12B-2409")
+    'Pixtral-12B': partial(Pixtral, model_path='mistralai/Pixtral-12B-2409')
 }
 
 api_models = {
@@ -116,7 +116,7 @@
     'qwen_chat': partial(QwenVLChat, model_path='Qwen/Qwen-VL-Chat'),
     'monkey': partial(Monkey, model_path='echo840/Monkey'),
     'monkey-chat': partial(MonkeyChat, model_path='echo840/Monkey-Chat'),
-    'minimonkey': partial(MiniMonkey, model_path='mx262/MiniMokney')
+    'minimonkey': partial(MiniMonkey, model_path='mx262/MiniMonkey')
 }
 
 llava_series = {
@@ -140,7 +140,7 @@
     'llava_onevision_qwen2_72b_si': partial(LLaVA_OneVision, model_path='lmms-lab/llava-onevision-qwen2-72b-si'),
     'llava_onevision_qwen2_0.5b_ov': partial(LLaVA_OneVision, model_path='lmms-lab/llava-onevision-qwen2-0.5b-ov'),
     'llava_onevision_qwen2_7b_ov': partial(LLaVA_OneVision, model_path='lmms-lab/llava-onevision-qwen2-7b-ov'),
-    'llava_onevision_qwen2_72b_ov': partial(LLaVA_OneVision, model_path='lmms-lab/llava-onevision-qwen2-72b-ov'),
+    'llava_onevision_qwen2_72b_ov': partial(LLaVA_OneVision, model_path='lmms-lab/llava-onevision-qwen2-72b-ov-sft'),
 }
 
 internvl_series = {
@@ -263,7 +263,7 @@
     'Qwen2-VL-2B-Instruct-AWQ': partial(Qwen2VLChat, model_path='Qwen/Qwen2-VL-2B-Instruct-AWQ', min_pixels=1280*28*28, max_pixels=16384*28*28),
     'Qwen2-VL-2B-Instruct-GPTQ-Int4': partial(Qwen2VLChat, model_path='Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4', min_pixels=1280*28*28, max_pixels=16384*28*28),
     'Qwen2-VL-2B-Instruct-GPTQ-Int8': partial(Qwen2VLChat, model_path='Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8', min_pixels=1280*28*28, max_pixels=16384*28*28),
-    'XinYuan-VL-2B-Instruct': partial(Qwen2VLChat, model_path='thomas-yanxin/XinYuan-VL-2B', min_pixels=1280*28*28, max_pixels=16384*28*28),
+    'XinYuan-VL-2B-Instruct': partial(Qwen2VLChat, model_path='Cylingo/Xinyuan-VL-2B', min_pixels=1280*28*28, max_pixels=16384*28*28),
 }
 
 slime_series = {
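The behavior PATCH 1/2 introduces in `Pixtral.__init__` can be summarized as: use `model_path` directly when it is an existing local directory, otherwise treat it as a Hugging Face repo id and resolve it through the local cache, downloading on a cache miss. The following is a minimal, self-contained sketch of that resolution order, not the patch itself: the function name `resolve_checkpoint_dir` is hypothetical, and it relies on `huggingface_hub.snapshot_download` returning the snapshot directory rather than VLMEvalKit's own `get_cache_path` helper used in the diff.

import os
from huggingface_hub import snapshot_download  # returns the local snapshot directory

def resolve_checkpoint_dir(model_path: str) -> str:
    # Hypothetical stand-in for the patched logic: an existing local directory
    # wins; anything else is treated as a Hugging Face repo id and resolved
    # through the cache (downloaded first if it is not cached yet).
    if os.path.exists(model_path):
        return model_path
    return snapshot_download(repo_id=model_path)

# Both forms should end up pointing at a folder containing tekken.json:
#   resolve_checkpoint_dir('/data/ckpts/Pixtral-12B-2409')   # local checkpoint dir
#   resolve_checkpoint_dir('mistralai/Pixtral-12B-2409')     # hub repo id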