From 0fbdb69249dd4e9ca84fafef8e88103b330bfaed Mon Sep 17 00:00:00 2001
From: ebsmothers
Date: Fri, 12 Jul 2024 10:07:09 -0700
Subject: [PATCH] update models docs (#1167)

---
 docs/source/api_ref_models.rst               | 47 +++++++++++++++----
 .../models/code_llama2/_model_builders.py    |  3 ++
 torchtune/models/llama2/__init__.py          |  2 -
 torchtune/models/llama2/_model_builders.py   |  3 ++
 torchtune/models/llama3/__init__.py          |  2 -
 torchtune/models/mistral/_model_builders.py  |  2 +-
 torchtune/models/phi3/_sentencepiece.py      |  8 ++--
 7 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/docs/source/api_ref_models.rst b/docs/source/api_ref_models.rst
index b5a8ba60ed..c1c047545f 100644
--- a/docs/source/api_ref_models.rst
+++ b/docs/source/api_ref_models.rst
@@ -11,9 +11,18 @@ llama3
 
 All models from the `Llama3 family `_.
 
+To download the Llama3-8B-Instruct model:
+
+.. code-block:: bash
+
+    tune download meta-llama/Meta-Llama-3-8B-Instruct --hf-token
+
+To download the Llama3-70B-Instruct model:
+
 .. code-block:: bash
 
-    tune download meta-llama/Meta-Llama-3-8B-Instruct --hf-token
+    tune download meta-llama/Meta-Llama-3-70B-Instruct --hf-token
+    --ignore-patterns "original/consolidated*"
 
 .. autosummary::
@@ -34,11 +43,23 @@ llama2
 
 All models from the `Llama2 family `_.
 
-Pre-trained models can be downloaded from the Hugging Face Hub with the following command:
+To download the Llama2-7B model:
+
+.. code-block:: bash
+
+    tune download meta-llama/Llama-2-7b-hf --hf-token
+
+To download the Llama2-13B model:
 
 .. code-block:: bash
 
-    tune download meta-llama/Llama-2-7b-hf --hf-token
+    tune download meta-llama/Llama-2-13b-hf --hf-token
+
+To download the Llama2-70B model:
+
+.. code-block:: bash
+
+    tune download meta-llama/Llama-2-70b-hf --hf-token
 
 .. autosummary::
     :toctree: generated/
@@ -61,11 +82,11 @@ code llama
 
 Models from the `Code Llama family `_.
 
-Pre-trained models can be downloaded from the Hugging Face Hub with the following command:
+To download the CodeLlama-7B model:
 
 .. code-block:: bash
 
-    tune download codellama/CodeLlama-7b-hf --hf-token
+    tune download codellama/CodeLlama-7b-hf --hf-token
 
 .. autosummary::
     :toctree: generated/
@@ -87,7 +108,7 @@ phi-3
 
 Models from the `Phi-3 mini family `_.
 
-Pre-trained models can be download from the Hugging Face Hub with the following command:
+To download the Phi-3 Mini 4k instruct model:
 
 .. code-block:: bash
@@ -108,11 +129,11 @@ mistral
 
 All models from `Mistral AI family `_.
 
-Pre-trained models can be downloaded from the Hugging Face Hub with the following command:
+To download the Mistral 7B v0.1 model:
 
 .. code-block:: bash
 
-    tune download mistralai/Mistral-7B-v0.1
+    tune download mistralai/Mistral-7B-v0.1 --hf-token
 
 .. autosummary::
     :toctree: generated/
@@ -132,11 +153,17 @@ gemma
 
 Models of size 2B and 7B from the `Gemma family `_.
 
-Pre-trained models can be downloaded from the Hugging Face Hub with the following command:
+To download the Gemma 2B model:
+
+.. code-block:: bash
+
+    tune download google/gemma-2b --hf-token --ignore-patterns ""
+
+To download the Gemma 7B model:
 
 .. code-block:: bash
 
-    tune download google/gemma-2b --hf-token --ignore-patterns ""
+    tune download google/gemma-7b --hf-token --ignore-patterns "gemma-7b.gguf"
 
 .. autosummary::
     :toctree: generated/
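The RST changes above pair each model family with a `tune download` command; the builders listed in the accompanying autosummary blocks are what consume those checkpoints. A minimal sketch of that relationship is shown below, assuming torchtune is installed and using `llama3_8b`, one of the documented builders; downloaded weights are loaded separately by the recipes and checkpointers, not by the builder itself.

.. code-block:: python

    # Sketch only: instantiate one of the builders documented in api_ref_models.rst.
    # This creates a randomly initialized Llama3-8B TransformerDecoder, so it
    # allocates real memory and is illustrative rather than something to run casually.
    from torchtune.models.llama3 import llama3_8b

    model = llama3_8b()
    print(f"{sum(p.numel() for p in model.parameters()):,} parameters")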
diff --git a/torchtune/models/code_llama2/_model_builders.py b/torchtune/models/code_llama2/_model_builders.py
index 47b17b724b..fca17367d8 100644
--- a/torchtune/models/code_llama2/_model_builders.py
+++ b/torchtune/models/code_llama2/_model_builders.py
@@ -59,6 +59,7 @@ def lora_code_llama2_7b(
             Default: False
         lora_rank (int): rank of each low-rank approximation
         lora_alpha (float): scaling factor for the low-rank approximation
+        lora_dropout (float): dropout probability for LoRA linear layers. Default: 0.05
         quantize_base (bool): Whether to quantize base model weights
 
     Returns:
@@ -139,6 +140,7 @@ def lora_code_llama2_13b(
             Default: False
         lora_rank (int): rank of each low-rank approximation
         lora_alpha (float): scaling factor for the low-rank approximation
+        lora_dropout (float): dropout probability for LoRA linear layers. Default: 0.05
         quantize_base (bool): Whether to quantize base model weights
 
     Returns:
@@ -220,6 +222,7 @@ def lora_code_llama2_70b(
             Default: False
         lora_rank (int): rank of each low-rank approximation
         lora_alpha (float): scaling factor for the low-rank approximation
+        lora_dropout (float): dropout probability for LoRA linear layers. Default: 0.05
         quantize_base (bool): Whether to quantize base model weights
 
     Returns:
diff --git a/torchtune/models/llama2/__init__.py b/torchtune/models/llama2/__init__.py
index e45227ec96..ccdf875fe1 100644
--- a/torchtune/models/llama2/__init__.py
+++ b/torchtune/models/llama2/__init__.py
@@ -18,7 +18,6 @@
     qlora_llama2_70b,
     qlora_llama2_7b,
 )
-from ._model_utils import scale_hidden_dim_for_mlp
 
 __all__ = [
     "llama2",
@@ -33,5 +32,4 @@
     "qlora_llama2_13b",
     "qlora_llama2_70b",
     "qlora_llama2_7b",
-    "scale_hidden_dim_for_mlp",
 ]
diff --git a/torchtune/models/llama2/_model_builders.py b/torchtune/models/llama2/_model_builders.py
index 459e872639..6cae9e62ea 100644
--- a/torchtune/models/llama2/_model_builders.py
+++ b/torchtune/models/llama2/_model_builders.py
@@ -83,6 +83,7 @@ def lora_llama2_7b(
         lora_rank (int): rank of each low-rank approximation
         lora_alpha (float): scaling factor for the low-rank approximation
         quantize_base (bool): Whether to quantize base model weights
+        lora_dropout (float): dropout probability for LoRA linear layers. Default: 0.05
 
     Returns:
         TransformerDecoder: Instantiation of Llama2 7B model with LoRA applied
@@ -162,6 +163,7 @@ def lora_llama2_13b(
             Default: False
         lora_rank (int): rank of each low-rank approximation
         lora_alpha (float): scaling factor for the low-rank approximation
+        lora_dropout (float): dropout probability for LoRA linear layers. Default: 0.05
         quantize_base (bool): Whether to quantize base model weights
 
     Returns:
@@ -243,6 +245,7 @@ def lora_llama2_70b(
             Default: False
         lora_rank (int): rank of each low-rank approximation
         lora_alpha (float): scaling factor for the low-rank approximation
+        lora_dropout (float): dropout probability for LoRA linear layers. Default: 0.05
         quantize_base (bool): Whether to quantize base model weights
 
     Returns:
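The docstring additions above document the existing `lora_dropout` argument on the Llama2 and Code-Llama2 LoRA builders. A hedged usage sketch follows, assuming torchtune is installed and the builder signatures match these docstrings; the values shown are the documented defaults, and the same keyword applies to the `lora_code_llama2_*` builders touched earlier in this patch.

.. code-block:: python

    # Sketch only: call a LoRA builder with the newly documented argument.
    # Building the full 7B model is memory-heavy; treat this as illustrative.
    from torchtune.models.llama2 import lora_llama2_7b

    model = lora_llama2_7b(
        lora_attn_modules=["q_proj", "v_proj"],  # attention projections that get LoRA adapters
        lora_rank=8,
        lora_alpha=16,
        lora_dropout=0.05,  # documented default per the docstrings above
    )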
diff --git a/torchtune/models/llama3/__init__.py b/torchtune/models/llama3/__init__.py
index 44b66eed8f..702c19383d 100644
--- a/torchtune/models/llama3/__init__.py
+++ b/torchtune/models/llama3/__init__.py
@@ -15,7 +15,6 @@
     qlora_llama3_70b,
     qlora_llama3_8b,
 )
-from ._model_utils import scale_hidden_dim_for_mlp
 
 __all__ = [
     "llama3",
@@ -27,5 +26,4 @@
     "lora_llama3_70b",
     "qlora_llama3_8b",
     "qlora_llama3_70b",
-    "scale_hidden_dim_for_mlp",
 ]
diff --git a/torchtune/models/mistral/_model_builders.py b/torchtune/models/mistral/_model_builders.py
index 3fb919fec6..891a969436 100644
--- a/torchtune/models/mistral/_model_builders.py
+++ b/torchtune/models/mistral/_model_builders.py
@@ -126,7 +126,7 @@ def mistral_classifier_7b() -> TransformerDecoder:
 
     Returns:
-        TransformerClassifier: Instantiation of Mistral 7B classifier model
+        TransformerDecoder: Instantiation of Mistral 7B classifier model
     """
     return mistral_classifier(
         num_classes=1,
diff --git a/torchtune/models/phi3/_sentencepiece.py b/torchtune/models/phi3/_sentencepiece.py
index 390afa4f39..d68c5143fd 100644
--- a/torchtune/models/phi3/_sentencepiece.py
+++ b/torchtune/models/phi3/_sentencepiece.py
@@ -136,13 +136,13 @@ def tokenize_messages(
                 Message(role="system", content="system message\n", masked=True),
                 Message(role="user", content="user prompt\n", masked=True),
                 Message(role="assistant", content="assistant response\n"),
-            ]
-            # tokenize_messages encodes messages separately and concats
+            ]
+
+            >>> # tokenize_messages encodes messages separately and concats
             >>> tokenizer.tokenize_messages(messages, max_seq_len)[0]
             [1, 1788, 2643, 13, 1792, 9508, 13, 465, 22137, 2933, 2]
-
-            # Same result as encoding the full string in one go
+            >>> # Same result as encoding the full string in one go
             >>> tokenizer.encode(''.join([message.content for message in messages]))
             [1, 1788, 2643, 13, 1792, 9508, 13, 465, 22137, 2933, 2]
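The mistral hunk above corrects the documented return type of `mistral_classifier_7b` to `TransformerDecoder`. A hedged check of that statement is sketched below, assuming torchtune is installed and the builder is exported from `torchtune.models.mistral` as its builder module suggests; constructing the full 7B classifier is memory-heavy, so this is illustrative rather than a lightweight test.

.. code-block:: python

    # Sketch only: the classifier builder returns a TransformerDecoder, as the
    # corrected docstring states; per the hunk above it is built with a
    # num_classes=1 output head rather than a separate classifier class.
    from torchtune.models.mistral import mistral_classifier_7b
    from torchtune.modules import TransformerDecoder

    model = mistral_classifier_7b()
    assert isinstance(model, TransformerDecoder)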