Skip to content

Commit

Permalink
Update vLLM config to use llama 3.1 8B by default
Browse files: browse the repository at this point in the history
  • Loading branch information
YevheniiSemendiak committed Sep 20, 2024
1 parent 1938cac commit f8fdef0
Showing 1 changed file with 22 additions and 4 deletions.
26 changes: 22 additions & 4 deletions .neuro/live.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,22 +69,40 @@ jobs:
VLLM_TOKENIZER: meta-llama/Meta-Llama-3-8B-Instruct

# vLLM job: runs the vllm/vllm-openai image to serve a Llama model.
# NOTE(review): this listing is a rendered diff, so `image`, `preset` and `cmd`
# each appear twice (removed line first, added line second). Only the second
# value of each pair belongs in the real file; duplicate mapping keys are
# invalid YAML and most parsers silently keep the last one.
vllm:
image: vllm/vllm-openai:v0.5.1
image: vllm/vllm-openai:v0.6.1.post2
name: vllm
preset: H100x1
# NOTE(review): the active cmd below passes --tensor-parallel-size=2, which
# needs two GPUs; confirm that the gpu-medium preset provides at least two.
preset: gpu-medium
detach: true
# Quoted so the port is kept as a string rather than parsed as an integer.
http_port: "8000"
# Mounts the `cache` volume; `ref_rw` presumably means read-write; confirm.
volumes:
- ${{ volumes.cache.ref_rw }}
env:
# Supplied through a secret reference instead of an inline value; presumably
# needed to download the meta-llama weights from Hugging Face; confirm.
HF_TOKEN: secret:HF_TOKEN
cmd: --model meta-llama/Meta-Llama-3-8B-Instruct --tokenizer meta-llama/Meta-Llama-3-8B-Instruct --dtype=half
# Active command: Llama 3.1 8B Instruct in half precision, with the context
# length capped at 50000 tokens and the model sharded over 2 GPUs.
# NOTE(review): VLLM_TOKENIZER in the preceding job still names
# meta-llama/Meta-Llama-3-8B-Instruct; confirm whether it should follow this
# bump to Meta-Llama-3.1-8B-Instruct.
cmd: >
--model meta-llama/Meta-Llama-3.1-8B-Instruct
--tokenizer meta-llama/Meta-Llama-3.1-8B-Instruct
--dtype=half
--max-model-len=50000
--tensor-parallel-size=2
# Alternative commands kept commented out for reference:
#   1. the same 3.1 8B model without the context-length cap or tensor parallelism
#   2. a Llama 3 70B checkpoint with AWQ quantization (-q=awq)
#   3. a Llama 3 70B Instruct checkpoint with Marlin quantization (-q=marlin)
# cmd: >
# --model meta-llama/Meta-Llama-3.1-8B-Instruct
# --tokenizer meta-llama/Meta-Llama-3.1-8B-Instruct
# --dtype=half
# cmd: >
# --model TechxGenus/Meta-Llama-3-70B-AWQ
# --tokenizer TechxGenus/Meta-Llama-3-70B-AWQ
# -q=awq
# cmd: >
# --model mgoin/Meta-Llama-3-70B-Instruct-Marlin
# --tokenizer mgoin/Meta-Llama-3-70B-Instruct-Marlin
# --dtype=half
# -q=marlin

ollama:
image: ollama/ollama:0.1.35
volumes:
- ${{ volumes.ollama_models.ref_rw }}
preset: H100x1
preset: gpu-small
detach: true
env:
MODEL: "nomic-embed-text"
Expand Down

0 comments on commit f8fdef0

Please sign in to comment.