Skip to content

Commit

Permalink
Merge branch 'master' into flux-singlefile2
Browse files Browse the repository at this point in the history
  • Loading branch information
sozercan authored Oct 25, 2024
2 parents 6c9aee4 + 5be2d22 commit a274a77
Show file tree
Hide file tree
Showing 24 changed files with 211 additions and 63 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=45f097645efb11b6d09a5b4adbbfd7c312ac0126
CPPLLAMA_VERSION?=958367bf530d943a902afa1ce1c342476098576b

# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=a5abfe6a90495f7bf19fe70d016ecc255e97359c
WHISPER_CPP_VERSION?=0fbaac9c891055796456df7b9122a70c220f9ca1

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
Expand Down
1 change: 1 addition & 0 deletions backend/backend.proto
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ message ModelOptions {
int32 SwapSpace = 53;
int32 MaxModelLen = 54;
int32 TensorParallelSize = 55;
string LoadFormat = 58;

string MMProj = 41;

Expand Down
14 changes: 7 additions & 7 deletions backend/python/parler-tts/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ installRequirements

# https://github.com/descriptinc/audiotools/issues/101
# incompatible protobuf versions.
PYDIR=python3.10
pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
# PYDIR=python3.10
# pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"

if [ ! -d ${pyenv} ]; then
echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
exit 1
fi
# if [ ! -d ${pyenv} ]; then
# echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
# exit 1
# fi

curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
# curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
1 change: 1 addition & 0 deletions backend/python/parler-tts/requirements-after.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
llvmlite==0.43.0
numba==0.60.0
git+https://github.com/descriptinc/audiotools
2 changes: 2 additions & 0 deletions backend/python/vllm/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ async def LoadModel(self, request, context):

if request.Quantization != "":
engine_args.quantization = request.Quantization
if request.LoadFormat != "":
engine_args.load_format = request.LoadFormat
if request.GPUMemoryUtilization != 0:
engine_args.gpu_memory_utilization = request.GPUMemoryUtilization
if request.TrustRemoteCode:
Expand Down
1 change: 1 addition & 0 deletions core/backend/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
DraftModel: c.DraftModel,
AudioPath: c.VallE.AudioPath,
Quantization: c.Quantization,
LoadFormat: c.LoadFormat,
GPUMemoryUtilization: c.GPUMemoryUtilization,
TrustRemoteCode: c.TrustRemoteCode,
EnforceEager: c.EnforceEager,
Expand Down
5 changes: 5 additions & 0 deletions core/cli/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ type RunCMD struct {
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"`
DisableApiKeyRequirementForHttpGet bool `env:"LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET" default:"false" help:"If true, a valid API key is not required to issue GET requests to portions of the web ui. This should only be enabled in secure testing environments" group:"hardening"`
DisableMetricsEndpoint bool `env:"LOCALAI_DISABLE_METRICS_ENDPOINT,DISABLE_METRICS_ENDPOINT" default:"false" help:"Disable the /metrics endpoint" group:"api"`
HttpGetExemptedEndpoints []string `env:"LOCALAI_HTTP_GET_EXEMPTED_ENDPOINTS" default:"^/$,^/browse/?$,^/talk/?$,^/p2p/?$,^/chat/?$,^/text2image/?$,^/tts/?$,^/static/.*$,^/swagger.*$" help:"If LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET is overriden to true, this is the list of endpoints to exempt. Only adjust this in case of a security incident or as a result of a personal security posture review" group:"hardening"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
Expand Down Expand Up @@ -108,6 +109,10 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithLoadToMemory(r.LoadToMemory),
}

if r.DisableMetricsEndpoint {
opts = append(opts, config.DisableMetricsEndpoint)
}

token := ""
if r.Peer2Peer || r.Peer2PeerToken != "" {
log.Info().Msg("P2P mode enabled")
Expand Down
5 changes: 5 additions & 0 deletions core/config/application_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ type ApplicationConfig struct {
OpaqueErrors bool
UseSubtleKeyComparison bool
DisableApiKeyRequirementForHttpGet bool
DisableMetrics bool
HttpGetExemptedEndpoints []*regexp.Regexp
DisableGalleryEndpoint bool
LoadToMemory []string
Expand Down Expand Up @@ -350,6 +351,10 @@ func WithDisableApiKeyRequirementForHttpGet(required bool) AppOption {
}
}

var DisableMetricsEndpoint AppOption = func(o *ApplicationConfig) {
o.DisableMetrics = true
}

func WithHttpGetExemptedEndpoints(endpoints []string) AppOption {
return func(o *ApplicationConfig) {
o.HttpGetExemptedEndpoints = []*regexp.Regexp{}
Expand Down
5 changes: 2 additions & 3 deletions core/config/backend_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ type LLMConfig struct {
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
LoadFormat string `yaml:"load_format"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
EnforceEager bool `yaml:"enforce_eager"` // vLLM
Expand Down Expand Up @@ -197,9 +198,7 @@ type TemplateConfig struct {
// It defaults to \n
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`

Video string `yaml:"video"`
Image string `yaml:"image"`
Audio string `yaml:"audio"`
Multimodal string `yaml:"multimodal"`
}

func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
Expand Down
24 changes: 13 additions & 11 deletions core/http/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,19 +109,21 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
app.Use(recover.New())
}

metricsService, err := services.NewLocalAIMetricsService()
if err != nil {
return nil, err
}
if !appConfig.DisableMetrics {
metricsService, err := services.NewLocalAIMetricsService()
if err != nil {
return nil, err
}

if metricsService != nil {
app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
app.Hooks().OnShutdown(func() error {
return metricsService.Shutdown()
})
}
if metricsService != nil {
app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
app.Hooks().OnShutdown(func() error {
return metricsService.Shutdown()
})
}

// Health Checks should always be exempt from auth, so register these first
}
// Health Checks should always be exempt from auth, so register these first
routes.HealthRoutes(app)

kaConfig, err := middleware.GetKeyAuthConfig(appConfig)
Expand Down
43 changes: 23 additions & 20 deletions core/http/endpoints/openai/request.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,18 +149,27 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
// Decode each request's message content
imgIndex, vidIndex, audioIndex := 0, 0, 0
for i, m := range input.Messages {
nrOfImgsInMessage := 0
nrOfVideosInMessage := 0
nrOfAudiosInMessage := 0

switch content := m.Content.(type) {
case string:
input.Messages[i].StringContent = content
case []interface{}:
dat, _ := json.Marshal(content)
c := []schema.Content{}
json.Unmarshal(dat, &c)

textContent := ""
// we will template this at the end

CONTENT:
for _, pp := range c {
switch pp.Type {
case "text":
input.Messages[i].StringContent = pp.Text
textContent += pp.Text
//input.Messages[i].StringContent = pp.Text
case "video", "video_url":
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL)
Expand All @@ -169,14 +178,8 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
continue CONTENT
}
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff

t := "[vid-{{.ID}}]{{.Text}}"
if config.TemplateConfig.Video != "" {
t = config.TemplateConfig.Video
}
// set a placeholder for each image
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, vidIndex, input.Messages[i].StringContent)
vidIndex++
nrOfVideosInMessage++
case "audio_url", "audio":
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
Expand All @@ -185,13 +188,8 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
continue CONTENT
}
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
// set a placeholder for each image
t := "[audio-{{.ID}}]{{.Text}}"
if config.TemplateConfig.Audio != "" {
t = config.TemplateConfig.Audio
}
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, audioIndex, input.Messages[i].StringContent)
audioIndex++
nrOfAudiosInMessage++
case "image_url", "image":
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL)
Expand All @@ -200,16 +198,21 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
continue CONTENT
}

t := "[img-{{.ID}}]{{.Text}}"
if config.TemplateConfig.Image != "" {
t = config.TemplateConfig.Image
}
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
// set a placeholder for each image
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, imgIndex, input.Messages[i].StringContent)

imgIndex++
nrOfImgsInMessage++
}
}

input.Messages[i].StringContent, _ = templates.TemplateMultiModal(config.TemplateConfig.Multimodal, templates.MultiModalOptions{
TotalImages: imgIndex,
TotalVideos: vidIndex,
TotalAudios: audioIndex,
ImagesInMessage: nrOfImgsInMessage,
VideosInMessage: nrOfVideosInMessage,
AudiosInMessage: nrOfAudiosInMessage,
}, textContent)
}
}

Expand Down
4 changes: 3 additions & 1 deletion core/http/routes/localai.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ func RegisterLocalAIRoutes(app *fiber.App,
app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))

app.Get("/metrics", localai.LocalAIMetricsEndpoint())
if !appConfig.DisableMetrics {
app.Get("/metrics", localai.LocalAIMetricsEndpoint())
}

// Experimental Backend Statistics Module
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
Expand Down
13 changes: 13 additions & 0 deletions docs/content/docs/getting-started/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,19 @@ For a full list of options, refer to the [Installer Options]({{% relref "docs/ad

Binaries can also be [manually downloaded]({{% relref "docs/reference/binaries" %}}).

## Using Homebrew on MacOS

{{% alert icon="⚠️" %}}
The Homebrew formula currently doesn't have the same options than the bash script
{{% /alert %}}

You can install Homebrew's [LocalAI](https://formulae.brew.sh/formula/localai) with the following command:

```
brew install localai
```


## Using Container Images or Kubernetes

LocalAI is available as a container image compatible with various container engines such as Docker, Podman, and Kubernetes. Container images are published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest) and [Docker Hub](https://hub.docker.com/r/localai/localai).
Expand Down
2 changes: 1 addition & 1 deletion docs/data/version.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"version": "v2.22.0"
"version": "v2.22.1"
}
4 changes: 2 additions & 2 deletions examples/chainlit/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
llama_index==0.11.17
llama_index==0.11.19
requests==2.32.3
weaviate_client==4.8.1
weaviate_client==4.9.0
transformers
torch
chainlit
4 changes: 2 additions & 2 deletions examples/functions/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
langchain==0.3.3
openai==1.51.2
langchain==0.3.4
openai==1.52.0
4 changes: 2 additions & 2 deletions examples/langchain-chroma/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
langchain==0.3.3
openai==1.51.2
openai==1.52.0
chromadb==0.5.13
llama-index==0.11.17
llama-index==0.11.19
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ frozenlist==1.4.1
greenlet==3.1.1
idna==3.10
langchain==0.3.3
langchain-community==0.3.2
marshmallow==3.22.0
langchain-community==0.3.3
marshmallow==3.23.0
marshmallow-enum==1.5.1
multidict==6.1.0
mypy-extensions==1.0.0
Expand All @@ -24,10 +24,10 @@ packaging>=23.2
pydantic==2.9.2
PyYAML==6.0.2
requests==2.32.3
SQLAlchemy==2.0.35
SQLAlchemy==2.0.36
tenacity==8.5.0
tqdm==4.66.5
typing-inspect==0.9.0
typing_extensions==4.12.2
urllib3==2.2.3
yarl==1.15.2
yarl==1.16.0
2 changes: 2 additions & 0 deletions gallery/phi-3-vision.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ config_file: |
completion: |
{{.Input}}
use_tokenizer_template: false
multimodal: "{{ range .Images }}<|image_{{ add1 .ID}}|>{{end}}\n{{.Text}}"
# XXX: The one below can be dropped after a new release is out
image: "<|image_{{ add1 .ID }}|>\n{{.Text}}"
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ require (
github.com/moby/sys/sequential v0.5.0 // indirect
github.com/moby/term v0.5.0 // indirect
github.com/mr-tron/base58 v1.2.0 // indirect
github.com/mudler/go-piper v0.0.0-20240315144837-9d0100873a7d
github.com/mudler/go-piper v0.0.0-20241022074816-3854e0221ffb
github.com/mudler/water v0.0.0-20221010214108-8c7313014ce0 // indirect
github.com/muesli/reflow v0.3.0 // indirect
github.com/muesli/termenv v0.15.2 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,8 @@ github.com/mudler/edgevpn v0.28.3 h1:yIuoMExwKHy/mNMBXIsm6FUFbnB9ELIxw9KXrK9KHDk
github.com/mudler/edgevpn v0.28.3/go.mod h1:HWcdIwj5zBgOD04Hn3I+J5E5Yb3kK1CwwWaEe6/QERo=
github.com/mudler/go-piper v0.0.0-20240315144837-9d0100873a7d h1:8udOFrDf/I83JL0/u22j6U6Q9z9LoSdby2a/DWdd0/s=
github.com/mudler/go-piper v0.0.0-20240315144837-9d0100873a7d/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
github.com/mudler/go-piper v0.0.0-20241022074816-3854e0221ffb h1:5qcuxQEpAqeV4ftV5nUt3/hB/RoTXq3MaaauOAedyXo=
github.com/mudler/go-piper v0.0.0-20241022074816-3854e0221ffb/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82 h1:FVT07EI8njvsD4tC2Hw8Xhactp5AWhsQWD4oTeQuSAU=
github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82/go.mod h1:Urp7LG5jylKoDq0663qeBh0pINGcRl35nXdKx82PSoU=
github.com/mudler/go-stable-diffusion v0.0.0-20240429204715-4a3cd6aeae6f h1:cxtMSRkUfy+mjIQ3yMrU0txwQ4It913NEN4m1H8WWgo=
Expand Down
Loading

0 comments on commit a274a77

Please sign in to comment.