Merge branch 'master' into flux-singlefile2

mudler · Oct 25, 2024 · a274a77 · a274a77
2 parents 6c9aee4 + 5be2d22
commit a274a77
Show file tree

Hide file tree

Showing 24 changed files with 211 additions and 63 deletions.
diff --git a/Makefile b/Makefile
@@ -8,15 +8,15 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=45f097645efb11b6d09a5b4adbbfd7c312ac0126
+CPPLLAMA_VERSION?=958367bf530d943a902afa1ce1c342476098576b
 
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=a5abfe6a90495f7bf19fe70d016ecc255e97359c
+WHISPER_CPP_VERSION?=0fbaac9c891055796456df7b9122a70c220f9ca1
 
 # bert.cpp version
 BERT_REPO?=https://github.com/go-skynet/go-bert.cpp

diff --git a/backend/backend.proto b/backend/backend.proto
@@ -219,6 +219,7 @@ message ModelOptions {
   int32  SwapSpace = 53;
   int32  MaxModelLen = 54;
   int32  TensorParallelSize = 55;
+  string LoadFormat = 58;
 
   string MMProj = 41;
 

diff --git a/backend/python/parler-tts/install.sh b/backend/python/parler-tts/install.sh
@@ -15,12 +15,12 @@ installRequirements
 
 # https://github.com/descriptinc/audiotools/issues/101
 # incompatible protobuf versions.
-PYDIR=python3.10
-pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
+# PYDIR=python3.10
+# pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
 
-if [ ! -d ${pyenv} ]; then
-    echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
-    exit 1
-fi
+# if [ ! -d ${pyenv} ]; then
+#     echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
+#     exit 1
+# fi
 
-curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
+# curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
diff --git a/backend/python/parler-tts/requirements-after.txt b/backend/python/parler-tts/requirements-after.txt
@@ -1,3 +1,4 @@
 git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
 llvmlite==0.43.0
 numba==0.60.0
+git+https://github.com/descriptinc/audiotools
diff --git a/backend/python/vllm/backend.py b/backend/python/vllm/backend.py
@@ -95,6 +95,8 @@ async def LoadModel(self, request, context):
 
         if request.Quantization != "":
             engine_args.quantization = request.Quantization
+        if request.LoadFormat != "":
+            engine_args.load_format = request.LoadFormat
         if request.GPUMemoryUtilization != 0:
             engine_args.gpu_memory_utilization = request.GPUMemoryUtilization
         if request.TrustRemoteCode:

diff --git a/core/backend/options.go b/core/backend/options.go
@@ -139,6 +139,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		DraftModel:           c.DraftModel,
 		AudioPath:            c.VallE.AudioPath,
 		Quantization:         c.Quantization,
+		LoadFormat:           c.LoadFormat,
 		GPUMemoryUtilization: c.GPUMemoryUtilization,
 		TrustRemoteCode:      c.TrustRemoteCode,
 		EnforceEager:         c.EnforceEager,

diff --git a/core/cli/run.go b/core/cli/run.go
@@ -53,6 +53,7 @@ type RunCMD struct {
 	OpaqueErrors                       bool     `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
 	UseSubtleKeyComparison             bool     `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"`
 	DisableApiKeyRequirementForHttpGet bool     `env:"LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET" default:"false" help:"If true, a valid API key is not required to issue GET requests to portions of the web ui. This should only be enabled in secure testing environments" group:"hardening"`
+	DisableMetricsEndpoint             bool     `env:"LOCALAI_DISABLE_METRICS_ENDPOINT,DISABLE_METRICS_ENDPOINT" default:"false" help:"Disable the /metrics endpoint" group:"api"`
 	HttpGetExemptedEndpoints           []string `env:"LOCALAI_HTTP_GET_EXEMPTED_ENDPOINTS" default:"^/$,^/browse/?$,^/talk/?$,^/p2p/?$,^/chat/?$,^/text2image/?$,^/tts/?$,^/static/.*$,^/swagger.*$" help:"If LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET is overriden to true, this is the list of endpoints to exempt. Only adjust this in case of a security incident or as a result of a personal security posture review" group:"hardening"`
 	Peer2Peer                          bool     `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
 	Peer2PeerDHTInterval               int      `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
@@ -108,6 +109,10 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		config.WithLoadToMemory(r.LoadToMemory),
 	}
 
+	if r.DisableMetricsEndpoint {
+		opts = append(opts, config.DisableMetricsEndpoint)
+	}
+
 	token := ""
 	if r.Peer2Peer || r.Peer2PeerToken != "" {
 		log.Info().Msg("P2P mode enabled")

diff --git a/core/config/application_config.go b/core/config/application_config.go
@@ -39,6 +39,7 @@ type ApplicationConfig struct {
 	OpaqueErrors                       bool
 	UseSubtleKeyComparison             bool
 	DisableApiKeyRequirementForHttpGet bool
+	DisableMetrics                     bool
 	HttpGetExemptedEndpoints           []*regexp.Regexp
 	DisableGalleryEndpoint             bool
 	LoadToMemory                       []string
@@ -350,6 +351,10 @@ func WithDisableApiKeyRequirementForHttpGet(required bool) AppOption {
 	}
 }
 
+var DisableMetricsEndpoint AppOption = func(o *ApplicationConfig) {
+	o.DisableMetrics = true
+}
+
 func WithHttpGetExemptedEndpoints(endpoints []string) AppOption {
 	return func(o *ApplicationConfig) {
 		o.HttpGetExemptedEndpoints = []*regexp.Regexp{}

diff --git a/core/config/backend_config.go b/core/config/backend_config.go
@@ -143,6 +143,7 @@ type LLMConfig struct {
 	DraftModel           string  `yaml:"draft_model"`
 	NDraft               int32   `yaml:"n_draft"`
 	Quantization         string  `yaml:"quantization"`
+	LoadFormat           string  `yaml:"load_format"`
 	GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
 	TrustRemoteCode      bool    `yaml:"trust_remote_code"`      // vLLM
 	EnforceEager         bool    `yaml:"enforce_eager"`          // vLLM
@@ -197,9 +198,7 @@ type TemplateConfig struct {
 	// It defaults to \n
 	JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
 
-	Video string `yaml:"video"`
-	Image string `yaml:"image"`
-	Audio string `yaml:"audio"`
+	Multimodal string `yaml:"multimodal"`
 }
 
 func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {

diff --git a/core/http/app.go b/core/http/app.go
@@ -109,19 +109,21 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
 		app.Use(recover.New())
 	}
 
-	metricsService, err := services.NewLocalAIMetricsService()
-	if err != nil {
-		return nil, err
-	}
+	if !appConfig.DisableMetrics {
+		metricsService, err := services.NewLocalAIMetricsService()
+		if err != nil {
+			return nil, err
+		}
 
-	if metricsService != nil {
-		app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
-		app.Hooks().OnShutdown(func() error {
-			return metricsService.Shutdown()
-		})
-	}
+		if metricsService != nil {
+			app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
+			app.Hooks().OnShutdown(func() error {
+				return metricsService.Shutdown()
+			})
+		}
 
- // Health Checks should always be exempt from auth, so register these first
+	}
+	// Health Checks should always be exempt from auth, so register these first
 	routes.HealthRoutes(app)
 
 	kaConfig, err := middleware.GetKeyAuthConfig(appConfig)

diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go
@@ -149,18 +149,27 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
 	// Decode each request's message content
 	imgIndex, vidIndex, audioIndex := 0, 0, 0
 	for i, m := range input.Messages {
+		nrOfImgsInMessage := 0
+		nrOfVideosInMessage := 0
+		nrOfAudiosInMessage := 0
+
 		switch content := m.Content.(type) {
 		case string:
 			input.Messages[i].StringContent = content
 		case []interface{}:
 			dat, _ := json.Marshal(content)
 			c := []schema.Content{}
 			json.Unmarshal(dat, &c)
+
+			textContent := ""
+			// we will template this at the end
+
 		CONTENT:
 			for _, pp := range c {
 				switch pp.Type {
 				case "text":
-					input.Messages[i].StringContent = pp.Text
+					textContent += pp.Text
+					//input.Messages[i].StringContent = pp.Text
 				case "video", "video_url":
 					// Decode content as base64 either if it's an URL or base64 text
 					base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL)
@@ -169,14 +178,8 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
 						continue CONTENT
 					}
 					input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff
-
-					t := "[vid-{{.ID}}]{{.Text}}"
-					if config.TemplateConfig.Video != "" {
-						t = config.TemplateConfig.Video
-					}
-					// set a placeholder for each image
-					input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, vidIndex, input.Messages[i].StringContent)
 					vidIndex++
+					nrOfVideosInMessage++
 				case "audio_url", "audio":
 					// Decode content as base64 either if it's an URL or base64 text
 					base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
@@ -185,13 +188,8 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
 						continue CONTENT
 					}
 					input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
-					// set a placeholder for each image
-					t := "[audio-{{.ID}}]{{.Text}}"
-					if config.TemplateConfig.Audio != "" {
-						t = config.TemplateConfig.Audio
-					}
-					input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, audioIndex, input.Messages[i].StringContent)
 					audioIndex++
+					nrOfAudiosInMessage++
 				case "image_url", "image":
 					// Decode content as base64 either if it's an URL or base64 text
 					base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL)
@@ -200,16 +198,21 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
 						continue CONTENT
 					}
 
-					t := "[img-{{.ID}}]{{.Text}}"
-					if config.TemplateConfig.Image != "" {
-						t = config.TemplateConfig.Image
-					}
 					input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
-					// set a placeholder for each image
-					input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, imgIndex, input.Messages[i].StringContent)
+
 					imgIndex++
+					nrOfImgsInMessage++
 				}
 			}
+
+			input.Messages[i].StringContent, _ = templates.TemplateMultiModal(config.TemplateConfig.Multimodal, templates.MultiModalOptions{
+				TotalImages:     imgIndex,
+				TotalVideos:     vidIndex,
+				TotalAudios:     audioIndex,
+				ImagesInMessage: nrOfImgsInMessage,
+				VideosInMessage: nrOfVideosInMessage,
+				AudiosInMessage: nrOfAudiosInMessage,
+			}, textContent)
 		}
 	}
 

diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
@@ -42,7 +42,9 @@ func RegisterLocalAIRoutes(app *fiber.App,
 	app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
 	app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
 
-	app.Get("/metrics", localai.LocalAIMetricsEndpoint())
+	if !appConfig.DisableMetrics {
+		app.Get("/metrics", localai.LocalAIMetricsEndpoint())
+	}
 
 	// Experimental Backend Statistics Module
 	backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now

diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md
@@ -30,6 +30,19 @@ For a full list of options, refer to the [Installer Options]({{% relref "docs/ad
 
 Binaries can also be [manually downloaded]({{% relref "docs/reference/binaries" %}}).
 
+## Using Homebrew on MacOS
+
+{{% alert icon="⚠️" %}}
+The Homebrew formula currently doesn't have the same options as the bash script
+{{% /alert %}}
+
+You can install Homebrew's [LocalAI](https://formulae.brew.sh/formula/localai) with the following command:
+
+```
+brew install localai
+```
+
+
 ## Using Container Images or Kubernetes
 
 LocalAI is available as a container image compatible with various container engines such as Docker, Podman, and Kubernetes. Container images are published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest) and [Docker Hub](https://hub.docker.com/r/localai/localai).

diff --git a/docs/data/version.json b/docs/data/version.json
@@ -1,3 +1,3 @@
 {
-  "version": "v2.22.0"
+  "version": "v2.22.1"
 }
diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn
diff --git a/examples/chainlit/requirements.txt b/examples/chainlit/requirements.txt
@@ -1,6 +1,6 @@
-llama_index==0.11.17
+llama_index==0.11.19
 requests==2.32.3
-weaviate_client==4.8.1
+weaviate_client==4.9.0
 transformers
 torch
 chainlit
diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt
@@ -1,2 +1,2 @@
-langchain==0.3.3
-openai==1.51.2
+langchain==0.3.4
+openai==1.52.0
diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt
@@ -1,4 +1,4 @@
 langchain==0.3.3
-openai==1.51.2
+openai==1.52.0
 chromadb==0.5.13
-llama-index==0.11.17
+llama-index==0.11.19
diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt
@@ -11,8 +11,8 @@ frozenlist==1.4.1
 greenlet==3.1.1
 idna==3.10
 langchain==0.3.3
-langchain-community==0.3.2
-marshmallow==3.22.0
+langchain-community==0.3.3
+marshmallow==3.23.0
 marshmallow-enum==1.5.1
 multidict==6.1.0
 mypy-extensions==1.0.0
@@ -24,10 +24,10 @@ packaging>=23.2
 pydantic==2.9.2
 PyYAML==6.0.2
 requests==2.32.3
-SQLAlchemy==2.0.35
+SQLAlchemy==2.0.36
 tenacity==8.5.0
 tqdm==4.66.5
 typing-inspect==0.9.0
 typing_extensions==4.12.2
 urllib3==2.2.3
-yarl==1.15.2
+yarl==1.16.0
diff --git a/gallery/phi-3-vision.yaml b/gallery/phi-3-vision.yaml
@@ -20,4 +20,6 @@ config_file: |
     completion: |
         {{.Input}}
     use_tokenizer_template: false
+    multimodal: "{{ range .Images }}<|image_{{ add1 .ID}}|>{{end}}\n{{.Text}}"
+    # XXX: The one below can be dropped after a new release is out
     image: "<|image_{{ add1 .ID }}|>\n{{.Text}}"
diff --git a/go.mod b/go.mod
@@ -231,7 +231,7 @@ require (
 	github.com/moby/sys/sequential v0.5.0 // indirect
 	github.com/moby/term v0.5.0 // indirect
 	github.com/mr-tron/base58 v1.2.0 // indirect
-	github.com/mudler/go-piper v0.0.0-20240315144837-9d0100873a7d
+	github.com/mudler/go-piper v0.0.0-20241022074816-3854e0221ffb
 	github.com/mudler/water v0.0.0-20221010214108-8c7313014ce0 // indirect
 	github.com/muesli/reflow v0.3.0 // indirect
 	github.com/muesli/termenv v0.15.2 // indirect

diff --git a/go.sum b/go.sum
@@ -498,6 +498,8 @@ github.com/mudler/edgevpn v0.28.3 h1:yIuoMExwKHy/mNMBXIsm6FUFbnB9ELIxw9KXrK9KHDk
 github.com/mudler/edgevpn v0.28.3/go.mod h1:HWcdIwj5zBgOD04Hn3I+J5E5Yb3kK1CwwWaEe6/QERo=
 github.com/mudler/go-piper v0.0.0-20240315144837-9d0100873a7d h1:8udOFrDf/I83JL0/u22j6U6Q9z9LoSdby2a/DWdd0/s=
 github.com/mudler/go-piper v0.0.0-20240315144837-9d0100873a7d/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
+github.com/mudler/go-piper v0.0.0-20241022074816-3854e0221ffb h1:5qcuxQEpAqeV4ftV5nUt3/hB/RoTXq3MaaauOAedyXo=
+github.com/mudler/go-piper v0.0.0-20241022074816-3854e0221ffb/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
 github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82 h1:FVT07EI8njvsD4tC2Hw8Xhactp5AWhsQWD4oTeQuSAU=
 github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82/go.mod h1:Urp7LG5jylKoDq0663qeBh0pINGcRl35nXdKx82PSoU=
 github.com/mudler/go-stable-diffusion v0.0.0-20240429204715-4a3cd6aeae6f h1:cxtMSRkUfy+mjIQ3yMrU0txwQ4It913NEN4m1H8WWgo=