Add ollama GPU test.

This runs Ollama (https://ollama.ai/) in a gVisor container and loads two models: an English-Chinese translation model and a code assistant model. It asks the first one to translate "Hello World" to Chinese, and then asks the second one to generate a test case to verify that the translation is correct.

This change includes a server and client library for spawning ollama in a container and interacting with it through its HTTP API. This will be useful for later turning the test into a benchmark that measures throughput in tokens/second.

PiperOrigin-RevId: 590295278
google · Dec 12, 2023 · 07e86e2 · 07e86e2
1 parent b3bb6fa
commit 07e86e2
Show file tree

Hide file tree

Showing 9 changed files with 690 additions and 13 deletions.
diff --git a/Makefile b/Makefile
@@ -273,18 +273,39 @@ arm-qemu-smoke-test: $(RUNTIME_BIN) load-arm-qemu
 simple-tests: unit-tests # Compatibility target.
 .PHONY: simple-tests
 
-gpu-smoke-tests: load-basic_cuda-vector-add load-gpu_cuda-tests $(RUNTIME_BIN)
+# Images needed for GPU smoke tests (used by gpu-smoke-tests and cos-gpu-smoke-tests).
+gpu-smoke-images: load-basic_cuda-vector-add load-gpu_cuda-tests
+.PHONY: gpu-smoke-images
+
+gpu-smoke-tests: gpu-smoke-images $(RUNTIME_BIN)
 	@$(call test,--test_env=RUNTIME=runc //test/gpu:gpu_smoke_test)
 	@$(call install_runtime,$(RUNTIME),--nvproxy=true --nvproxy-docker=true)
 	@$(call sudo,test/gpu:gpu_smoke_test,--runtime=$(RUNTIME) -test.v $(ARGS))
 .PHONY: gpu-smoke-tests
 
-cos-gpu-smoke-tests: load-basic_cuda-vector-add load-gpu_cuda-tests $(RUNTIME_BIN)
+cos-gpu-smoke-tests: gpu-smoke-images $(RUNTIME_BIN)
 	@$(call sudo,test/gpu:gpu_smoke_test,--runtime=runc -test.v --cos-gpu $(ARGS))
 	@$(call install_runtime,$(RUNTIME),--nvproxy=true)
 	@$(call sudo,test/gpu:gpu_smoke_test,--runtime=$(RUNTIME) -test.v --cos-gpu $(ARGS))
 .PHONY: cos-gpu-smoke-tests
 
+# Images needed for the full GPU tests (gpu-all-tests and cos-gpu-all-tests).
+# This is a superset of those needed for smoke tests: on top of
+# gpu-smoke-images it loads the ollama image and non-GPU images that are
+# used as part of GPU tests, e.g. busybox and python.
+gpu-images: gpu-smoke-images load-gpu_ollama load-basic_busybox load-basic_python
+.PHONY: gpu-images
+
+gpu-all-tests: gpu-images gpu-smoke-tests $(RUNTIME_BIN)
+	@$(call install_runtime,$(RUNTIME),--nvproxy=true --nvproxy-docker=true)
+	@$(call sudo,test/gpu:textgen_test,--runtime=$(RUNTIME) -test.v $(ARGS))
+.PHONY: gpu-all-tests
+
+cos-gpu-all-tests: gpu-images cos-gpu-smoke-tests $(RUNTIME_BIN)
+	@$(call install_runtime,$(RUNTIME),--nvproxy=true)
+	@$(call sudo,test/gpu:textgen_test,--runtime=$(RUNTIME) -test.v --cos-gpu $(ARGS))
+.PHONY: cos-gpu-all-tests
+
 portforward-tests: load-basic_redis load-basic_nginx $(RUNTIME_BIN)
 	@$(call install_runtime,$(RUNTIME),--network=sandbox)
 	@$(call sudo,test/root:portforward_test,--runtime=$(RUNTIME) -test.v $(ARGS))

diff --git a/images/gpu/cuda-tests/Dockerfile b/images/gpu/cuda-tests/Dockerfile
@@ -4,4 +4,5 @@ WORKDIR /
 COPY cuda_malloc_managed.cu .
 COPY cuda_test_util.h .
 COPY run.sh .
+ENV PATH=$PATH:/usr/local/nvidia/bin:/bin/nvidia/bin
 ENTRYPOINT ["/run.sh"]
diff --git a/images/gpu/ollama/Dockerfile b/images/gpu/ollama/Dockerfile
@@ -0,0 +1,29 @@
+# https://hub.docker.com/r/ollama/ollama
+FROM ollama/ollama:0.1.13
+
+ENV PATH=$PATH:/usr/local/nvidia/bin:/bin/nvidia/bin
+
+# Pre-install a few models.
+# Although these are the smallest possible model size (7B parameters),
+# these are still quite large and it would take too long for tests to
+# download them on every run.
+#
+# The pulls go through an `ollama serve` instance started in the background.
+# Rather than sleeping for a fixed time and hoping the server is up, poll
+# `ollama list` for up to 60 seconds and fail the build if the server never
+# answers. Afterwards, stop the server and wait for it to exit.
+RUN bash -c '                                  \
+    ( ollama serve ) & serverpid="$!";         \
+    ready=0;                                   \
+    for attempt in {1..60}; do                 \
+        if ollama list >/dev/null 2>&1; then   \
+            ready=1; break;                    \
+        fi;                                    \
+        sleep 1;                               \
+    done;                                      \
+    [[ "$ready" == 1 ]] &&                     \
+    ollama pull codellama:7b &&                \
+    ollama pull llama2-chinese:7b-chat &&      \
+    kill "$serverpid" &&                       \
+    wait "$serverpid"                          \
+'
diff --git a/pkg/test/dockerutil/container.go b/pkg/test/dockerutil/container.go
@@ -86,6 +86,9 @@ type RunOpts struct {
 	// User is the user to use.
 	User string
 
+	// Entrypoint is an optional argv that overrides the ENTRYPOINT specified in the image.
+	Entrypoint []string
+
 	// Privileged enables privileged mode.
 	Privileged bool
 
@@ -260,6 +263,7 @@ func (c *Container) config(r RunOpts, args []string) *container.Config {
 	return &container.Config{
 		Image:        testutil.ImageByName(r.Image),
 		Cmd:          args,
+		Entrypoint:   r.Entrypoint,
 		ExposedPorts: ports,
 		Env:          env,
 		WorkingDir:   r.WorkDir,

diff --git a/pkg/test/dockerutil/gpu.go b/pkg/test/dockerutil/gpu.go
@@ -17,6 +17,7 @@ package dockerutil
 
 import (
 	"flag"
+	"os"
 
 	"github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/api/types/mount"
@@ -55,17 +56,39 @@ func GPURunOpts() RunOpts {
 		})
 	}
 
-	mounts := []mount.Mount{
-		{
-			Source: "/var/lib/nvidia/lib64",
-			Target: "/usr/local/nvidia/lib64",
-			Type:   mount.TypeBind,
-		},
-		{
-			Source: "/var/lib/nvidia/bin",
-			Target: "/usr/local/nvidia/bin",
-			Type:   mount.TypeBind,
-		},
+	// Bind-mount the host's NVIDIA binaries and libraries read-only under
+	// /usr/local/nvidia. The host location varies, so probe the candidates
+	// in order and mount only the first directory that exists: two mounts
+	// sharing one Target would be rejected by Docker as a duplicate mount
+	// point when the container is created.
+	var mounts []mount.Mount
+	for _, nvidiaBin := range []string{
+		"/home/kubernetes/bin/nvidia/bin",
+		"/var/lib/nvidia/bin",
+	} {
+		if st, err := os.Stat(nvidiaBin); err == nil && st.IsDir() {
+			mounts = append(mounts, mount.Mount{
+				Source:   nvidiaBin,
+				Target:   "/usr/local/nvidia/bin",
+				Type:     mount.TypeBind,
+				ReadOnly: true,
+			})
+			break // First existing candidate wins.
+		}
+	}
+	for _, nvidiaLib64 := range []string{
+		"/home/kubernetes/bin/nvidia/lib64",
+		"/var/lib/nvidia/lib64",
+	} {
+		if st, err := os.Stat(nvidiaLib64); err == nil && st.IsDir() {
+			mounts = append(mounts, mount.Mount{
+				Source:   nvidiaLib64,
+				Target:   "/usr/local/nvidia/lib64",
+				Type:     mount.TypeBind,
+				ReadOnly: true,
+			})
+			break // First existing candidate wins.
+		}
 	}
 
 	return RunOpts{

diff --git a/test/gpu/BUILD b/test/gpu/BUILD
@@ -16,3 +16,19 @@ go_test(
     visibility = ["//:sandbox"],
     deps = ["//pkg/test/dockerutil"],
 )
+
+go_test(
+    name = "textgen_test",
+    srcs = ["textgen_test.go"],
+    tags = [
+        "local",
+        "noguitar",
+        "notap",
+    ],
+    visibility = ["//:sandbox"],
+    deps = [
+        "//pkg/test/dockerutil",
+        "//pkg/test/testutil",
+        "//test/gpu/ollama",
+    ],
+)
diff --git a/test/gpu/ollama/BUILD b/test/gpu/ollama/BUILD
@@ -0,0 +1,17 @@
+load("//tools:defs.bzl", "go_library")
+
+package(
+    default_applicable_licenses = ["//:license"],
+    licenses = ["notice"],
+)
+
+go_library(
+    name = "ollama",
+    testonly = 1,
+    srcs = ["ollama.go"],
+    visibility = ["//:sandbox"],
+    deps = [
+        "//pkg/test/dockerutil",
+        "//pkg/test/testutil",
+    ],
+)