From b0829d095da20b513646b27b0d5fa40667548e65 Mon Sep 17 00:00:00 2001 From: Mahad Zaryab <43658574+mahadzaryab1@users.noreply.github.com> Date: Sat, 12 Oct 2024 00:02:11 -0400 Subject: [PATCH] [jaeger-v2] Implement empty service name sanitizer for OTLP (#6077) ## Which problem is this PR solving? - Towards #5545 ## Description of the changes - Created a new `Sanitizer` type for sanitizing traces in OTLP format - Implemented the empty service name sanitizer; I'll open a separate PR for the UTF-8 sanitizer ## How was this change tested? - Unit tests ## Checklist - [x] I have read https://github.com/jaegertracing/jaeger/blob/master/CONTRIBUTING_GUIDELINES.md - [x] I have signed all commits - [x] I have added unit tests for the new functionality - [x] I have run lint and test steps successfully - for `jaeger`: `make lint test` - for `jaeger-ui`: `yarn lint` and `yarn test` --------- Signed-off-by: Mahad Zaryab Signed-off-by: Saumya Shah --- .../exporters/storageexporter/exporter.go | 7 +- .../storageexporter/exporter_test.go | 3 + .../internal/sanitizer/emptyservicename.go | 41 ++++++++++ .../sanitizer/emptyservicename_test.go | 80 +++++++++++++++++++ cmd/jaeger/internal/sanitizer/package_test.go | 14 ++++ cmd/jaeger/internal/sanitizer/sanitizer.go | 33 ++++++++ .../internal/sanitizer/sanitizer_test.go | 57 +++++++++++++ 7 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 cmd/jaeger/internal/sanitizer/emptyservicename.go create mode 100644 cmd/jaeger/internal/sanitizer/emptyservicename_test.go create mode 100644 cmd/jaeger/internal/sanitizer/package_test.go create mode 100644 cmd/jaeger/internal/sanitizer/sanitizer.go create mode 100644 cmd/jaeger/internal/sanitizer/sanitizer_test.go diff --git a/cmd/jaeger/internal/exporters/storageexporter/exporter.go b/cmd/jaeger/internal/exporters/storageexporter/exporter.go index 2e29be4cc39..2cb7e8d1ba8 100644 --- a/cmd/jaeger/internal/exporters/storageexporter/exporter.go +++ b/cmd/jaeger/internal/exporters/storageexporter/exporter.go @@ -12,6 +12,7 @@ import ( "go.uber.org/zap" "github.com/jaegertracing/jaeger/cmd/jaeger/internal/extension/jaegerstorage" + "github.com/jaegertracing/jaeger/cmd/jaeger/internal/sanitizer" "github.com/jaegertracing/jaeger/storage_v2/spanstore" ) @@ -19,12 +20,16 @@ type storageExporter struct { config *Config logger *zap.Logger traceWriter spanstore.Writer + sanitizer sanitizer.Func } func newExporter(config *Config, otel component.TelemetrySettings) *storageExporter { return &storageExporter{ config: config, logger: otel.Logger, + sanitizer: sanitizer.NewChainedSanitizer( + sanitizer.NewStandardSanitizers()..., + ), } } @@ -47,5 +52,5 @@ func (*storageExporter) close(_ context.Context) error { } func (exp *storageExporter) pushTraces(ctx context.Context, td ptrace.Traces) error { - return exp.traceWriter.WriteTraces(ctx, td) + return exp.traceWriter.WriteTraces(ctx, exp.sanitizer(td)) } diff --git a/cmd/jaeger/internal/exporters/storageexporter/exporter_test.go b/cmd/jaeger/internal/exporters/storageexporter/exporter_test.go index f5286679924..a73375faadd 100644 --- a/cmd/jaeger/internal/exporters/storageexporter/exporter_test.go +++ b/cmd/jaeger/internal/exporters/storageexporter/exporter_test.go @@ -157,6 +157,9 @@ func TestExporter(t *testing.T) { requiredTrace, err := spanReader.GetTrace(ctx, requiredTraceID) require.NoError(t, err) assert.Equal(t, spanID.String(), requiredTrace.Spans[0].SpanID.String()) + + // check that the service name attribute was added by the sanitizer + require.Equal(t, "missing-service-name", requiredTrace.Spans[0].Process.ServiceName) } func makeStorageExtension(t *testing.T, memstoreName string) component.Host { diff --git a/cmd/jaeger/internal/sanitizer/emptyservicename.go b/cmd/jaeger/internal/sanitizer/emptyservicename.go new file mode 100644 index 00000000000..65bdb3aa854 --- /dev/null +++ b/cmd/jaeger/internal/sanitizer/emptyservicename.go @@ -0,0 +1,41 @@ +// Copyright (c) 2024 The Jaeger Authors. +// SPDX-License-Identifier: Apache-2.0 + +package sanitizer + +import ( + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" + + "github.com/jaegertracing/jaeger/pkg/otelsemconv" +) + +const ( + emptyServiceName = "empty-service-name" + serviceNameWrongType = "service-name-wrong-type" + missingServiceName = "missing-service-name" +) + +// NewEmptyServiceNameSanitizer returns a sanitizer function that replaces +// empty and missing service names with placeholder strings. +func NewEmptyServiceNameSanitizer() Func { + return sanitizeEmptyServiceName +} + +func sanitizeEmptyServiceName(traces ptrace.Traces) ptrace.Traces { + resourceSpans := traces.ResourceSpans() + for i := 0; i < resourceSpans.Len(); i++ { + resourceSpan := resourceSpans.At(i) + attributes := resourceSpan.Resource().Attributes() + serviceName, ok := attributes.Get(string(otelsemconv.ServiceNameKey)) + switch { + case !ok: + attributes.PutStr(string(otelsemconv.ServiceNameKey), missingServiceName) + case serviceName.Type() != pcommon.ValueTypeStr: + attributes.PutStr(string(otelsemconv.ServiceNameKey), serviceNameWrongType) + case serviceName.Str() == "": + attributes.PutStr(string(otelsemconv.ServiceNameKey), emptyServiceName) + } + } + return traces +} diff --git a/cmd/jaeger/internal/sanitizer/emptyservicename_test.go b/cmd/jaeger/internal/sanitizer/emptyservicename_test.go new file mode 100644 index 00000000000..19073d3a0d1 --- /dev/null +++ b/cmd/jaeger/internal/sanitizer/emptyservicename_test.go @@ -0,0 +1,80 @@ +// Copyright (c) 2024 The Jaeger Authors. +// SPDX-License-Identifier: Apache-2.0 + +package sanitizer + +import ( + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/ptrace" +) + +func TestEmptyServiceNameSanitizer_SubstitutesCorrectlyForStrings(t *testing.T) { + emptyServiceName := "" + nonEmptyServiceName := "hello" + tests := []struct { + name string + serviceName *string + expectedServiceName string + }{ + { + name: "no service name", + expectedServiceName: "missing-service-name", + }, + { + name: "empty service name", + serviceName: &emptyServiceName, + expectedServiceName: "empty-service-name", + }, + { + name: "non-empty service name", + serviceName: &nonEmptyServiceName, + expectedServiceName: "hello", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + traces := ptrace.NewTraces() + attributes := traces. + ResourceSpans(). + AppendEmpty(). + Resource(). + Attributes() + if test.serviceName != nil { + attributes.PutStr("service.name", *test.serviceName) + } + sanitizer := NewEmptyServiceNameSanitizer() + sanitized := sanitizer(traces) + serviceName, ok := sanitized. + ResourceSpans(). + At(0). + Resource(). + Attributes(). + Get("service.name") + require.True(t, ok) + require.Equal(t, test.expectedServiceName, serviceName.Str()) + }) + } +} + +func TestEmptyServiceNameSanitizer_SubstitutesCorrectlyForNonStringType(t *testing.T) { + traces := ptrace.NewTraces() + traces. + ResourceSpans(). + AppendEmpty(). + Resource(). + Attributes(). + PutInt("service.name", 1) + sanitizer := NewEmptyServiceNameSanitizer() + sanitized := sanitizer(traces) + serviceName, ok := sanitized. + ResourceSpans(). + At(0). + Resource(). + Attributes(). + Get("service.name") + require.True(t, ok) + require.Equal(t, "service-name-wrong-type", serviceName.Str()) +} diff --git a/cmd/jaeger/internal/sanitizer/package_test.go b/cmd/jaeger/internal/sanitizer/package_test.go new file mode 100644 index 00000000000..a7d22934b92 --- /dev/null +++ b/cmd/jaeger/internal/sanitizer/package_test.go @@ -0,0 +1,14 @@ +// Copyright (c) 2024 The Jaeger Authors. +// SPDX-License-Identifier: Apache-2.0 + +package sanitizer + +import ( + "testing" + + "github.com/jaegertracing/jaeger/pkg/testutils" +) + +func TestMain(m *testing.M) { + testutils.VerifyGoLeaks(m) +} diff --git a/cmd/jaeger/internal/sanitizer/sanitizer.go b/cmd/jaeger/internal/sanitizer/sanitizer.go new file mode 100644 index 00000000000..1ac3fa9f579 --- /dev/null +++ b/cmd/jaeger/internal/sanitizer/sanitizer.go @@ -0,0 +1,33 @@ +// Copyright (c) 2024 The Jaeger Authors. +// SPDX-License-Identifier: Apache-2.0 + +package sanitizer + +import ( + "go.opentelemetry.io/collector/pdata/ptrace" +) + +// Func is a function that performs enrichment, clean-up, or normalization of trace data. +type Func func(traces ptrace.Traces) ptrace.Traces + +// NewStandardSanitizers returns a list of all the sanitizers that are used by the +// storage exporter. +func NewStandardSanitizers() []Func { + return []Func{ + NewEmptyServiceNameSanitizer(), + } +} + +// NewChainedSanitizer creates a Sanitizer from the variadic list of passed Sanitizers. +// If the list only has one element, it is returned directly to minimize indirection. +func NewChainedSanitizer(sanitizers ...Func) Func { + if len(sanitizers) == 1 { + return sanitizers[0] + } + return func(traces ptrace.Traces) ptrace.Traces { + for _, s := range sanitizers { + traces = s(traces) + } + return traces + } +} diff --git a/cmd/jaeger/internal/sanitizer/sanitizer_test.go b/cmd/jaeger/internal/sanitizer/sanitizer_test.go new file mode 100644 index 00000000000..18399a28bc2 --- /dev/null +++ b/cmd/jaeger/internal/sanitizer/sanitizer_test.go @@ -0,0 +1,57 @@ +// Copyright (c) 2024 The Jaeger Authors. +// SPDX-License-Identifier: Apache-2.0 + +package sanitizer + +import ( + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/ptrace" +) + +func TestNewStandardSanitizers(t *testing.T) { + sanitizers := NewStandardSanitizers() + require.Len(t, sanitizers, 1) +} + +func TestNewChainedSanitizer(t *testing.T) { + var s1 Func = func(traces ptrace.Traces) ptrace.Traces { + traces. + ResourceSpans(). + AppendEmpty(). + Resource(). + Attributes(). + PutStr("hello", "world") + return traces + } + var s2 Func = func(traces ptrace.Traces) ptrace.Traces { + traces. + ResourceSpans(). + At(0). + Resource(). + Attributes(). + PutStr("hello", "goodbye") + return traces + } + c1 := NewChainedSanitizer(s1) + t1 := c1(ptrace.NewTraces()) + hello, ok := t1. + ResourceSpans(). + At(0). + Resource(). + Attributes(). + Get("hello") + require.True(t, ok) + require.Equal(t, "world", hello.Str()) + c2 := NewChainedSanitizer(s1, s2) + t2 := c2(ptrace.NewTraces()) + hello, ok = t2. + ResourceSpans(). + At(0). + Resource(). + Attributes(). + Get("hello") + require.True(t, ok) + require.Equal(t, "goodbye", hello.Str()) +}