From 06cef9167ef9ea41cfc1a4e0de8da29e6fe42f27 Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Wed, 14 Aug 2024 14:39:33 -0400
Subject: [PATCH 01/35] adding inference trace injection

---
 .../azure-core/azure/core/tracing/__init__.py |   1 +
 .../tracing/_generative_ai_trace_injectors.py |  48 +++
 .../core/tracing/_inference_api_injector.py   | 370 ++++++++++++++++++
 3 files changed, 419 insertions(+)
 create mode 100644 sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py
 create mode 100644 sdk/core/azure-core/azure/core/tracing/_inference_api_injector.py

diff --git a/sdk/core/azure-core/azure/core/tracing/__init__.py b/sdk/core/azure-core/azure/core/tracing/__init__.py
index ecf6fe6da8df..703d89e31a23 100644
--- a/sdk/core/azure-core/azure/core/tracing/__init__.py
+++ b/sdk/core/azure-core/azure/core/tracing/__init__.py
@@ -8,5 +8,6 @@
     HttpSpanMixin,
     Link,
 )
+from ._generative_ai_trace_injectors import start_generative_ai_traces, stop_generative_ai_traces, GenerativeAIPackage
 
 __all__ = ["AbstractSpan", "SpanKind", "HttpSpanMixin", "Link"]
diff --git a/sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py b/sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py
new file mode 100644
index 000000000000..91ced9f54509
--- /dev/null
+++ b/sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py
@@ -0,0 +1,48 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+from enum import Enum
+
+class GenerativeAIPackage(str, Enum):
+    """An enumeration class to represent the packages that provide generative AI traces."""
+
+    INFERENCE = "azure.ai.inference"
+
+
+def start_generative_ai_traces(package_name: GenerativeAIPackage, enable_content_tracing: bool=False):
+    """This function starts generative AI traces for the requested package.
+
+    Args:
+        package_name (GenerativeAIPackage): The package for which generative AI tracing is to be started.
+        enable_content_tracing (bool, optional): Configures whether the message content gets traced as part of the generative AI traces for the specific package
+            for which tracing is being started, as specified in the package_name parameter.
+            Note that this value is package-specific; in other words, the value passed in will only apply to the specific
+            package for which the traces are requested to be started and will not have an impact on any traces previously started
+            for other packages.
+            Defaults to False.
+
+    Raises:
+        RuntimeError: If traces for the requested package have already been started.
+        ValueError: The specified package does not support generative AI traces.
+    """
+    if package_name == GenerativeAIPackage.INFERENCE:
+        from ._inference_api_injector import _inject_inference_api
+        _inject_inference_api(enable_content_tracing)
+    else:
+        raise ValueError("The specified package does not support generative AI traces")
+
+
+def stop_generative_ai_traces(package_name: GenerativeAIPackage):
+    """This function stops tracing for the generative AI package.
+
+    Args:
+        package_name (GenerativeAIPackage): The package for which tracing is to be stopped.
+
+    Raises:
+        ValueError: The specified package does not support generative AI traces.
+    """
+    if package_name == GenerativeAIPackage.INFERENCE:
+        from ._inference_api_injector import _restore_inference_api
+        _restore_inference_api()
+    else:
+        raise ValueError("The specified package does not support generative AI traces")
diff --git a/sdk/core/azure-core/azure/core/tracing/_inference_api_injector.py b/sdk/core/azure-core/azure/core/tracing/_inference_api_injector.py
new file mode 100644
index 000000000000..3b369ec5373c
--- /dev/null
+++ b/sdk/core/azure-core/azure/core/tracing/_inference_api_injector.py
@@ -0,0 +1,370 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+import asyncio
+import functools
+import importlib
+import json
+import logging
+from enum import Enum
+from typing import Any, Iterator, Callable, Optional, List
+from azure.ai.inference.aio import ChatCompletionsClient
+from azure.ai.inference import models as _models
+from azure.core.tracing import AbstractSpan
+from azure.core.tracing import SpanKind
+from azure.core.settings import settings
+from .common import get_function_and_class_name
+
+_inference_traces_enabled: bool = False
+_trace_inference_content: bool = False
+
+class TraceType(str, Enum):
+    """An enumeration class to represent different types of traces."""
+
+    INFERENCE = "Inference"
+
+
+def _set_attributes(span: AbstractSpan, *attrs: tuple[str, Any]) -> None:
+    for attr in attrs:
+        key, value = attr
+        if value is not None:
+            span.add_attribute(key, value)
+
+
+def _add_request_chat_message_event(span: AbstractSpan, **kwargs: Any) -> None:
+    for message in kwargs.get("messages", []):
+        try:
+            message = message.as_dict()
+        except AttributeError:
+            pass
+
+        if message.get("role"):
+            name = f"gen_ai.{message.get('role')}.message"
+            span.span_instance.add_event(
+                name=name,
+                attributes={
+                    "get_ai.system": "openai",
+                    "gen_ai.event.content": json.dumps(message)
+                }
+            )
+
+
+def _add_request_chat_attributes(span: AbstractSpan, **kwargs: Any) -> None:
+    _set_attributes(
+        span,
+        ("gen_ai.system", "openai"),
+        ("gen_ai.request.model", kwargs.get("model")),
+        ("gen_ai.request.max_tokens", kwargs.get("max_tokens")),
+        ("gen_ai.request.temperature", kwargs.get("temperature")),
+        ("gen_ai.request.top_p", kwargs.get("top_p")),
+    )
+
+
+def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCompletions) -> None:
+    global _trace_inference_content
+    for choice in result.choices:
+        if _trace_inference_content:
+            response: dict[str, Any] = {
+                "message": {"content": choice.message.content},
+                "finish_reason": str(choice.finish_reason),
+                "index": choice.index,
+            }
+            attributes={
+                "get_ai.system": "openai",
+                "gen_ai.event.content": json.dumps(response)
+            }
+        else:
+            response: dict[str, Any] = {
+                "finish_reason": str(choice.finish_reason),
+                "index": choice.index,
+            }
+            attributes={
+                "get_ai.system": "openai",
+            }
+        if choice.message.tool_calls:
+            response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls]
+        span.span_instance.add_event(name="gen_ai.choice", attributes=attributes)
+
+
+def _add_response_chat_attributes(span: AbstractSpan, result: _models.ChatCompletions | _models.StreamingChatCompletionsUpdate) -> None:
+    _set_attributes(
+        span,
+        ("gen_ai.response.id", result.id),
+        ("gen_ai.response.model", result.model),
+        ("gen_ai.response.finish_reason", str(result.choices[-1].finish_reason)),
+        ("gen_ai.usage.completion_tokens", result.usage.completion_tokens if hasattr(result, "usage") and result.usage else None),
+        ("gen_ai.usage.prompt_tokens", result.usage.prompt_tokens if hasattr(result, "usage") and result.usage else None),
+    )
+
+
+def _add_request_span_attributes(span: AbstractSpan, span_name: str, kwargs: Any) -> None:
+    global _trace_inference_content
+    if span_name.startswith("ChatCompletionsClient.complete"):
+        _add_request_chat_attributes(span, **kwargs)
+        if _trace_inference_content:
+            _add_request_chat_message_event(span, **kwargs)
+    # TODO add more models here
+
+
+def _add_response_span_attributes(span: AbstractSpan, result: object) -> None:
+    if isinstance(result, _models.ChatCompletions):
+        _add_response_chat_attributes(span, result)
+        _add_response_chat_message_event(span, result)
+    # TODO add more models here
+
+
+def _accumulate_response(item, accumulate: dict[str, Any]) -> None:
+    if item.finish_reason:
+        accumulate["finish_reason"] = item.finish_reason
+    if item.index:
+        accumulate["index"] = item.index
+    if item.delta.content:
+        accumulate.setdefault("message", {})
+        accumulate["message"].setdefault("content", "")
+        accumulate["message"]["content"] += item.delta.content
+    if item.delta.tool_calls:
+        accumulate.setdefault("message", {})
+        accumulate["message"].setdefault("tool_calls", [])
+        for tool_call in item.delta.tool_calls:
+            if tool_call.id:
+                accumulate["message"]["tool_calls"].append({"id": tool_call.id, "type": "", "function": {"name": "", "arguments": ""}})
+            if tool_call.type:
+                accumulate["message"]["tool_calls"][-1]["type"] = tool_call.type
+            if tool_call.function and tool_call.function.name:
+                accumulate["message"]["tool_calls"][-1]["function"]["name"] = tool_call.function.name
+            if tool_call.function and tool_call.function.arguments:
+                accumulate["message"]["tool_calls"][-1]["function"]["arguments"] += tool_call.function.arguments
+
+
+def _wrapped_stream(stream_obj: _models.StreamingChatCompletions, span: AbstractSpan) -> _models.StreamingChatCompletions:
+    class StreamWrapper(_models.StreamingChatCompletions):
+        def __init__(self, stream_obj):
+            super().__init__(stream_obj._response)
+
+        def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
+            global _trace_inference_content
+            try:
+                accumulate: dict[str, Any] = {}
+                for chunk in stream_obj:
+                    for item in chunk.choices:
+                        _accumulate_response(item, accumulate)
+                    yield chunk
+
+                if _trace_inference_content:
+                    span.span_instance.add_event(
+                        name="gen_ai.choice",
+                        attributes={
+                            "get_ai.system": "openai",
+                            "gen_ai.event.content": json.dumps(accumulate)
+                        }
+                    )
+                _add_response_chat_attributes(span, chunk)
+
+            except Exception as exc:
+                _set_attributes(span, ("error.type", exc.__class__.__name__))
+                raise
+
+            finally:
+                if stream_obj._done is False:
+                    if accumulate.get("finish_reason") is None:
+                        accumulate["finish_reason"] = "error"
+                    if _trace_inference_content:
+                        span.span_instance.add_event(
+                            name="gen_ai.choice",
+                            attributes={
+                                "get_ai.system": "openai",
+                                "gen_ai.event.content": json.dumps(accumulate)
+                            }
+                        )
+                span.finish()
+
+    return StreamWrapper(stream_obj)
+
+
+def _trace_sync_function(
+    func: Callable = None,
+    *,
+    args_to_ignore: Optional[List[str]] = None,
+    trace_type=TraceType.INFERENCE,
+    name: Optional[str] = None,
+) -> Callable:
+    """
+    Decorator that adds tracing to a synchronous function.
+
+    Args:
+        func (Callable): The function to be traced.
+        args_to_ignore (Optional[List[str]], optional): A list of argument names to be ignored in the trace.
+            Defaults to None.
+        trace_type (TraceType, optional): The type of the trace. Defaults to TraceType.INFERENCE.
+        name (str, optional): The name of the trace, will set to func name if not provided.
+
+
+    Returns:
+        Callable: The traced function.
+    """
+
+    @functools.wraps(func)
+    def inner(*args, **kwargs):
+
+        span_impl_type = settings.tracing_implementation()
+        if span_impl_type is None:
+            return func(*args, **kwargs)
+
+        span_name = get_function_and_class_name(func, *args)
+        span = span_impl_type(name=span_name, kind=SpanKind.INTERNAL)
+        try:
+            # tracing events not supported in azure-core-tracing-opentelemetry
+            # so need to access the span instance directly
+            with span_impl_type.change_context(span.span_instance):
+                _add_request_span_attributes(span, span_name, kwargs)
+                result = func(*args, **kwargs)
+                if kwargs.get("stream") is True:
+                    return _wrapped_stream(result, span)
+                _add_response_span_attributes(span, result)
+
+        except Exception as exc:
+            _set_attributes(span, ("error.type", exc.__class__.__name__))
+            span.finish()
+            raise
+
+        span.finish()
+        return result
+
+    return inner
+
+
+def _trace_async_function(
+    func: Callable = None,
+    *,
+    args_to_ignore: Optional[List[str]] = None,
+    trace_type=TraceType.INFERENCE,
+    name: Optional[str] = None,
+) -> Callable:
+    """
+    Decorator that adds tracing to an asynchronous function.
+
+    Args:
+        func (Callable): The function to be traced.
+        args_to_ignore (Optional[List[str]], optional): A list of argument names to be ignored in the trace.
+            Defaults to None.
+        trace_type (TraceType, optional): The type of the trace. Defaults to TraceType.INFERENCE.
+        name (str, optional): The name of the trace, will set to func name if not provided.
+
+
+    Returns:
+        Callable: The traced function.
+    """
+
+    @functools.wraps(func)
+    async def inner(*args, **kwargs):
+
+        span_impl_type = settings.tracing_implementation()
+        if span_impl_type is None:
+            return func(*args, **kwargs)
+
+        span_name = get_function_and_class_name(func, *args)
+        span = span_impl_type(name=span_name, kind=SpanKind.INTERNAL)
+        try:
+            # tracing events not supported in azure-core-tracing-opentelemetry
+            # so need to access the span instance directly
+            with span_impl_type.change_context(span.span_instance):
+                _add_request_span_attributes(span, span_name, kwargs)
+                result = await func(*args, **kwargs)
+                if kwargs.get("stream") is True:
+                    return _wrapped_stream(result, span)
+                _add_response_span_attributes(span, result)
+
+        except Exception as exc:
+            _set_attributes(span, ("error.type", exc.__class__.__name__))
+            span.finish()
+            raise
+
+        span.finish()
+        return result
+
+    return inner
+
+
+def inject_async(f, trace_type, name):
+    wrapper_fun = _trace_async_function(f)
+    wrapper_fun._original = f
+    return wrapper_fun
+
+
+def inject_sync(f, trace_type, name):
+    wrapper_fun = _trace_sync_function(f)
+    wrapper_fun._original = f
+    return wrapper_fun
+
+
+def _inference_apis():
+    sync_apis = (
+        ("azure.ai.inference", "ChatCompletionsClient", "complete", TraceType.INFERENCE, "inference_chat_completions_complete"),
+    )
+    async_apis = ()
+    return sync_apis, async_apis
+
+
+def _inference_api_list():
+    sync_apis, async_apis = _inference_apis()
+    yield sync_apis, inject_sync
+    yield async_apis, inject_async
+
+
+def _generate_api_and_injector(apis):
+    for apis, injector in apis:
+        for module_name, class_name, method_name, trace_type, name in apis:
+            try:
+                module = importlib.import_module(module_name)
+                api = getattr(module, class_name)
+                if hasattr(api, method_name):
+                    yield api, method_name, trace_type, injector, name
+            except AttributeError as e:
+                # Log the attribute exception with the missing class information
+                logging.warning(
+                    f"AttributeError: The module '{module_name}' does not have the class '{class_name}'. {str(e)}"
+                )
+            except Exception as e:
+                # Log other exceptions as a warning, as we're not sure what they might be
+                logging.warning(f"An unexpected error occurred: {str(e)}")
+
+
+def available_inference_apis_and_injectors():
+    """
+    Generates a sequence of tuples containing Inference API classes, method names, and
+    corresponding injector functions.
+
+    Yields:
+        Tuples of (api_class, method_name, injector_function)
+    """
+    yield from _generate_api_and_injector(_inference_api_list())
+
+
+def _inject_inference_api(enable_content_tracing: bool = False):
+    """This function modifies the methods of the Inference API classes to inject logic before calling the original methods.
+    The original methods are stored as _original attributes of the methods.
+    """
+    global _inference_traces_enabled
+    global _trace_inference_content
+    if _inference_traces_enabled:
+        raise RuntimeError("Traces already started for azure.ai.inference")
+    _inference_traces_enabled = True
+    _trace_inference_content = enable_content_tracing
+    for api, method, trace_type, injector, name in available_inference_apis_and_injectors():
+        # Check if the method of the api class has already been modified
+        if not hasattr(getattr(api, method), "_original"):
+            setattr(api, method, injector(getattr(api, method), trace_type, name))
+
+
+def _restore_inference_api():
+    """This function restores the original methods of the Inference API classes
+    by assigning them back from the _original attributes of the modified methods.
+    """
+    global _inference_traces_enabled
+    global _trace_inference_content
+    _trace_inference_content = False
+    for api, method, _, _, _ in available_inference_apis_and_injectors():
+        if hasattr(getattr(api, method), "_original"):
+            setattr(api, method, getattr(getattr(api, method), "_original"))
+    _inference_traces_enabled = False
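For orientation, the API introduced by this first patch is consumed as follows. This is an illustrative sketch only: it assumes the OpenTelemetry tracing plugin for azure-core is installed and that exporters are configured elsewhere, and the traced client call is a placeholder.

    from azure.core.settings import settings
    from azure.core.tracing import GenerativeAIPackage, start_generative_ai_traces, stop_generative_ai_traces

    # Select the OpenTelemetry span implementation for azure-core.
    settings.tracing_implementation = "opentelemetry"

    # Patch ChatCompletionsClient.complete so each call emits a span;
    # message content stays out of the spans unless explicitly enabled.
    start_generative_ai_traces(GenerativeAIPackage.INFERENCE, enable_content_tracing=False)
    try:
        ...  # calls made through azure.ai.inference clients are now traced
    finally:
        stop_generative_ai_traces(GenerativeAIPackage.INFERENCE)
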
From 9dc2cf957febe69a7cc63b6678e4ac84d2a4cea0 Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Fri, 16 Aug 2024 12:47:39 -0400
Subject: [PATCH 02/35] changing the interface based on feedback

---
 .../azure-core/azure/core/tracing/__init__.py |  2 +-
 .../tracing/_generative_ai_trace_injectors.py | 48 ------------------
 .../tracing/_inference_api_instrumentor.py    | 34 +++++++++++++
 ...py => _inference_api_instrumentor_impl.py} | 32 ++++++++-----
 .../tracing/azure_telemetry_instrumentor.py   | 20 ++++++++
 5 files changed, 75 insertions(+), 61 deletions(-)
 delete mode 100644 sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py
 create mode 100644 sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor.py
 rename sdk/core/azure-core/azure/core/tracing/{_inference_api_injector.py => _inference_api_instrumentor_impl.py} (94%)
 create mode 100644 sdk/core/azure-core/azure/core/tracing/azure_telemetry_instrumentor.py

diff --git a/sdk/core/azure-core/azure/core/tracing/__init__.py b/sdk/core/azure-core/azure/core/tracing/__init__.py
index 703d89e31a23..741b9e07b267 100644
--- a/sdk/core/azure-core/azure/core/tracing/__init__.py
+++ b/sdk/core/azure-core/azure/core/tracing/__init__.py
@@ -8,6 +8,6 @@
     HttpSpanMixin,
     Link,
 )
-from ._generative_ai_trace_injectors import start_generative_ai_traces, stop_generative_ai_traces, GenerativeAIPackage
+from ._inference_api_instrumentor import InferenceApiInstrumentor
 
 __all__ = ["AbstractSpan", "SpanKind", "HttpSpanMixin", "Link"]
diff --git a/sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py b/sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py
deleted file mode 100644
index 91ced9f54509..000000000000
--- a/sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-from enum import Enum
-
-class GenerativeAIPackage(str, Enum):
-    """An enumeration class to represent the packages that provide generative AI traces."""
-
-    INFERENCE = "azure.ai.inference"
-
-
-def start_generative_ai_traces(package_name: GenerativeAIPackage, enable_content_tracing: bool=False):
-    """This function starts generative AI traces for the requested package.
-
-    Args:
-        package_name (GenerativeAIPackage): The package for which generative AI tracing is to be started.
-        enable_content_tracing (bool, optional): Configures whether the message content gets traced as part of the generative AI traces for the specific package
-            for which tracing is being started, as specified in the package_name parameter.
-            Note that this value is package-specific; in other words, the value passed in will only apply to the specific
-            package for which the traces are requested to be started and will not have an impact on any traces previously started
-            for other packages.
-            Defaults to False.
-
-    Raises:
-        RuntimeError: If traces for the requested package have already been started.
-        ValueError: The specified package does not support generative AI traces.
-    """
-    if package_name == GenerativeAIPackage.INFERENCE:
-        from ._inference_api_injector import _inject_inference_api
-        _inject_inference_api(enable_content_tracing)
-    else:
-        raise ValueError("The specified package does not support generative AI traces")
-
-
-def stop_generative_ai_traces(package_name: GenerativeAIPackage):
-    """This function stops tracing for the generative AI package.
-
-    Args:
-        package_name (GenerativeAIPackage): The package for which tracing is to be stopped.
-
-    Raises:
-        ValueError: The specified package does not support generative AI traces.
-    """
-    if package_name == GenerativeAIPackage.INFERENCE:
-        from ._inference_api_injector import _restore_inference_api
-        _restore_inference_api()
-    else:
-        raise ValueError("The specified package does not support generative AI traces")
diff --git a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor.py b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor.py
new file mode 100644
index 000000000000..e47cb26784cd
--- /dev/null
+++ b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor.py
@@ -0,0 +1,34 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+import os
+from .azure_telemetry_instrumentor import AzureTelemetryInstrumentor
+
+class InferenceApiInstrumentor(AzureTelemetryInstrumentor):
+    def __init__(self):
+        super().__init__()
+
+    def str_to_bool(self, s):
+        if s is None:
+            return False
+        return str(s).lower() == 'true'
+
+    def instrument(self):
+        if self.is_instrumented():
+            raise RuntimeError("Already instrumented")
+
+        var_value = os.environ.get("AZURE_INFERENCE_API_ENABLE_CONTENT_TRACING")
+        enable_content_tracing = self.str_to_bool(var_value)
+        from ._inference_api_instrumentor_impl import _inject_inference_api
+        _inject_inference_api(enable_content_tracing)
+
+    def uninstrument(self):
+        if not self.is_instrumented():
+            raise RuntimeError("Not instrumented")
+
+        from ._inference_api_instrumentor_impl import _restore_inference_api
+        _restore_inference_api()
+
+    def is_instrumented(self):
+        from ._inference_api_instrumentor_impl import _is_instrumented
+        return _is_instrumented()
diff --git a/sdk/core/azure-core/azure/core/tracing/_inference_api_injector.py b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
similarity index 94%
rename from sdk/core/azure-core/azure/core/tracing/_inference_api_injector.py
rename to sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
index 3b369ec5373c..f2ceaeb18af2 100644
--- a/sdk/core/azure-core/azure/core/tracing/_inference_api_injector.py
+++ b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
@@ -183,7 +183,7 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
 
 
 def _trace_sync_function(
-    func: Callable = None,
+    function: Callable = None,
     *,
     args_to_ignore: Optional[List[str]] = None,
     trace_type=TraceType.INFERENCE,
@@ -193,7 +193,7 @@
     Decorator that adds tracing to a synchronous function.
 
     Args:
-        func (Callable): The function to be traced.
+        function (Callable): The function to be traced.
         args_to_ignore (Optional[List[str]], optional): A list of argument names to be ignored in the trace.
             Defaults to None.
         trace_type (TraceType, optional): The type of the trace. Defaults to TraceType.INFERENCE.
@@ -204,21 +204,21 @@
         Callable: The traced function.
     """
 
-    @functools.wraps(func)
+    @functools.wraps(function)
     def inner(*args, **kwargs):
 
         span_impl_type = settings.tracing_implementation()
         if span_impl_type is None:
-            return func(*args, **kwargs)
+            return function(*args, **kwargs)
 
-        span_name = get_function_and_class_name(func, *args)
+        span_name = get_function_and_class_name(function, *args)
         span = span_impl_type(name=span_name, kind=SpanKind.INTERNAL)
         try:
             # tracing events not supported in azure-core-tracing-opentelemetry
             # so need to access the span instance directly
             with span_impl_type.change_context(span.span_instance):
                 _add_request_span_attributes(span, span_name, kwargs)
-                result = func(*args, **kwargs)
+                result = function(*args, **kwargs)
                 if kwargs.get("stream") is True:
                     return _wrapped_stream(result, span)
                 _add_response_span_attributes(span, result)
@@ -235,7 +235,7 @@
 
 
 def _trace_async_function(
-    func: Callable = None,
+    function: Callable = None,
     *,
     args_to_ignore: Optional[List[str]] = None,
     trace_type=TraceType.INFERENCE,
@@ -245,7 +245,7 @@
     Decorator that adds tracing to an asynchronous function.
 
     Args:
-        func (Callable): The function to be traced.
+        function (Callable): The function to be traced.
         args_to_ignore (Optional[List[str]], optional): A list of argument names to be ignored in the trace.
             Defaults to None.
         trace_type (TraceType, optional): The type of the trace. Defaults to TraceType.INFERENCE.
@@ -256,21 +256,21 @@
         Callable: The traced function.
     """
 
-    @functools.wraps(func)
+    @functools.wraps(function)
     async def inner(*args, **kwargs):
 
         span_impl_type = settings.tracing_implementation()
         if span_impl_type is None:
-            return func(*args, **kwargs)
+            return function(*args, **kwargs)
 
-        span_name = get_function_and_class_name(func, *args)
+        span_name = get_function_and_class_name(function, *args)
         span = span_impl_type(name=span_name, kind=SpanKind.INTERNAL)
         try:
             # tracing events not supported in azure-core-tracing-opentelemetry
             # so need to access the span instance directly
             with span_impl_type.change_context(span.span_instance):
                 _add_request_span_attributes(span, span_name, kwargs)
-                result = await func(*args, **kwargs)
+                result = await function(*args, **kwargs)
                 if kwargs.get("stream") is True:
                     return _wrapped_stream(result, span)
                 _add_response_span_attributes(span, result)
@@ -368,3 +368,11 @@ def _restore_inference_api():
         if hasattr(getattr(api, method), "_original"):
             setattr(api, method, getattr(getattr(api, method), "_original"))
     _inference_traces_enabled = False
+
+
+def _is_instrumented():
+    """This function returns True if Inference API has already been instrumented
+    for tracing and False if the API has not been instrumented.
+    """
+    global _inference_traces_enabled
+    return _inference_traces_enabled
diff --git a/sdk/core/azure-core/azure/core/tracing/azure_telemetry_instrumentor.py b/sdk/core/azure-core/azure/core/tracing/azure_telemetry_instrumentor.py
new file mode 100644
index 000000000000..7950a442363e
--- /dev/null
+++ b/sdk/core/azure-core/azure/core/tracing/azure_telemetry_instrumentor.py
@@ -0,0 +1,20 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+from abc import ABC, abstractmethod
+
+class AzureTelemetryInstrumentor(ABC):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def instrument(self):
+        pass
+
+    @abstractmethod
+    def uninstrument(self):
+        pass
+
+    @abstractmethod
+    def is_instrumented(self):
+        pass
\ No newline at end of file
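The reworked surface replaces the module-level start/stop functions with an instrumentor object built on the new AzureTelemetryInstrumentor abstract base. A minimal sketch of the intended call pattern (illustrative only; content recording is now driven by an environment variable rather than a parameter):

    from azure.core.tracing import InferenceApiInstrumentor

    instrumentor = InferenceApiInstrumentor()
    if not instrumentor.is_instrumented():
        # Reads AZURE_INFERENCE_API_ENABLE_CONTENT_TRACING at this point.
        instrumentor.instrument()
    ...
    instrumentor.uninstrument()
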
From 58a032b851466d707c4a72f780dc3501c56ecea7 Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Fri, 16 Aug 2024 16:25:17 -0400
Subject: [PATCH 03/35] updates

---
 sdk/core/azure-core/azure/core/tracing/__init__.py  |  2 +-
 ...umentor.py => _ai_inference_api_instrumentor.py} |  2 +-
 .../tracing/_inference_api_instrumentor_impl.py     | 13 +++++++------
 3 files changed, 9 insertions(+), 8 deletions(-)
 rename sdk/core/azure-core/azure/core/tracing/{_inference_api_instrumentor.py => _ai_inference_api_instrumentor.py} (95%)

diff --git a/sdk/core/azure-core/azure/core/tracing/__init__.py b/sdk/core/azure-core/azure/core/tracing/__init__.py
index 741b9e07b267..79738b9fd650 100644
--- a/sdk/core/azure-core/azure/core/tracing/__init__.py
+++ b/sdk/core/azure-core/azure/core/tracing/__init__.py
@@ -8,6 +8,6 @@
     HttpSpanMixin,
     Link,
 )
-from ._inference_api_instrumentor import InferenceApiInstrumentor
+from ._ai_inference_api_instrumentor import AiInferenceApiInstrumentor
 
 __all__ = ["AbstractSpan", "SpanKind", "HttpSpanMixin", "Link"]
diff --git a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor.py b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
similarity index 95%
rename from sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor.py
rename to sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
index e47cb26784cd..5402e233f2f6 100644
--- a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor.py
+++ b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
@@ -4,7 +4,7 @@
 import os
 from .azure_telemetry_instrumentor import AzureTelemetryInstrumentor
 
-class InferenceApiInstrumentor(AzureTelemetryInstrumentor):
+class AiInferenceApiInstrumentor(AzureTelemetryInstrumentor):
     def __init__(self):
         super().__init__()
 
diff --git a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
index f2ceaeb18af2..22a0330dabd9 100644
--- a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
+++ b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
@@ -18,6 +18,7 @@
 
 _inference_traces_enabled: bool = False
 _trace_inference_content: bool = False
+INFERENCE_GEN_AI_SYSTEM_NAME = "azure.ai.inference"
 
 class TraceType(str, Enum):
     """An enumeration class to represent different types of traces."""
@@ -44,7 +45,7 @@ def _add_request_chat_message_event(span: AbstractSpan, **kwargs: Any) -> None:
             span.span_instance.add_event(
                 name=name,
                 attributes={
-                    "get_ai.system": "openai",
+                    "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                     "gen_ai.event.content": json.dumps(message)
                 }
             )
@@ -53,7 +54,7 @@ def _add_request_chat_message_event(span: AbstractSpan, **kwargs: Any) -> None:
 def _add_request_chat_attributes(span: AbstractSpan, **kwargs: Any) -> None:
     _set_attributes(
         span,
-        ("gen_ai.system", "openai"),
+        ("gen_ai.system", INFERENCE_GEN_AI_SYSTEM_NAME),
         ("gen_ai.request.model", kwargs.get("model")),
         ("gen_ai.request.max_tokens", kwargs.get("max_tokens")),
         ("gen_ai.request.temperature", kwargs.get("temperature")),
@@ -71,7 +72,7 @@ def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCompletions) -> None:
                 "index": choice.index,
             }
             attributes={
-                "get_ai.system": "openai",
+                "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                 "gen_ai.event.content": json.dumps(response)
             }
         else:
@@ -80,7 +81,7 @@
                 "index": choice.index,
             }
             attributes={
-                "get_ai.system": "openai",
+                "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
             }
         if choice.message.tool_calls:
             response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls]
@@ -155,7 +156,7 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
             span.span_instance.add_event(
                 name="gen_ai.choice",
                 attributes={
-                    "get_ai.system": "openai",
+                    "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                     "gen_ai.event.content": json.dumps(accumulate)
                 }
             )
@@ -173,7 +174,7 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
             span.span_instance.add_event(
                 name="gen_ai.choice",
                 attributes={
-                    "get_ai.system": "openai",
+                    "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                    "gen_ai.event.content": json.dumps(accumulate)
                 }
             )
From ec1cd166e1739332f9c78e25991d5bd113574f23 Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Tue, 20 Aug 2024 13:52:43 -0400
Subject: [PATCH 04/35] changing name of environment variable

---
 .../azure/core/tracing/_ai_inference_api_instrumentor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
index 5402e233f2f6..b6b2ae08caad 100644
--- a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
+++ b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
@@ -17,7 +17,7 @@ def instrument(self):
         if self.is_instrumented():
             raise RuntimeError("Already instrumented")
 
-        var_value = os.environ.get("AZURE_INFERENCE_API_ENABLE_CONTENT_TRACING")
+        var_value = os.environ.get("AZUREAI_INFERENCE_API_ENABLE_CONTENT_TRACING")
         enable_content_tracing = self.str_to_bool(var_value)
         from ._inference_api_instrumentor_impl import _inject_inference_api
         _inject_inference_api(enable_content_tracing)
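Patch 04 only renames the opt-in switch; the truthiness test is unchanged. An illustrative sketch of the toggle at this point in the series (the variable is read once, when instrument() runs, and only the literal "true", case-insensitively, enables content recording):

    import os
    from azure.core.tracing import AiInferenceApiInstrumentor

    # Any other value, or leaving the variable unset, means False.
    os.environ["AZUREAI_INFERENCE_API_ENABLE_CONTENT_TRACING"] = "true"
    AiInferenceApiInstrumentor().instrument()
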
From 327007684beb06a23dec9852629045c21de46f52 Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Fri, 6 Sep 2024 16:08:42 -0500
Subject: [PATCH 05/35] changes based on review comments and some other changes

---
 .../tracing/_ai_inference_api_instrumentor.py |   5 +-
 .../_inference_api_instrumentor_impl.py       | 166 +++++++++++-------
 2 files changed, 109 insertions(+), 62 deletions(-)

diff --git a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
index b6b2ae08caad..1964e3a83793 100644
--- a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
+++ b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
@@ -3,7 +3,8 @@
 # ---------------------------------------------------------
 import os
 from .azure_telemetry_instrumentor import AzureTelemetryInstrumentor
-
+
+
 class AiInferenceApiInstrumentor(AzureTelemetryInstrumentor):
     def __init__(self):
         super().__init__()
@@ -17,7 +18,7 @@ def instrument(self):
         if self.is_instrumented():
             raise RuntimeError("Already instrumented")
 
-        var_value = os.environ.get("AZUREAI_INFERENCE_API_ENABLE_CONTENT_TRACING")
+        var_value = os.environ.get("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED")
         enable_content_tracing = self.str_to_bool(var_value)
         from ._inference_api_instrumentor_impl import _inject_inference_api
         _inject_inference_api(enable_content_tracing)
diff --git a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
index 22a0330dabd9..36e14936e174 100644
--- a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
+++ b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
@@ -7,18 +7,21 @@
 import importlib
 import json
 import logging
+from urllib.parse import urlparse
 from enum import Enum
-from typing import Any, Iterator, Callable, Optional, List
+from typing import Any, Iterator, Callable, Optional, List, Tuple, Dict
 from azure.ai.inference.aio import ChatCompletionsClient
 from azure.ai.inference import models as _models
 from azure.core.tracing import AbstractSpan
 from azure.core.tracing import SpanKind
 from azure.core.settings import settings
 from .common import get_function_and_class_name
+from opentelemetry.trace import Status, StatusCode, Span
 
 _inference_traces_enabled: bool = False
 _trace_inference_content: bool = False
-INFERENCE_GEN_AI_SYSTEM_NAME = "azure.ai.inference"
+INFERENCE_GEN_AI_SYSTEM_NAME = "az.ai.inference"
+
 
 class TraceType(str, Enum):
     """An enumeration class to represent different types of traces."""
@@ -26,7 +29,7 @@ class TraceType(str, Enum):
     INFERENCE = "Inference"
 
 
-def _set_attributes(span: AbstractSpan, *attrs: tuple[str, Any]) -> None:
+def _set_attributes(span: AbstractSpan, *attrs: Tuple[str, Any]) -> None:
     for attr in attrs:
         key, value = attr
         if value is not None:
@@ -45,67 +48,87 @@ def _add_request_chat_message_event(span: AbstractSpan, **kwargs: Any) -> None:
             span.span_instance.add_event(
                 name=name,
                 attributes={
-                    "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
+                    "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                     "gen_ai.event.content": json.dumps(message)
                 }
             )
 
 
-def _add_request_chat_attributes(span: AbstractSpan, **kwargs: Any) -> None:
+def parse_url(url):
+    parsed = urlparse(url)
+    server_address = parsed.hostname
+    port = parsed.port
+    return server_address, port
+
+
+def _add_request_chat_attributes(span: AbstractSpan, *args: Any, **kwargs: Any) -> None:
+    client = args[0]
+    endpoint = client._config.endpoint
+    server_address, port = parse_url(endpoint)
     _set_attributes(
         span,
+        ("gen_ai.operation.name", "chat"),
         ("gen_ai.system", INFERENCE_GEN_AI_SYSTEM_NAME),
         ("gen_ai.request.model", kwargs.get("model")),
         ("gen_ai.request.max_tokens", kwargs.get("max_tokens")),
         ("gen_ai.request.temperature", kwargs.get("temperature")),
         ("gen_ai.request.top_p", kwargs.get("top_p")),
+        ("server.address", server_address),
     )
+    if port is not None and port != 443:
+        span.add_attribute("server.port", port)
 
 
 def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCompletions) -> None:
-    global _trace_inference_content
     for choice in result.choices:
         if _trace_inference_content:
-            response: dict[str, Any] = {
+            response: Dict[str, Any] = {
                 "message": {"content": choice.message.content},
                 "finish_reason": str(choice.finish_reason),
                 "index": choice.index,
             }
             attributes={
-                "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
+                "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                 "gen_ai.event.content": json.dumps(response)
             }
         else:
-            response: dict[str, Any] = {
+            response: Dict[str, Any] = {
                 "finish_reason": str(choice.finish_reason),
                 "index": choice.index,
             }
             attributes={
-                "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
+                "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
             }
         if choice.message.tool_calls:
             response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls]
         span.span_instance.add_event(name="gen_ai.choice", attributes=attributes)
 
 
+def get_finish_reasons(result):
+    if hasattr(result, "choices") and result.choices:
+        return [getattr(choice, "finish_reason", None).value if getattr(choice, "finish_reason", None) is not None else "none" for choice in result.choices]
+    else:
+        return None
+
+
 def _add_response_chat_attributes(span: AbstractSpan, result: _models.ChatCompletions | _models.StreamingChatCompletionsUpdate) -> None:
+
     _set_attributes(
         span,
         ("gen_ai.response.id", result.id),
         ("gen_ai.response.model", result.model),
-        ("gen_ai.response.finish_reason", str(result.choices[-1].finish_reason)),
-        ("gen_ai.usage.completion_tokens", result.usage.completion_tokens if hasattr(result, "usage") and result.usage else None),
-        ("gen_ai.usage.prompt_tokens", result.usage.prompt_tokens if hasattr(result, "usage") and result.usage else None),
+        ("gen_ai.usage.input_tokens", result.usage.prompt_tokens if hasattr(result, "usage") and result.usage else None),
+        ("gen_ai.usage.output_tokens", result.usage.completion_tokens if hasattr(result, "usage") and result.usage else None),
     )
+    finish_reasons = get_finish_reasons(result)
+    span.add_attribute("gen_ai.response.finish_reasons", finish_reasons)
 
 
-def _add_request_span_attributes(span: AbstractSpan, span_name: str, kwargs: Any) -> None:
+def _add_request_span_attributes(span: AbstractSpan, span_name: str, args: Any, kwargs: Any) -> None:
     global _trace_inference_content
-    if span_name.startswith("ChatCompletionsClient.complete"):
-        _add_request_chat_attributes(span, **kwargs)
-        if _trace_inference_content:
-            _add_request_chat_message_event(span, **kwargs)
-    # TODO add more models here
+    _add_request_chat_attributes(span, *args, **kwargs)
+    if _trace_inference_content:
+        _add_request_chat_message_event(span, **kwargs)
 
 
 def _add_response_span_attributes(span: AbstractSpan, result: object) -> None:
@@ -115,7 +138,7 @@ def _add_response_span_attributes(span: AbstractSpan, result: object) -> None:
     # TODO add more models here
 
 
-def _accumulate_response(item, accumulate: dict[str, Any]) -> None:
+def _accumulate_response(item, accumulate: Dict[str, Any]) -> None:
     if item.finish_reason:
         accumulate["finish_reason"] = item.finish_reason
     if item.index:
@@ -146,7 +169,7 @@ def __init__(self, stream_obj):
         def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
             global _trace_inference_content
             try:
-                accumulate: dict[str, Any] = {}
+                accumulate: Dict[str, Any] = {}
                 for chunk in stream_obj:
                     for item in chunk.choices:
                         _accumulate_response(item, accumulate)
@@ -155,14 +179,19 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
                     span.span_instance.add_event(
                         name="gen_ai.choice",
                         attributes={
-                            "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
+                            "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                             "gen_ai.event.content": json.dumps(accumulate)
                         }
                     )
                 _add_response_chat_attributes(span, chunk)
 
             except Exception as exc:
-                _set_attributes(span, ("error.type", exc.__class__.__name__))
+                # Set the span status to error
+                if isinstance(span.span_instance, Span):
+                    span.span_instance.set_status(StatusCode.ERROR, description=str(exc))
+                module = exc.__module__ if exc.__module__ != "builtins" else ""
+                error_type = f"{module}.{exc.__qualname__}" if module else exc.__qualname__
+                _set_attributes(span, ("error.type", error_type))
                 raise
 
             finally:
@@ -174,7 +202,7 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
                     span.span_instance.add_event(
                         name="gen_ai.choice",
                         attributes={
-                            "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
+                            "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                             "gen_ai.event.content": json.dumps(accumulate)
                         }
                     )
@@ -212,25 +240,34 @@ def inner(*args, **kwargs):
         if span_impl_type is None:
             return function(*args, **kwargs)
 
-        span_name = get_function_and_class_name(function, *args)
-        span = span_impl_type(name=span_name, kind=SpanKind.INTERNAL)
-        try:
-            # tracing events not supported in azure-core-tracing-opentelemetry
-            # so need to access the span instance directly
-            with span_impl_type.change_context(span.span_instance):
-                _add_request_span_attributes(span, span_name, kwargs)
-                result = function(*args, **kwargs)
-                if kwargs.get("stream") is True:
-                    return _wrapped_stream(result, span)
-                _add_response_span_attributes(span, result)
-
-        except Exception as exc:
-            _set_attributes(span, ("error.type", exc.__class__.__name__))
-            span.finish()
-            raise
+        class_function_name = get_function_and_class_name(function, *args)
 
-        span.finish()
-        return result
+        if class_function_name.startswith("ChatCompletionsClient.complete"):
+            # span_name = {gen_ai.operation.name} {gen_ai.request.model}
+            span_name = f"chat {kwargs.get('model')}"
+            span = span_impl_type(name=span_name, kind=SpanKind.CLIENT)
+            try:
+                # tracing events not supported in azure-core-tracing-opentelemetry
+                # so need to access the span instance directly
+                with span_impl_type.change_context(span.span_instance):
+                    _add_request_span_attributes(span, span_name, args, kwargs)
+                    result = function(*args, **kwargs)
+                    if kwargs.get("stream") is True:
+                        return _wrapped_stream(result, span)
+                    _add_response_span_attributes(span, result)
+
+            except Exception as exc:
+                # Set the span status to error
+                if isinstance(span.span_instance, Span):
+                    span.span_instance.set_status(StatusCode.ERROR, description=str(exc))
+                module = exc.__module__ if exc.__module__ != "builtins" else ""
+                error_type = f"{module}.{exc.__qualname__}" if module else exc.__qualname__
+                _set_attributes(span, ("error.type", error_type))
+                span.finish()
+                raise
+
+            span.finish()
+            return result
 
     return inner
@@ -264,25 +301,34 @@ async def inner(*args, **kwargs):
         if span_impl_type is None:
             return function(*args, **kwargs)
 
-        span_name = get_function_and_class_name(function, *args)
-        span = span_impl_type(name=span_name, kind=SpanKind.INTERNAL)
-        try:
-            # tracing events not supported in azure-core-tracing-opentelemetry
-            # so need to access the span instance directly
-            with span_impl_type.change_context(span.span_instance):
-                _add_request_span_attributes(span, span_name, kwargs)
-                result = await function(*args, **kwargs)
-                if kwargs.get("stream") is True:
-                    return _wrapped_stream(result, span)
-                _add_response_span_attributes(span, result)
-
-        except Exception as exc:
-            _set_attributes(span, ("error.type", exc.__class__.__name__))
-            span.finish()
-            raise
+        class_function_name = get_function_and_class_name(function, *args)
 
-        span.finish()
-        return result
+        if class_function_name.startswith("ChatCompletionsClient.complete"):
+            # span_name = {gen_ai.operation.name} {gen_ai.request.model}
+            span_name = f"chat {kwargs.get('model')}"
+            span = span_impl_type(name=span_name, kind=SpanKind.CLIENT)
+            try:
+                # tracing events not supported in azure-core-tracing-opentelemetry
+                # so need to access the span instance directly
+                with span_impl_type.change_context(span.span_instance):
+                    _add_request_span_attributes(span, span_name, args, kwargs)
+                    result = await function(*args, **kwargs)
+                    if kwargs.get("stream") is True:
+                        return _wrapped_stream(result, span)
+                    _add_response_span_attributes(span, result)
+
+            except Exception as exc:
+                # Set the span status to error
+                if isinstance(span.span_instance, Span):
+                    span.span_instance.set_status(StatusCode.ERROR, description=str(exc))
+                module = exc.__module__ if exc.__module__ != "builtins" else ""
+                error_type = f"{module}.{exc.__qualname__}" if module else exc.__qualname__
+                _set_attributes(span, ("error.type", error_type))
+                span.finish()
+                raise
+
+            span.finish()
+            return result
 
     return inner
From 7cbbc0b7e450afc43c9323dee2fccbf977e39087 Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Fri, 6 Sep 2024 16:19:16 -0500
Subject: [PATCH 06/35] file name change

---
 .../azure/core/tracing/_ai_inference_api_instrumentor.py    | 6 +++---
 ...entor_impl.py => _ai_inference_api_instrumentor_impl.py} | 0
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename sdk/core/azure-core/azure/core/tracing/{_inference_api_instrumentor_impl.py => _ai_inference_api_instrumentor_impl.py} (100%)

diff --git a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
index 1964e3a83793..09e25113902e 100644
--- a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
+++ b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
@@ -20,16 +20,16 @@ def instrument(self):
 
         var_value = os.environ.get("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED")
         enable_content_tracing = self.str_to_bool(var_value)
-        from ._inference_api_instrumentor_impl import _inject_inference_api
+        from ._ai_inference_api_instrumentor_impl import _inject_inference_api
         _inject_inference_api(enable_content_tracing)
 
     def uninstrument(self):
         if not self.is_instrumented():
             raise RuntimeError("Not instrumented")
 
-        from ._inference_api_instrumentor_impl import _restore_inference_api
+        from ._ai_inference_api_instrumentor_impl import _restore_inference_api
         _restore_inference_api()
 
     def is_instrumented(self):
-        from ._inference_api_instrumentor_impl import _is_instrumented
+        from ._ai_inference_api_instrumentor_impl import _is_instrumented
         return _is_instrumented()
diff --git a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py
similarity index 100%
rename from sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
rename to sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py
From 941a9ae1f87e2726569691d26d587a2b2c88a7c0 Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Tue, 10 Sep 2024 10:27:50 -0500
Subject: [PATCH 07/35] fixing exception handling

---
 .../core/tracing/_ai_inference_api_instrumentor_impl.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py
index 36e14936e174..6bc7773500bd 100644
--- a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py
+++ b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py
@@ -190,7 +190,7 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
                 if isinstance(span.span_instance, Span):
                     span.span_instance.set_status(StatusCode.ERROR, description=str(exc))
                 module = exc.__module__ if exc.__module__ != "builtins" else ""
-                error_type = f"{module}.{exc.__qualname__}" if module else exc.__qualname__
+                error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__
                 _set_attributes(span, ("error.type", error_type))
                 raise
 
@@ -261,7 +261,7 @@ def inner(*args, **kwargs):
                 if isinstance(span.span_instance, Span):
                     span.span_instance.set_status(StatusCode.ERROR, description=str(exc))
                 module = exc.__module__ if exc.__module__ != "builtins" else ""
-                error_type = f"{module}.{exc.__qualname__}" if module else exc.__qualname__
+                error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__
                 _set_attributes(span, ("error.type", error_type))
                 span.finish()
                 raise
@@ -322,7 +322,7 @@ async def inner(*args, **kwargs):
                 if isinstance(span.span_instance, Span):
                     span.span_instance.set_status(StatusCode.ERROR, description=str(exc))
                 module = exc.__module__ if exc.__module__ != "builtins" else ""
-                error_type = f"{module}.{exc.__qualname__}" if module else exc.__qualname__
+                error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__
                 _set_attributes(span, ("error.type", error_type))
                 span.finish()
                 raise
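The fully-qualified exception-name logic that patches 05 and 07 converge on can be exercised in isolation. A standalone sketch of the same expression (note that __qualname__ exists on exception classes, not instances, which is why patch 07 switches to type(exc).__name__):

    import json

    def _error_type(exc: Exception) -> str:
        # Built-in exceptions are reported bare; everything else is module-qualified.
        module = exc.__module__ if exc.__module__ != "builtins" else ""
        return f"{module}.{type(exc).__name__}" if module else type(exc).__name__

    assert _error_type(ValueError("boom")) == "ValueError"
    assert _error_type(json.JSONDecodeError("boom", "", 0)) == "json.decoder.JSONDecodeError"
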
From bcc6e7459ec77941b4dcac61ff6bc8854ab823fa Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Tue, 10 Sep 2024 11:02:24 -0500
Subject: [PATCH 08/35] relocating inference trace instrumentation

---
 .../azure/core/tracing/ai/__init__.py               | 1 +
 .../azure/core/tracing/ai/inference/__init__.py     | 5 +++++
 .../tracing/ai/inference}/_ai_inference_api_instrumentor.py | 0
 .../ai/inference}/_ai_inference_api_instrumentor_impl.py | 2 +-
 .../tracing/ai/inference}/azure_telemetry_instrumentor.py | 0
 sdk/core/azure-core-tracing-opentelemetry/setup.py  | 1 +
 6 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/__init__.py
 create mode 100644 sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py
 rename sdk/core/{azure-core/azure/core/tracing => azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference}/_ai_inference_api_instrumentor.py (100%)
 rename sdk/core/{azure-core/azure/core/tracing => azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference}/_ai_inference_api_instrumentor_impl.py (99%)
 rename sdk/core/{azure-core/azure/core/tracing => azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference}/azure_telemetry_instrumentor.py (100%)

diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/__init__.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/__init__.py
new file mode 100644
index 000000000000..d55ccad1f573
--- /dev/null
+++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/__init__.py
@@ -0,0 +1 @@
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py
new file mode 100644
index 000000000000..f28d7b1d1317
--- /dev/null
+++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py
@@ -0,0 +1,5 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+from ._ai_inference_api_instrumentor import AiInferenceApiInstrumentor
diff --git a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py
similarity index 100%
rename from sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
rename to sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py
diff --git a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py
similarity index 99%
rename from sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py
rename to sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py
index 6bc7773500bd..a7a25ce59d69 100644
--- a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py
+++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py
@@ -15,7 +15,7 @@
 from azure.core.tracing import AbstractSpan
 from azure.core.tracing import SpanKind
 from azure.core.settings import settings
-from .common import get_function_and_class_name
+from azure.core.tracing.common import get_function_and_class_name
 from opentelemetry.trace import Status, StatusCode, Span
 
 _inference_traces_enabled: bool = False
diff --git a/sdk/core/azure-core/azure/core/tracing/azure_telemetry_instrumentor.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/azure_telemetry_instrumentor.py
similarity index 100%
rename from sdk/core/azure-core/azure/core/tracing/azure_telemetry_instrumentor.py
rename to sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/azure_telemetry_instrumentor.py
diff --git a/sdk/core/azure-core-tracing-opentelemetry/setup.py b/sdk/core/azure-core-tracing-opentelemetry/setup.py
index ae0a5baf512a..929e1cb3fee6 100644
--- a/sdk/core/azure-core-tracing-opentelemetry/setup.py
+++ b/sdk/core/azure-core-tracing-opentelemetry/setup.py
@@ -55,6 +55,7 @@
     zip_safe=False,
    packages=[
         "azure.core.tracing.ext.opentelemetry_span",
+        "azure.core.tracing.ai.inference",
     ],
     include_package_data=True,
     package_data={
From 709923c7d7a4b4ca70c88e834df9f3c9c4ca779c Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Tue, 10 Sep 2024 11:26:13 -0500
Subject: [PATCH 09/35] reverting change in azure core tracing

---
 sdk/core/azure-core/azure/core/tracing/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sdk/core/azure-core/azure/core/tracing/__init__.py b/sdk/core/azure-core/azure/core/tracing/__init__.py
index 79738b9fd650..ecf6fe6da8df 100644
--- a/sdk/core/azure-core/azure/core/tracing/__init__.py
+++ b/sdk/core/azure-core/azure/core/tracing/__init__.py
@@ -8,6 +8,5 @@
     HttpSpanMixin,
     Link,
 )
-from ._ai_inference_api_instrumentor import AiInferenceApiInstrumentor
 
 __all__ = ["AbstractSpan", "SpanKind", "HttpSpanMixin", "Link"]
a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py index a7a25ce59d69..333d16144894 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py @@ -3,6 +3,7 @@ # --------------------------------------------------------- import asyncio +import copy import functools import importlib import json @@ -65,11 +66,14 @@ def _add_request_chat_attributes(span: AbstractSpan, *args: Any, **kwargs: Any) client = args[0] endpoint = client._config.endpoint server_address, port = parse_url(endpoint) + model = INFERENCE_GEN_AI_SYSTEM_NAME + if kwargs.get('model') is not None: + model = kwargs.get('model') _set_attributes( span, ("gen_ai.operation.name", "chat"), ("gen_ai.system", INFERENCE_GEN_AI_SYSTEM_NAME), - ("gen_ai.request.model", kwargs.get("model")), + ("gen_ai.request.model", model), ("gen_ai.request.max_tokens", kwargs.get("max_tokens")), ("gen_ai.request.temperature", kwargs.get("temperature")), ("gen_ai.request.top_p", kwargs.get("top_p")), @@ -79,38 +83,65 @@ def _add_request_chat_attributes(span: AbstractSpan, *args: Any, **kwargs: Any) span.add_attribute("server.port", port) +def remove_function_call_names_and_arguments(tool_calls: list) -> list: + tool_calls_copy = copy.deepcopy(tool_calls) + for tool_call in tool_calls_copy: + if 'function' in tool_call: + if 'name' in tool_call['function']: + del tool_call['function']['name'] + if 'arguments' in tool_call['function']: + del tool_call['function']['arguments'] + if not tool_call['function']: + del tool_call['function'] + return tool_calls_copy + + +def get_finish_reasons(result): + if hasattr(result, "choices") and result.choices: + return [getattr(choice, "finish_reason", None).value if getattr(choice, "finish_reason", None) is not None else "none" for choice in result.choices] + else: + return None + + +def get_finish_reason_for_choice(choice): + return getattr(choice, "finish_reason", None).value if getattr(choice, "finish_reason", None) is not None else "none" + + def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCompletions) -> None: for choice in result.choices: if _trace_inference_content: response: Dict[str, Any] = { "message": {"content": choice.message.content}, - "finish_reason": str(choice.finish_reason), + "finish_reason": get_finish_reason_for_choice(choice), "index": choice.index, } + if choice.message.tool_calls: + response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls] attributes={ "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, "gen_ai.event.content": json.dumps(response) } else: response: Dict[str, Any] = { - "finish_reason": str(choice.finish_reason), + "finish_reason": get_finish_reason_for_choice(choice), "index": choice.index, } - attributes={ - "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - } - if choice.message.tool_calls: - response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls] + if choice.message.tool_calls: + response["message"] = {} + tool_calls_function_names_and_arguments_removed = remove_function_call_names_and_arguments(choice.message.tool_calls) + response["message"]["tool_calls"] = [tool.as_dict() for tool in 
tool_calls_function_names_and_arguments_removed] + attributes={ + "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, + "gen_ai.event.content": json.dumps(response) + } + else: + attributes={ + "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, + "gen_ai.event.content": json.dumps(response) + } span.span_instance.add_event(name="gen_ai.choice", attributes=attributes) -def get_finish_reasons(result): - if hasattr(result, "choices") and result.choices: - return [getattr(choice, "finish_reason", None).value if getattr(choice, "finish_reason", None) is not None else "none" for choice in result.choices] - else: - return None - - def _add_response_chat_attributes(span: AbstractSpan, result: _models.ChatCompletions | _models.StreamingChatCompletionsUpdate) -> None: _set_attributes( @@ -150,15 +181,16 @@ def _accumulate_response(item, accumulate: Dict[str, Any]) -> None: if item.delta.tool_calls: accumulate.setdefault("message", {}) accumulate["message"].setdefault("tool_calls", []) - for tool_call in item.delta.tool_calls: - if tool_call.id: - accumulate["message"]["tool_calls"].append({"id": tool_call.id, "type": "", "function": {"name": "", "arguments": ""}}) - if tool_call.type: - accumulate["message"]["tool_calls"][-1]["type"] = tool_call.type - if tool_call.function and tool_call.function.name: - accumulate["message"]["tool_calls"][-1]["function"]["name"] = tool_call.function.name - if tool_call.function and tool_call.function.arguments: - accumulate["message"]["tool_calls"][-1]["function"]["arguments"] += tool_call.function.arguments + if item.delta.tool_calls is not None: + for tool_call in item.delta.tool_calls: + if tool_call.id: + accumulate["message"]["tool_calls"].append({"id": tool_call.id, "type": "", "function": {"name": "", "arguments": ""}}) + if tool_call.function: + accumulate["message"]["tool_calls"][-1]["type"] = "function" + if tool_call.function and tool_call.function.name: + accumulate["message"]["tool_calls"][-1]["function"]["name"] = tool_call.function.name + if tool_call.function and tool_call.function.arguments: + accumulate["message"]["tool_calls"][-1]["function"]["arguments"] += tool_call.function.arguments def _wrapped_stream(stream_obj: _models.StreamingChatCompletions, span: AbstractSpan) -> _models.StreamingChatCompletions: @@ -175,14 +207,6 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]: _accumulate_response(item, accumulate) yield chunk - if _trace_inference_content: - span.span_instance.add_event( - name="gen_ai.choice", - attributes={ - "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(accumulate) - } - ) _add_response_chat_attributes(span, chunk) except Exception as exc: @@ -198,14 +222,24 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]: if stream_obj._done is False: if accumulate.get("finish_reason") is None: accumulate["finish_reason"] = "error" - if _trace_inference_content: - span.span_instance.add_event( - name="gen_ai.choice", - attributes={ - "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(accumulate) - } - ) + else: + # Only one choice expected with streaming + accumulate["index"] = 0 + # Delete message if content tracing is not enabled + if not _trace_inference_content: + if 'message' in accumulate: + if 'content' in accumulate['message']: + del accumulate['message']['content'] + if not accumulate['message']: + del accumulate['message'] + + span.span_instance.add_event( + name="gen_ai.choice", + attributes={ + "gen_ai.system": 
INFERENCE_GEN_AI_SYSTEM_NAME, + "gen_ai.event.content": json.dumps(accumulate) + } + ) span.finish() return StreamWrapper(stream_obj) @@ -243,8 +277,10 @@ def inner(*args, **kwargs): class_function_name = get_function_and_class_name(function, *args) if class_function_name.startswith("ChatCompletionsClient.complete"): - # span_name = {gen_ai.operation.name} {gen_ai.request.model} - span_name = f"chat {kwargs.get('model')}" + model = INFERENCE_GEN_AI_SYSTEM_NAME + if kwargs.get('model') is not None: + model = kwargs.get('model') + span_name = f"chat {model}" span = span_impl_type(name=span_name, kind=SpanKind.CLIENT) try: # tracing events not supported in azure-core-tracing-opentelemetry @@ -305,7 +341,10 @@ async def inner(*args, **kwargs): if class_function_name.startswith("ChatCompletionsClient.complete"): # span_name = {gen_ai.operation.name} {gen_ai.request.model} - span_name = f"chat {kwargs.get('model')}" + model = INFERENCE_GEN_AI_SYSTEM_NAME + if kwargs.get('model') is not None: + model = kwargs.get('model') + span_name = f"chat {model}" span = span_impl_type(name=span_name, kind=SpanKind.CLIENT) try: # tracing events not supported in azure-core-tracing-opentelemetry From 198b9cd8c17fb8a3ab2cd116eab03630201d6bc8 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Tue, 17 Sep 2024 09:50:58 -0500 Subject: [PATCH 11/35] changing span and model name for cases when model info not available --- .../_ai_inference_api_instrumentor_impl.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py index 333d16144894..69de1bda9365 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py @@ -66,7 +66,7 @@ def _add_request_chat_attributes(span: AbstractSpan, *args: Any, **kwargs: Any) client = args[0] endpoint = client._config.endpoint server_address, port = parse_url(endpoint) - model = INFERENCE_GEN_AI_SYSTEM_NAME + model = 'chat' if kwargs.get('model') is not None: model = kwargs.get('model') _set_attributes( @@ -277,10 +277,12 @@ def inner(*args, **kwargs): class_function_name = get_function_and_class_name(function, *args) if class_function_name.startswith("ChatCompletionsClient.complete"): - model = INFERENCE_GEN_AI_SYSTEM_NAME - if kwargs.get('model') is not None: + if kwargs.get('model') is None: + span_name = f"chat" + else: model = kwargs.get('model') - span_name = f"chat {model}" + span_name = f"chat {model}" + span = span_impl_type(name=span_name, kind=SpanKind.CLIENT) try: # tracing events not supported in azure-core-tracing-opentelemetry @@ -340,11 +342,12 @@ async def inner(*args, **kwargs): class_function_name = get_function_and_class_name(function, *args) if class_function_name.startswith("ChatCompletionsClient.complete"): - # span_name = {gen_ai.operation.name} {gen_ai.request.model} - model = INFERENCE_GEN_AI_SYSTEM_NAME - if kwargs.get('model') is not None: + if kwargs.get('model') is None: + span_name = f"chat" + else: model = kwargs.get('model') - span_name = f"chat {model}" + span_name = f"chat {model}" + span = span_impl_type(name=span_name, kind=SpanKind.CLIENT) try: # tracing events not supported in azure-core-tracing-opentelemetry 
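
To illustrate the span-naming rule this patch settles on: the span name follows the OpenTelemetry gen-ai convention `{gen_ai.operation.name} {gen_ai.request.model}` when a model keyword is supplied, and falls back to the bare operation name `chat` when model info is not available. A standalone sketch of that rule (the helper name is hypothetical, not part of the patch):

```python
# Mirrors the patched logic above: "chat {model}" when the caller passed a
# model keyword, bare "chat" when model info is not available.
def chat_span_name(**kwargs) -> str:
    model = kwargs.get("model")
    return "chat" if model is None else f"chat {model}"

assert chat_span_name(model="mistral-large") == "chat mistral-large"
assert chat_span_name() == "chat"
```
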
From cd8bba21a179ee8fdc7dcefcc08e754eed329cd0 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Tue, 17 Sep 2024 13:01:24 -0500 Subject: [PATCH 12/35] some fixes --- .../core/tracing/ai/inference/__init__.py | 2 +- .../_ai_inference_api_instrumentor.py | 2 +- .../_ai_inference_api_instrumentor_impl.py | 20 +++++++++++-------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py index f28d7b1d1317..bd8ddc1e73b7 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py @@ -2,4 +2,4 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # ------------------------------------ -from ._ai_inference_api_instrumentor import AiInferenceApiInstrumentor +from ._ai_inference_api_instrumentor import AIInferenceApiInstrumentor diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py index 09e25113902e..5156b77ee11b 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py @@ -5,7 +5,7 @@ from .azure_telemetry_instrumentor import AzureTelemetryInstrumentor -class AiInferenceApiInstrumentor(AzureTelemetryInstrumentor): +class AIInferenceApiInstrumentor(AzureTelemetryInstrumentor): def __init__(self): super().__init__() diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py index 69de1bda9365..d41ab2ad74ca 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py @@ -232,14 +232,18 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]: del accumulate['message']['content'] if not accumulate['message']: del accumulate['message'] - - span.span_instance.add_event( - name="gen_ai.choice", - attributes={ - "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(accumulate) - } - ) + if 'message' in accumulate: + if 'tool_calls' in accumulate['message']: + tool_calls_function_names_and_arguments_removed = remove_function_call_names_and_arguments(accumulate['message']['tool_calls']) + accumulate['message']['tool_calls'] = [tool for tool in tool_calls_function_names_and_arguments_removed] + + span.span_instance.add_event( + name="gen_ai.choice", + attributes={ + "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, + "gen_ai.event.content": json.dumps(accumulate) + } + ) span.finish() return StreamWrapper(stream_obj) From b28a3fe2e826c79054dd3fdafc51d283fe7fa6db Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Fri, 20 Sep 2024 16:55:16 -0500 Subject: [PATCH 13/35] adding sync trace tests --- .../azure-ai-inference/dev_requirements.txt | 4 +- .../tests/gen_ai_trace_verifier.py | 103 ++ .../tests/memory_trace_exporter.py | 39 + 
.../tests/test_model_inference_client.py | 1110 +++++++++++++++++ 4 files changed, 1255 insertions(+), 1 deletion(-) create mode 100644 sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py create mode 100644 sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py diff --git a/sdk/ai/azure-ai-inference/dev_requirements.txt b/sdk/ai/azure-ai-inference/dev_requirements.txt index 105486471444..4f5b55a5a48a 100644 --- a/sdk/ai/azure-ai-inference/dev_requirements.txt +++ b/sdk/ai/azure-ai-inference/dev_requirements.txt @@ -1,3 +1,5 @@ -e ../../../tools/azure-sdk-tools ../../core/azure-core -aiohttp \ No newline at end of file +../../core/azure-core-tracing-opentelemetry +aiohttp +opentelemetry-sdk \ No newline at end of file diff --git a/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py new file mode 100644 index 000000000000..aeb8266abbc7 --- /dev/null +++ b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py @@ -0,0 +1,103 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# ------------------------------------ +import datetime +import json +from opentelemetry.sdk.trace import Span + + +class GenAiTraceVerifier: + + def check_span_attributes(self, span, attributes): + # Convert the list of tuples to a dictionary for easier lookup + attribute_dict = dict(attributes) + + for attribute_name in span.attributes.keys(): + # Check if the attribute name exists in the input attributes + if attribute_name not in attribute_dict: + return False + + attribute_value = attribute_dict[attribute_name] + if isinstance(attribute_value, list): + # Check if the attribute value in the span matches the provided list + if span.attributes[attribute_name] != attribute_value: + return False + elif isinstance(attribute_value, tuple): + # Check if the attribute value in the span matches the provided list + if span.attributes[attribute_name] != attribute_value: + return False + else: + # Check if the attribute value matches the provided value + if attribute_value != "" and span.attributes[attribute_name] != attribute_value: + return False + # Check if the attribute value in the span is not empty when the provided value is "" + elif attribute_value == "" and not span.attributes[attribute_name]: + return False + + return True + + def is_valid_json(self, my_string): + try: + json_object = json.loads(my_string) + except ValueError as e1: + return False + except TypeError as e2: + return False + return True + + def check_json_string(self, expected_json, actual_json): + if self.is_valid_json(expected_json) and self.is_valid_json(actual_json): + return self.check_event_attributes(json.loads(expected_json), json.loads(actual_json)) + else: + return False + + def check_event_attributes(self, expected_dict, actual_dict): + if set(expected_dict.keys()) != set(actual_dict.keys()): + return False + for key, expected_val in expected_dict.items(): + if key not in actual_dict: + return False + actual_val = actual_dict[key] + + if self.is_valid_json(expected_val): + if not self.is_valid_json(actual_val): + return False + if not self.check_json_string(expected_val, actual_val): + return False + elif isinstance(expected_val, dict): + if not isinstance(actual_val, dict): + return False + if not self.check_event_attributes(expected_val, actual_val): + return False + elif isinstance(expected_val, list): + if not isinstance(actual_val, list): + return False + if len(expected_val) != len(actual_val): + return False + for expected_list, 
actual_list in zip(expected_val, actual_val): + if not self.check_event_attributes(expected_list, actual_list): + return False + elif isinstance(expected_val, str) and expected_val == "*": + if actual_val == "": + return False + elif expected_val != actual_val: + return False + return True + + def check_span_events(self, span, expected_events): + span_events = list(span.events) # Create a list of events from the span + + for expected_event in expected_events: + for actual_event in span_events: + if expected_event['name'] == actual_event.name: + if not self.check_event_attributes(expected_event['attributes'], actual_event.attributes): + return False + span_events.remove(actual_event) # Remove the matched event from the span_events + break + else: + return False # If no match found for an expected event + + if len(span_events) > 0: # If there are any additional events in the span_events + return False + + return True diff --git a/sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py b/sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py new file mode 100644 index 000000000000..7563e65cfc87 --- /dev/null +++ b/sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py @@ -0,0 +1,39 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +from opentelemetry.sdk.trace import Span +from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult +from typing import List, Sequence + + +class MemoryTraceExporter(SpanExporter): + + def __init__(self): + self._trace_list = [] + + def export(self, spans: Sequence[Span]) -> SpanExportResult: + for span in spans: + self._trace_list.append(span) + return SpanExportResult.SUCCESS + + def shutdown(self) -> None: + self._trace_list.clear() + + def get_trace_list(self) -> List[Span]: + return self._trace_list + + def contains(self, text: str) -> bool: + for span in self._trace_list: + if text in str(span): + return True + return False + + def get_spans_by_name_starts_with(self, name_prefix: str) -> List[Span]: + return [span for span in self._trace_list if span.name.startswith(name_prefix)] + + def get_spans_by_name(self, name: str) -> List[Span]: + return [span for span in self._trace_list if span.name == name] + + def get_spans(self) -> List[Span]: + return [span for span in self._trace_list] \ No newline at end of file diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index 89d1e4d90b3d..852bd908d5c3 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -3,6 +3,7 @@ # Licensed under the MIT License. 
# ------------------------------------ import os +import datetime import json import azure.ai.inference as sdk @@ -13,14 +14,36 @@ ServicePreparerEmbeddings, ) from azure.core.pipeline.transport import RequestsTransport +from azure.core.settings import settings from devtools_testutils import recorded_by_proxy from azure.core.exceptions import AzureError, ServiceRequestError from azure.core.credentials import AzureKeyCredential +from azure.core.tracing.ai.inference import AIInferenceApiInstrumentor +from memory_trace_exporter import MemoryTraceExporter +from gen_ai_trace_verifier import GenAiTraceVerifier +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +CONTENT_TRACING_ENV_VARIABLE = "AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED" +content_tracing_initial_value = os.getenv(CONTENT_TRACING_ENV_VARIABLE) # The test class name needs to start with "Test" to get collected by pytest class TestModelClient(ModelClientTestBase): + @classmethod + def teardown_class(cls): + mode = 'a' if os.path.exists("teardown.txt") else 'w' + + with open("teardown.txt", mode) as file: + # Get current timestamp + timestamp = datetime.datetime.now() + text = "Setting " + CONTENT_TRACING_ENV_VARIABLE + " to " + str(content_tracing_initial_value) + # Add timestamp to the start of the text and write to file + file.write(f'{timestamp}: {text}\n') + if content_tracing_initial_value is not None: + os.environ[CONTENT_TRACING_ENV_VARIABLE] = content_tracing_initial_value + # ********************************************************************************** # # UNIT TESTS @@ -795,3 +818,1090 @@ def test_embeddings_on_chat_completion_endpoint(self, **kwargs): assert "not found" in e.message.lower() or "not allowed" in e.message.lower() client.close() assert exception_caught + + + # ********************************************************************************** + # + # TRACING TESTS - CHAT COMPLETIONS + # + # ********************************************************************************** + + def setup_memory_trace_exporter(self) -> MemoryTraceExporter: + # Setup Azure Core settings to use OpenTelemetry tracing + settings.tracing_implementation = "OpenTelemetry" + trace.set_tracer_provider(TracerProvider()) + tracer = trace.get_tracer(__name__) + memoryExporter = MemoryTraceExporter() + span_processor = SimpleSpanProcessor(memoryExporter) + trace.get_tracer_provider().add_span_processor(span_processor) + return span_processor, memoryExporter + + def modify_env_var(self, name, new_value): + current_value = os.getenv(name) + os.environ[name] = new_value + return current_value + + @ServicePreparerChatCompletions() + def test_instrumentation(self, **kwargs): + client = self._create_chat_client(**kwargs) + exception_caught = False + try: + assert AIInferenceApiInstrumentor().is_instrumented() == False + AIInferenceApiInstrumentor().instrument() + assert AIInferenceApiInstrumentor().is_instrumented() == True + AIInferenceApiInstrumentor().uninstrument() + assert AIInferenceApiInstrumentor().is_instrumented() == False + except RuntimeError as e: + exception_caught = True + print(e) + client.close() + assert exception_caught == False + + @ServicePreparerChatCompletions() + def test_instrumenting_twice_causes_exception(self, **kwargs): + client = self._create_chat_client(**kwargs) + exception_caught = False + instrumented_once = False + try: + AIInferenceApiInstrumentor().instrument() + instrumented_once = True + 
AIInferenceApiInstrumentor().instrument() + except RuntimeError as e: + exception_caught = True + print(e) + client.close() + assert instrumented_once == True + assert exception_caught == True + + @ServicePreparerChatCompletions() + def test_uninstrumenting_uninstrumented_causes_exception(self, **kwargs): + client = self._create_chat_client(**kwargs) + exception_caught = False + try: + AIInferenceApiInstrumentor().uninstrument() + except RuntimeError as e: + exception_caught = True + print(e) + client.close() + assert exception_caught == True + + @ServicePreparerChatCompletions() + def test_uninstrumenting_twise_causes_exception(self, **kwargs): + client = self._create_chat_client(**kwargs) + exception_caught = False + uninstrumented_once = False + try: + AIInferenceApiInstrumentor().instrument() + AIInferenceApiInstrumentor().uninstrument() + uninstrumented_once = True + AIInferenceApiInstrumentor().uninstrument() + except RuntimeError as e: + exception_caught = True + print(e) + client.close() + assert uninstrumented_once == True + assert exception_caught == True + + @ServicePreparerChatCompletions() + def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + response = client.complete( + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the capital of France?"), + ], + ) + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 1 + span = spans[0] + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match == True + + expected_events = [ + { + 'name': 'gen_ai.choice', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"finish_reason": "stop", "index": 0}' + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(span, expected_events) + assert events_match == True + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + response = client.complete( + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the capital of France?"), + ], + ) + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 1 + span = spans[0] + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + 
('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match == True + + expected_events = [ + { + 'name': 'gen_ai.system.message', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"role": "system", "content": "You are a helpful assistant."}' + } + }, + { + 'name': 'gen_ai.user.message', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"role": "user", "content": "What is the capital of France?"}' + } + }, + { + 'name': 'gen_ai.choice', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"message": {"content": "*"}, "finish_reason": "stop", "index": 0}' + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(span, expected_events) + assert events_match == True + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kwargs): + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + response = client.complete( + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the capital of France?"), + ], + stream=True + ) + response_content = "" + for update in response: + if update.choices: + response_content = response_content + update.choices[0].delta.content + client.close() + + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 1 + span = spans[0] + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match == True + + expected_events = [ + { + 'name': 'gen_ai.choice', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"finish_reason": "stop", "index": 0}' + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(span, expected_events) + assert events_match == True + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwargs): + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + response = client.complete( + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the capital of France?"), + ], + stream=True + ) + response_content = "" + for update in response: + if update.choices: + response_content = response_content + update.choices[0].delta.content + client.close() + + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = 
exporter.get_spans_by_name("chat") + assert len(spans) == 1 + span = spans[0] + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match == True + + expected_events = [ + { + 'name': 'gen_ai.system.message', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"role": "system", "content": "You are a helpful assistant."}' + } + }, + { + 'name': 'gen_ai.user.message', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"role": "user", "content": "What is the capital of France?"}' + } + }, + { + 'name': 'gen_ai.choice', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"message": {"content": "*"}, "finish_reason": "stop", "index": 0}' + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(span, expected_events) + assert events_match == True + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_with_function_call_tracing_content_recording_enabled(self, **kwargs): + import json + from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition + from azure.ai.inference import ChatCompletionsClient + + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + + def get_weather(city: str) -> str: + if city == "Seattle": + return "Nice weather" + elif city == "New York City": + return "Good weather" + else: + return "Unavailable" + + weather_description = ChatCompletionsToolDefinition( + function=FunctionDefinition( + name="get_weather", + description="Returns description of the weather in the specified city", + parameters={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city for which weather info is requested", + }, + }, + "required": ["city"], + }, + ) + ) + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the weather in Seattle?"), + ] + + response = client.complete(messages=messages, tools=[weather_description]) + + if response.choices[0].finish_reason == CompletionsFinishReason.TOOL_CALLS: + # Append the previous model response to the chat history + messages.append(AssistantMessage(tool_calls=response.choices[0].message.tool_calls)) + # The tool should be of type function call. 
+ if response.choices[0].message.tool_calls is not None and len(response.choices[0].message.tool_calls) > 0: + for tool_call in response.choices[0].message.tool_calls: + if type(tool_call) is ChatCompletionsToolCall: + function_args = json.loads(tool_call.function.arguments.replace("'", '"')) + print(f"Calling function `{tool_call.function.name}` with arguments {function_args}") + callable_func = locals()[tool_call.function.name] + function_response = callable_func(**function_args) + print(f"Function response = {function_response}") + # Provide the tool response to the model, by appending it to the chat history + messages.append(ToolMessage(tool_call_id=tool_call.id, content=function_response)) + # With the additional tools information on hand, get another response from the model + response = client.complete(messages=messages, tools=[weather_description]) + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 2 + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('tool_calls',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) + assert attributes_match == True + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) + assert attributes_match == True + + expected_events = [ + { + "name": "gen_ai.system.message", + "timestamp": "", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}" + } + }, + { + "name": "gen_ai.user.message", + "timestamp": "", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" + } + }, + { + "name": "gen_ai.choice", + "timestamp": "", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"message\": {\"content\": \"\", \"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\":\\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}, \"finish_reason\": \"tool_calls\", \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[0], expected_events) + assert events_match == True + + expected_events = [ + { + "name": "gen_ai.system.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}" + } + }, + { + "name": "gen_ai.user.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" + } + }, + { + "name": "gen_ai.assistant.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": 
"az.ai.inference", + "gen_ai.event.content": "{\"role\": \"assistant\", \"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\": \\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}" + } + }, + { + "name": "gen_ai.tool.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"tool\", \"tool_call_id\": \"*\", \"content\": \"Nice weather\"}" + } + }, + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"message\": {\"content\": \"*\"}, \"finish_reason\": \"stop\", \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) + assert events_match == True + + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_with_function_call_tracing_content_recording_disabled(self, **kwargs): + import json + from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition + from azure.ai.inference import ChatCompletionsClient + + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + + def get_weather(city: str) -> str: + if city == "Seattle": + return "Nice weather" + elif city == "New York City": + return "Good weather" + else: + return "Unavailable" + + weather_description = ChatCompletionsToolDefinition( + function=FunctionDefinition( + name="get_weather", + description="Returns description of the weather in the specified city", + parameters={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city for which weather info is requested", + }, + }, + "required": ["city"], + }, + ) + ) + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the weather in Seattle?"), + ] + + response = client.complete(messages=messages, tools=[weather_description]) + + if response.choices[0].finish_reason == CompletionsFinishReason.TOOL_CALLS: + # Append the previous model response to the chat history + messages.append(AssistantMessage(tool_calls=response.choices[0].message.tool_calls)) + # The tool should be of type function call. 
+ if response.choices[0].message.tool_calls is not None and len(response.choices[0].message.tool_calls) > 0: + for tool_call in response.choices[0].message.tool_calls: + if type(tool_call) is ChatCompletionsToolCall: + function_args = json.loads(tool_call.function.arguments.replace("'", '"')) + print(f"Calling function `{tool_call.function.name}` with arguments {function_args}") + callable_func = locals()[tool_call.function.name] + function_response = callable_func(**function_args) + print(f"Function response = {function_response}") + # Provide the tool response to the model, by appending it to the chat history + messages.append(ToolMessage(tool_call_id=tool_call.id, content=function_response)) + # With the additional tools information on hand, get another response from the model + response = client.complete(messages=messages, tools=[weather_description]) + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 2 + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('tool_calls',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) + assert attributes_match == True + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) + assert attributes_match == True + + expected_events = [ + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"finish_reason\": \"tool_calls\", \"index\": 0, \"message\": {\"tool_calls\": [{\"function\": {\"call_id\": null}, \"id\": \"*\", \"type\": \"function\"}]}}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[0], expected_events) + assert events_match == True + + expected_events = [ + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"finish_reason\": \"stop\", \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) + assert events_match == True + + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_with_function_call_streaming_tracing_content_recording_enabled(self, **kwargs): + import json + from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition + from azure.ai.inference import ChatCompletionsClient + + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + + def get_weather(city: str) -> str: + if city == "Seattle": + return "Nice weather" + elif city == "New York 
City": + return "Good weather" + else: + return "Unavailable" + + weather_description = ChatCompletionsToolDefinition( + function=FunctionDefinition( + name="get_weather", + description="Returns description of the weather in the specified city", + parameters={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city for which weather info is requested", + }, + }, + "required": ["city"], + }, + ) + ) + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the weather in Seattle?"), + ] + + response = client.complete(messages=messages, tools=[weather_description]) + + if response.choices[0].finish_reason == CompletionsFinishReason.TOOL_CALLS: + # Append the previous model response to the chat history + messages.append(AssistantMessage(tool_calls=response.choices[0].message.tool_calls)) + # The tool should be of type function call. + if response.choices[0].message.tool_calls is not None and len(response.choices[0].message.tool_calls) > 0: + for tool_call in response.choices[0].message.tool_calls: + if type(tool_call) is ChatCompletionsToolCall: + function_args = json.loads(tool_call.function.arguments.replace("'", '"')) + print(f"Calling function `{tool_call.function.name}` with arguments {function_args}") + callable_func = locals()[tool_call.function.name] + function_response = callable_func(**function_args) + print(f"Function response = {function_response}") + # Provide the tool response to the model, by appending it to the chat history + messages.append(ToolMessage(tool_call_id=tool_call.id, content=function_response)) + # With the additional tools information on hand, get another response from the model + response = client.complete(messages=messages, tools=[weather_description]) + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 2 + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('tool_calls',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) + assert attributes_match == True + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) + assert attributes_match == True + + expected_events = [ + { + "name": "gen_ai.system.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}" + } + }, + { + "name": "gen_ai.user.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" + } + }, + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + 
"gen_ai.event.content": "{\"message\": {\"content\": \"\", \"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\":\\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}, \"finish_reason\": \"tool_calls\", \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[0], expected_events) + assert events_match == True + + expected_events = [ + { + "name": "gen_ai.system.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}" + } + }, + { + "name": "gen_ai.user.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" + } + }, + { + "name": "gen_ai.assistant.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"assistant\", \"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\": \\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}" + } + }, + { + "name": "gen_ai.tool.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"tool\", \"tool_call_id\": \"*\", \"content\": \"Nice weather\"}" + } + }, + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"message\": {\"content\": \"*\"}, \"finish_reason\": \"stop\", \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) + assert events_match == True + + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_with_function_call_streaming_tracing_content_recording_enabled(self, **kwargs): + import json + from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, FunctionCall, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition + from azure.ai.inference import ChatCompletionsClient + + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + + def get_weather(city: str) -> str: + if city == "Seattle": + return "Nice weather" + elif city == "New York City": + return "Good weather" + else: + return "Unavailable" + + weather_description = ChatCompletionsToolDefinition( + function=FunctionDefinition( + name="get_weather", + description="Returns description of the weather in the specified city", + parameters={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city for which weather info is requested", + }, + }, + "required": ["city"], + }, + ) + ) + messages=[ + sdk.models.SystemMessage(content="You are a helpful AI assistant."), + sdk.models.UserMessage(content="What is the weather in Seattle?"), + ] + + response = client.complete( + messages=messages, + tools=[weather_description], + stream=True) + + # At this point we expect a function tool call in the model response + tool_call_id: str = "" + function_name: str = "" + function_args: str = "" + for update in response: + if update.choices[0].delta.tool_calls is not None: + if 
update.choices[0].delta.tool_calls[0].function.name is not None: + function_name = update.choices[0].delta.tool_calls[0].function.name + if update.choices[0].delta.tool_calls[0].id is not None: + tool_call_id = update.choices[0].delta.tool_calls[0].id + function_args += update.choices[0].delta.tool_calls[0].function.arguments or "" + + # Append the previous model response to the chat history + messages.append( + AssistantMessage( + tool_calls=[ + ChatCompletionsToolCall( + id=tool_call_id, + function=FunctionCall( + name=function_name, + arguments=function_args + ) + ) + ] + ) + ) + + # Make the function call + callable_func = locals()[function_name] + function_args_mapping = json.loads(function_args.replace("'", '"')) + function_response = callable_func(**function_args_mapping) + + # Append the function response as a tool message to the chat history + messages.append( + ToolMessage( + tool_call_id=tool_call_id, + content=function_response + ) + ) + + # With the additional tools information on hand, get another streaming response from the model + response = client.complete( + messages=messages, + tools=[weather_description], + stream=True + ) + + content = "" + for update in response: + content = content + update.choices[0].delta.content + + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 2 + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('tool_calls',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) + assert attributes_match == True + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) + assert attributes_match == True + + expected_events = [ + { + "name": "gen_ai.system.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful AI assistant.\"}" + } + }, + { + "name": "gen_ai.user.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" + } + }, + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"finish_reason\": \"tool_calls\", \"message\": {\"tool_calls\": [{\"id\": \"*\", \"type\": \"function\", \"function\": {\"name\": \"get_weather\", \"arguments\": \"{\\\"city\\\": \\\"Seattle\\\"}\"}}]}, \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[0], expected_events) + assert events_match == True + + expected_events = [ + { + "name": "gen_ai.system.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a 
helpful AI assistant.\"}" + } + }, + { + "name": "gen_ai.user.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" + } + }, + { + "name": "gen_ai.assistant.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"assistant\", \"tool_calls\": [{\"id\": \"*\", \"function\": {\"name\": \"get_weather\", \"arguments\": \"{\\\"city\\\": \\\"Seattle\\\"}\"}, \"type\": \"function\"}]}" + } + }, + { + "name": "gen_ai.tool.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"tool\", \"tool_call_id\": \"*\", \"content\": \"Nice weather\"}" + } + }, + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"message\": {\"content\": \"*\"}, \"finish_reason\": \"stop\", \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) + assert events_match == True + + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_with_function_call_streaming_tracing_content_recording_disabled(self, **kwargs): + import json + from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, FunctionCall, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition + from azure.ai.inference import ChatCompletionsClient + + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + + def get_weather(city: str) -> str: + if city == "Seattle": + return "Nice weather" + elif city == "New York City": + return "Good weather" + else: + return "Unavailable" + + weather_description = ChatCompletionsToolDefinition( + function=FunctionDefinition( + name="get_weather", + description="Returns description of the weather in the specified city", + parameters={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city for which weather info is requested", + }, + }, + "required": ["city"], + }, + ) + ) + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the weather in Seattle?"), + ] + + response = client.complete( + messages=messages, + tools=[weather_description], + stream=True) + + # At this point we expect a function tool call in the model response + tool_call_id: str = "" + function_name: str = "" + function_args: str = "" + for update in response: + if update.choices[0].delta.tool_calls is not None: + if update.choices[0].delta.tool_calls[0].function.name is not None: + function_name = update.choices[0].delta.tool_calls[0].function.name + if update.choices[0].delta.tool_calls[0].id is not None: + tool_call_id = update.choices[0].delta.tool_calls[0].id + function_args += update.choices[0].delta.tool_calls[0].function.arguments or "" + + # Append the previous model response to the chat history + messages.append( + AssistantMessage( + tool_calls=[ + ChatCompletionsToolCall( + id=tool_call_id, + function=FunctionCall( + name=function_name, + arguments=function_args + ) + ) + ] + ) + ) + + # Make the function call + callable_func = 
locals()[function_name] + function_args_mapping = json.loads(function_args.replace("'", '"')) + function_response = callable_func(**function_args_mapping) + + # Append the function response as a tool message to the chat history + messages.append( + ToolMessage( + tool_call_id=tool_call_id, + content=function_response + ) + ) + + # With the additional tools information on hand, get another streaming response from the model + response = client.complete( + messages=messages, + tools=[weather_description], + stream=True + ) + + content = "" + for update in response: + content = content + update.choices[0].delta.content + + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 2 + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('tool_calls',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) + assert attributes_match == True + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) + assert attributes_match == True + + expected_events = [ + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"finish_reason\": \"tool_calls\", \"message\": {\"tool_calls\": [{\"id\": \"*\", \"type\": \"function\"}]}, \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[0], expected_events) + assert events_match == True + + expected_events = [ + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"finish_reason\": \"stop\", \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) + assert events_match == True + + AIInferenceApiInstrumentor().uninstrument() \ No newline at end of file From b549b38ae3a49775a5b8168cabf85bd811f1a734 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Mon, 23 Sep 2024 10:02:59 -0500 Subject: [PATCH 14/35] fix and async trace test --- .../test_model_inference_async_client.py | 78 +++++++++++++++++++ .../_ai_inference_api_instrumentor_impl.py | 6 +- 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py index 5c1717dcd764..fd630ffa706b 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py @@ -14,14 +14,28 @@ ServicePreparerEmbeddings, ) from azure.core.pipeline.transport import AioHttpTransport +from azure.core.settings import settings from devtools_testutils.aio import recorded_by_proxy_async from azure.core.exceptions import AzureError, ServiceRequestError from 
azure.core.credentials import AzureKeyCredential +from azure.core.tracing.ai.inference import AIInferenceApiInstrumentor +from memory_trace_exporter import MemoryTraceExporter +from gen_ai_trace_verifier import GenAiTraceVerifier +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +CONTENT_TRACING_ENV_VARIABLE = "AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED" +content_tracing_initial_value = os.getenv(CONTENT_TRACING_ENV_VARIABLE) # The test class name needs to start with "Test" to get collected by pytest class TestModelAsyncClient(ModelClientTestBase): + @classmethod + def teardown_class(cls): + if content_tracing_initial_value is not None: + os.environ[CONTENT_TRACING_ENV_VARIABLE] = content_tracing_initial_value + # ********************************************************************************** # # EMBEDDINGS REGRESSION TESTS - NO SERVICE RESPONSE REQUIRED @@ -677,3 +691,67 @@ async def test_async_embeddings_with_auth_failure(self, **kwargs): assert "auth token validation failed" in e.message.lower() await client.close() assert exception_caught + + # ********************************************************************************** + # + # TRACING TESTS - CHAT COMPLETIONS + # + # ********************************************************************************** + + def setup_memory_trace_exporter(self) -> MemoryTraceExporter: + # Setup Azure Core settings to use OpenTelemetry tracing + settings.tracing_implementation = "OpenTelemetry" + trace.set_tracer_provider(TracerProvider()) + tracer = trace.get_tracer(__name__) + memoryExporter = MemoryTraceExporter() + span_processor = SimpleSpanProcessor(memoryExporter) + trace.get_tracer_provider().add_span_processor(span_processor) + return span_processor, memoryExporter + + def modify_env_var(self, name, new_value): + current_value = os.getenv(name) + os.environ[name] = new_value + return current_value + + @ServicePreparerChatCompletions() + async def test_chat_completion_async_tracing_content_recording_disabled(self, **kwargs): + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") + client = self._create_async_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + response = await client.complete( + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the capital of France?"), + ], + ) + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 1 + span = spans[0] + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match == True + + expected_events = [ + { + 'name': 'gen_ai.choice', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"finish_reason": "stop", "index": 0}' + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(span, expected_events) + assert events_match == True + AIInferenceApiInstrumentor().uninstrument() \ No newline at end of 
file diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py index d41ab2ad74ca..c003da6e4132 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py @@ -357,7 +357,7 @@ async def inner(*args, **kwargs): # tracing events not supported in azure-core-tracing-opentelemetry # so need to access the span instance directly with span_impl_type.change_context(span.span_instance): - _add_request_span_attributes(span, span_name, kwargs) + _add_request_span_attributes(span, span_name, args, kwargs) result = await function(*args, **kwargs) if kwargs.get("stream") is True: return _wrapped_stream(result, span) @@ -395,7 +395,9 @@ def _inference_apis(): sync_apis = ( ("azure.ai.inference", "ChatCompletionsClient", "complete", TraceType.INFERENCE, "inference_chat_completions_complete"), ) - async_apis = () + async_apis = ( + ("azure.ai.inference.aio", "ChatCompletionsClient", "complete", TraceType.INFERENCE, "inference_chat_completions_complete"), + ) return sync_apis, async_apis From 469d32ce3b2f0dc343520380d7b1f6c43eef5294 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Mon, 23 Sep 2024 16:24:30 -0500 Subject: [PATCH 15/35] updating readme and setup --- sdk/ai/azure-ai-inference/README.md | 91 +++++++++++++++++++ sdk/ai/azure-ai-inference/setup.py | 3 + .../tests/test_model_inference_client.py | 20 ++-- 3 files changed, 100 insertions(+), 14 deletions(-) diff --git a/sdk/ai/azure-ai-inference/README.md b/sdk/ai/azure-ai-inference/README.md index faeab5703529..25d60dfe32e8 100644 --- a/sdk/ai/azure-ai-inference/README.md +++ b/sdk/ai/azure-ai-inference/README.md @@ -57,6 +57,14 @@ To update an existing installation of the package, use: pip install --upgrade azure-ai-inference ``` +If you want to install Azure AI Inferencing package with support for OpenTelemetry based tracing, use the following command: + +```bash +pip install azure-ai-inference[trace] +``` + + + ## Key concepts ### Create and authenticate a client directly, using API key or GitHub token @@ -451,6 +459,89 @@ TBD To generate embeddings for additional phrases, simply call `client.embed` multiple times using the same `client`. --> +## Tracing + +The Azure AI Inferencing API Tracing library provides tracing for Azure AI Inference client library for Python. Refer to Installation chapter above for installation instructions. + +### Setup + +The environment variable AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED controls whether the actual message contents will be included in the traces or not. By default, the message contents are not include as part of the trace. Set the value of the environment variable to true (case insensitive) for the message contents to be included as part of the trace. Any other value will cause the message contents not to be traced. + +You also need to configure the tracing implementation in your code, like so: + +``` +from azure.core.settings import settings +settings.tracing_implementation = "opentelemetry" +``` + +### Trace Exporter(s) + +In order for the traces to be captured, you need to setup the applicable trace exporters. The chosen exporter will be based on where you want the traces to be output. You can also implement your own exporter. 
The first example below shows how to setup an exporter to Azure Monitor. +Please refer to [this](https://learn.microsoft.com/en-us/azure/azure-monitor/app/create-workspace-resource?tabs=bicep) documentation for more information about how to created Azure Monitor resource. +Configure the APPLICATIONINSIGHTS_CONNECTION_STRING based on your Azure Monitor resource. + +``` +# Setup tracing to Azure Monitor +from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter +trace.set_tracer_provider(TracerProvider()) +tracer = trace.get_tracer(__name__) +span_processor = BatchSpanProcessor( + AzureMonitorTraceExporter.from_connection_string( + os.environ["APPLICATIONINSIGHTS_CONNECTION_STRING"] + ) +) +trace.get_tracer_provider().add_span_processor(span_processor) +``` + +The following example shows how to setup tracing to console output. + +``` +# Setup tracing to console +exporter = ConsoleSpanExporter() +trace.set_tracer_provider(TracerProvider()) +tracer = trace.get_tracer(__name__) +trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(exporter)) +``` +### Instrumentation + +Use the AIInferenceInstrumentor to instrument the Azure AI Inferencing API for LLM tracing, this will cause the LLM traces to be emitted from Azure AI Inferencing API. + +``` +from azure.core.tracing import AIInferenceApiInstrumentor +# Instrument AI Inference API +AIInferenceApiInstrumentor().instrument() +``` + +It is also possible to uninstrument the Azure AI Inferencing API by using the uninstrument call. After this call, the LLM traces will no longer be emitted by the Azure AI Inferencing API until instrument is called again. + +``` +AIInferenceApiInstrumentor().uninstrument() +``` + +### Tracing Your Own Functions +The @tracer.start_as_current_span decorator can be used to trace your own functions. This will trace the function parameters and their values. You can also add further attributes to the span in the function implementation as demonstrated below. Note that you will have to setup the tracer in your code before using the decorator. + +``` +# The @tracer.start_as_current_span decorator will +# trace the function call and enable adding additional attributes +# to the span in the function implementation. 
+@tracer.start_as_current_span("get_temperature") +def get_temperature(city: str) -> str: + + # Adding attributes to the current span + span = trace.get_current_span() + span.set_attribute("requested_city", city) + + if city == "Seattle": + return "75" + elif city == "New York City": + return "80" + else: + return "Unavailable" + + +``` + ## Troubleshooting ### Exceptions diff --git a/sdk/ai/azure-ai-inference/setup.py b/sdk/ai/azure-ai-inference/setup.py index c7b5395a3f9f..54039451d91a 100644 --- a/sdk/ai/azure-ai-inference/setup.py +++ b/sdk/ai/azure-ai-inference/setup.py @@ -68,4 +68,7 @@ "typing-extensions>=4.6.0", ], python_requires=">=3.8", + extras_require={ + 'trace': ['azure-core-tracing-opentelemetry', 'opentelemetry-sdk', 'azure-monitor-opentelemetry-exporter'] + } ) diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index 852bd908d5c3..16568e77f486 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -33,14 +33,6 @@ class TestModelClient(ModelClientTestBase): @classmethod def teardown_class(cls): - mode = 'a' if os.path.exists("teardown.txt") else 'w' - - with open("teardown.txt", mode) as file: - # Get current timestamp - timestamp = datetime.datetime.now() - text = "Setting " + CONTENT_TRACING_ENV_VARIABLE + " to " + str(content_tracing_initial_value) - # Add timestamp to the start of the text and write to file - file.write(f'{timestamp}: {text}\n') if content_tracing_initial_value is not None: os.environ[CONTENT_TRACING_ENV_VARIABLE] = content_tracing_initial_value @@ -1205,7 +1197,7 @@ def get_weather(city: str) -> str: expected_events = [ { "name": "gen_ai.system.message", - "timestamp": "", + "timestamp": "*", "attributes": { "gen_ai.system": "az.ai.inference", "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}" @@ -1213,7 +1205,7 @@ def get_weather(city: str) -> str: }, { "name": "gen_ai.user.message", - "timestamp": "", + "timestamp": "*", "attributes": { "gen_ai.system": "az.ai.inference", "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" @@ -1221,7 +1213,7 @@ def get_weather(city: str) -> str: }, { "name": "gen_ai.choice", - "timestamp": "", + "timestamp": "*", "attributes": { "gen_ai.system": "az.ai.inference", "gen_ai.event.content": "{\"message\": {\"content\": \"\", \"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\":\\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}, \"finish_reason\": \"tool_calls\", \"index\": 0}" @@ -1481,7 +1473,7 @@ def get_weather(city: str) -> str: expected_events = [ { "name": "gen_ai.system.message", - "timestamp": "*", + "timestamp": "", "attributes": { "gen_ai.system": "az.ai.inference", "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}" @@ -1489,7 +1481,7 @@ def get_weather(city: str) -> str: }, { "name": "gen_ai.user.message", - "timestamp": "*", + "timestamp": "", "attributes": { "gen_ai.system": "az.ai.inference", "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" @@ -1497,7 +1489,7 @@ def get_weather(city: str) -> str: }, { "name": "gen_ai.choice", - "timestamp": "*", + "timestamp": "", "attributes": { "gen_ai.system": "az.ai.inference", "gen_ai.event.content": "{\"message\": {\"content\": \"\", 
\"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\":\\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}, \"finish_reason\": \"tool_calls\", \"index\": 0}" From f1424a13026bde10c40c41db450b4e183c4a8f0a Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Mon, 23 Sep 2024 16:32:08 -0500 Subject: [PATCH 16/35] adding tracing sample --- .../sample_chat_completions_with_tracing.py | 168 ++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py new file mode 100644 index 000000000000..8ac19ab1d56f --- /dev/null +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py @@ -0,0 +1,168 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import os +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.sdk.trace.export import ConsoleSpanExporter +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from azure.ai.inference import ChatCompletionsClient +from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason +from azure.core.credentials import AzureKeyCredential +from azure.core.tracing.ai.inference import AIInferenceApiInstrumentor +from azure.core.settings import settings + + +# Setup tracing to console +exporter = ConsoleSpanExporter() +trace.set_tracer_provider(TracerProvider()) +tracer = trace.get_tracer(__name__) +trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(exporter)) + +# Use the following code to setup tracing to Application Insights +# from azure.monitor.opentelemetry.exporter import AzureMonitorTraceExporter +# trace.set_tracer_provider(TracerProvider()) +# tracer = trace.get_tracer(__name__) +# span_processor = BatchSpanProcessor( +# AzureMonitorTraceExporter.from_connection_string( +# os.environ["APPLICATIONINSIGHTS_CONNECTION_STRING"] +# ) +# ) +# trace.get_tracer_provider().add_span_processor(span_processor) + + +def chat_completion_streaming(key, endpoint, model_name): + client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) + response = client.complete( + stream=True, + messages=[ + SystemMessage(content="You are a helpful assistant."), + UserMessage(content="Tell me about software engineering in five sentences."), + ], + model=model_name, + ) + for update in response: + if update.choices: + print(update.choices[0].delta.content or "", end="") + pass + client.close() + + +# The tracer.start_as_current_span decorator will trace the function call and enable adding additional attributes +# to the span in the function implementation. Note that this will trace the function parameters and their values. 
+@tracer.start_as_current_span("get_temperature") +def get_temperature(city: str) -> str: + + # Adding attributes to the current span + span = trace.get_current_span() + span.set_attribute("requested_city", city) + + if city == "Seattle": + return "75" + elif city == "New York City": + return "80" + else: + return "Unavailable" + + +def get_weather(city: str) -> str: + if city == "Seattle": + return "Nice weather" + elif city == "New York City": + return "Good weather" + else: + return "Unavailable" + + +def chat_completion_with_function_call(key, endpoint, model_name): + import json + from azure.ai.inference.models import ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition + + weather_description = ChatCompletionsToolDefinition( + function=FunctionDefinition( + name="get_weather", + description="Returns description of the weather in the specified city", + parameters={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city for which weather info is requested", + }, + }, + "required": ["city"], + }, + ) + ) + + temperature_in_city = ChatCompletionsToolDefinition( + function=FunctionDefinition( + name="get_temperature", + description="Returns the current temperature for the specified city", + parameters={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city for which temperature info is requested", + }, + }, + "required": ["city"], + }, + ) + ) + + client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) + messages=[ + SystemMessage(content="You are a helpful assistant."), + UserMessage(content="What is the weather and temperature in Seattle?"), + ] + + response = client.complete(messages=messages, model=model_name, tools=[weather_description, temperature_in_city]) + + if response.choices[0].finish_reason == CompletionsFinishReason.TOOL_CALLS: + # Append the previous model response to the chat history + messages.append(AssistantMessage(tool_calls=response.choices[0].message.tool_calls)) + # The tool should be of type function call. 
+ if response.choices[0].message.tool_calls is not None and len(response.choices[0].message.tool_calls) > 0: + for tool_call in response.choices[0].message.tool_calls: + if type(tool_call) is ChatCompletionsToolCall: + function_args = json.loads(tool_call.function.arguments.replace("'", '"')) + print(f"Calling function `{tool_call.function.name}` with arguments {function_args}") + callable_func = globals()[tool_call.function.name] + function_response = callable_func(**function_args) + print(f"Function response = {function_response}") + # Provide the tool response to the model, by appending it to the chat history + messages.append(ToolMessage(tool_call_id=tool_call.id, content=function_response)) + # With the additional tools information on hand, get another response from the model + response = client.complete(messages=messages, model=model_name, tools=[weather_description, temperature_in_city]) + + print(f"Model response = {response.choices[0].message.content}") + + +def main(): + # Setup Azure Core settings to use OpenTelemetry tracing + settings.tracing_implementation = "OpenTelemetry" + + # Instrument AI Inference API + AIInferenceApiInstrumentor().instrument() + + # Read AI Inference API configuration + endpoint = os.environ.get("AZUREAI_ENDPOINT_URL") + key = os.environ.get("AZUREAI_ENDPOINT_KEY") + model_name = os.environ.get("AZUREAI_MODEL_NAME") + + print("===== starting chat_completion_streaming() =====") + chat_completion_streaming(key, endpoint, model_name) + print("===== chat_completion_streaming() done =====") + + print("===== starting chat_completion_with_function_call() =====") + chat_completion_with_function_call(key, endpoint, model_name) + print("===== chat_completion_with_function_call() done =====") + AIInferenceApiInstrumentor().uninstrument() + + +if __name__ == "__main__": + main() From 92da09a7b15aa060e44424540385067cc4535a2c Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Wed, 25 Sep 2024 13:50:03 -0500 Subject: [PATCH 17/35] changes based on review comments --- sdk/ai/azure-ai-inference/README.md | 164 +++++---- sdk/ai/azure-ai-inference/samples/README.md | 1 + .../sample_chat_completions_with_tracing.py | 93 +++-- sdk/ai/azure-ai-inference/setup.py | 2 +- .../tests/gen_ai_trace_verifier.py | 9 +- .../test_model_inference_async_client.py | 6 +- .../tests/test_model_inference_client.py | 338 +++++------------- .../core/tracing/ai/inference/__init__.py | 2 +- ...entor.py => _ai_inference_instrumentor.py} | 13 +- ....py => _ai_inference_instrumentor_impl.py} | 9 +- .../inference/azure_telemetry_instrumentor.py | 20 -- 11 files changed, 250 insertions(+), 407 deletions(-) rename sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/{_ai_inference_api_instrumentor.py => _ai_inference_instrumentor.py} (64%) rename sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/{_ai_inference_api_instrumentor_impl.py => _ai_inference_instrumentor_impl.py} (98%) delete mode 100644 sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/azure_telemetry_instrumentor.py diff --git a/sdk/ai/azure-ai-inference/README.md b/sdk/ai/azure-ai-inference/README.md index 25d60dfe32e8..d2e593b30646 100644 --- a/sdk/ai/azure-ai-inference/README.md +++ b/sdk/ai/azure-ai-inference/README.md @@ -459,89 +459,6 @@ TBD To generate embeddings for additional phrases, simply call `client.embed` multiple times using the same `client`. 
--> -## Tracing - -The Azure AI Inferencing API Tracing library provides tracing for Azure AI Inference client library for Python. Refer to Installation chapter above for installation instructions. - -### Setup - -The environment variable AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED controls whether the actual message contents will be included in the traces or not. By default, the message contents are not include as part of the trace. Set the value of the environment variable to true (case insensitive) for the message contents to be included as part of the trace. Any other value will cause the message contents not to be traced. - -You also need to configure the tracing implementation in your code, like so: - -``` -from azure.core.settings import settings -settings.tracing_implementation = "opentelemetry" -``` - -### Trace Exporter(s) - -In order for the traces to be captured, you need to setup the applicable trace exporters. The chosen exporter will be based on where you want the traces to be output. You can also implement your own exporter. The first example below shows how to setup an exporter to Azure Monitor. -Please refer to [this](https://learn.microsoft.com/en-us/azure/azure-monitor/app/create-workspace-resource?tabs=bicep) documentation for more information about how to created Azure Monitor resource. -Configure the APPLICATIONINSIGHTS_CONNECTION_STRING based on your Azure Monitor resource. - -``` -# Setup tracing to Azure Monitor -from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter -trace.set_tracer_provider(TracerProvider()) -tracer = trace.get_tracer(__name__) -span_processor = BatchSpanProcessor( - AzureMonitorTraceExporter.from_connection_string( - os.environ["APPLICATIONINSIGHTS_CONNECTION_STRING"] - ) -) -trace.get_tracer_provider().add_span_processor(span_processor) -``` - -The following example shows how to setup tracing to console output. - -``` -# Setup tracing to console -exporter = ConsoleSpanExporter() -trace.set_tracer_provider(TracerProvider()) -tracer = trace.get_tracer(__name__) -trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(exporter)) -``` -### Instrumentation - -Use the AIInferenceInstrumentor to instrument the Azure AI Inferencing API for LLM tracing, this will cause the LLM traces to be emitted from Azure AI Inferencing API. - -``` -from azure.core.tracing import AIInferenceApiInstrumentor -# Instrument AI Inference API -AIInferenceApiInstrumentor().instrument() -``` - -It is also possible to uninstrument the Azure AI Inferencing API by using the uninstrument call. After this call, the LLM traces will no longer be emitted by the Azure AI Inferencing API until instrument is called again. - -``` -AIInferenceApiInstrumentor().uninstrument() -``` - -### Tracing Your Own Functions -The @tracer.start_as_current_span decorator can be used to trace your own functions. This will trace the function parameters and their values. You can also add further attributes to the span in the function implementation as demonstrated below. Note that you will have to setup the tracer in your code before using the decorator. - -``` -# The @tracer.start_as_current_span decorator will -# trace the function call and enable adding additional attributes -# to the span in the function implementation. 

-@tracer.start_as_current_span("get_temperature")
-def get_temperature(city: str) -> str:
-
-    # Adding attributes to the current span
-    span = trace.get_current_span()
-    span.set_attribute("requested_city", city)
-
-    if city == "Seattle":
-        return "75"
-    elif city == "New York City":
-        return "80"
-    else:
-        return "Unavailable"
-
-
-```
-
 ## Troubleshooting
 
 ### Exceptions
@@ -625,6 +542,87 @@ To report issues with the client library, or request additional features, please
 
 * Have a look at the [Samples](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/ai/azure-ai-inference/samples)
 folder, containing fully runnable Python code for doing inference using synchronous and asynchronous clients.
 
+## Tracing
+
+The Azure AI Inferencing API Tracing library provides tracing for the Azure AI Inference client library for Python. Refer to the Installation chapter above for installation instructions.
+
+### Setup
+
+The environment variable AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED controls whether the actual message contents will be recorded in the traces or not. By default, the message contents are not recorded as part of the trace. When message content recording is disabled, the function names, function parameter names, and function parameter values associated with any function call tools are also not recorded in the trace. Set the value of the environment variable to "true" (case insensitive) for the message contents to be recorded as part of the trace. Any other value will cause the message contents not to be recorded.
+
+You also need to configure the tracing implementation in your code by setting `AZURE_SDK_TRACING_IMPLEMENTATION` to `opentelemetry` or configuring it in the code with the following snippet:
+
+
+
+```python
+from azure.core.settings import settings
+settings.tracing_implementation = "opentelemetry"
+```
+
+
+
+
+Please refer to [azure-core-tracing-documentation](https://learn.microsoft.com/python/api/overview/azure/core-tracing-opentelemetry-readme) for more information.
+
+### Trace Exporter(s)
+
+In order for the traces to be captured, you need to set up the applicable trace exporters. The chosen exporter will be based on where you want the traces to be output. You can also implement your own exporter. You can find more information [here](https://learn.microsoft.com/en-us/python/api/overview/azure/core-tracing-opentelemetry-readme?view=azure-python-preview).
+
+Please refer to [this](https://learn.microsoft.com/en-us/azure/azure-monitor/app/create-workspace-resource?tabs=bicep) documentation for more information about how to create an Azure Monitor resource for the Azure Monitor exporter.
+
+### Instrumentation
+
+Use the AIInferenceInstrumentor to instrument the Azure AI Inferencing API for LLM tracing; this will cause LLM traces to be emitted from the Azure AI Inferencing API.
+
+
+
+```python
+from azure.core.tracing.ai.inference import AIInferenceInstrumentor
+# Instrument AI Inference API
+AIInferenceInstrumentor().instrument()
+```
+
+
+
+
+It is also possible to uninstrument the Azure AI Inferencing API by using the uninstrument call. After this call, the LLM traces will no longer be emitted by the Azure AI Inferencing API until instrument is called again.
+
+
+
+```python
+AIInferenceInstrumentor().uninstrument()
+```
+
+
+
+### Tracing Your Own Functions
+The @tracer.start_as_current_span decorator can be used to trace your own functions. This will trace the function parameters and their values. You can also add further attributes to the span in the function implementation as demonstrated below. Note that you will have to set up the tracer in your code before using the decorator. More information is available [here](https://opentelemetry.io/docs/languages/python/).
+
+
+
+```python
+from opentelemetry import trace
+tracer = trace.get_tracer(__name__)
+
+# The tracer.start_as_current_span decorator will trace the function call and enable adding additional attributes
+# to the span in the function implementation. Note that this will trace the function parameters and their values.
+@tracer.start_as_current_span("get_temperature")
+def get_temperature(city: str) -> str:
+
+    # Adding attributes to the current span
+    span = trace.get_current_span()
+    span.set_attribute("requested_city", city)
+
+    if city == "Seattle":
+        return "75"
+    elif city == "New York City":
+        return "80"
+    else:
+        return "Unavailable"
+```
+
+
+
 ## Contributing
 
 This project welcomes contributions and suggestions. Most contributions require
diff --git a/sdk/ai/azure-ai-inference/samples/README.md b/sdk/ai/azure-ai-inference/samples/README.md
index 777ce3baf477..ebc8990ceb7a 100644
--- a/sdk/ai/azure-ai-inference/samples/README.md
+++ b/sdk/ai/azure-ai-inference/samples/README.md
@@ -105,6 +105,7 @@ similarly for the other samples.
 |[sample_get_model_info.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_get_model_info.py) | Get AI model information using the chat completions client. Similarly can be done with all other clients. |
 |[sample_chat_completions_with_model_extras.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_model_extras.py) | Chat completions with additional model-specific parameters. |
 |[sample_chat_completions_azure_openai.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py) | Chat completions against Azure OpenAI endpoint. |
+|[sample_chat_completions_with_tracing.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py) | Chat completions with traces enabled. Includes streaming and non-streaming chat operations. The non-streaming chat uses a function call tool and also demonstrates how to add traces to client code so that they are included in the emitted traces. |
 
 ### Text embeddings
 
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py
index 8ac19ab1d56f..fdd107664c0a 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py
@@ -1,19 +1,45 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+"""
+DESCRIPTION:
+    This sample demonstrates how to get a chat completions response from
+    the service using a synchronous client, with tracing enabled. It shows
+    both a streaming chat completion and a chat completion that uses a
+    function call tool, with the traces exported to the console.
+
+    This sample assumes the AI model is hosted on a Serverless API or
+    Managed Compute endpoint. For GitHub Models or Azure OpenAI endpoints,
+    the client constructor needs to be modified. See package documentation:
+    https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/README.md#key-concepts
+
+USAGE:
+    python sample_chat_completions_with_tracing.py
+
+    Set these two environment variables before running the sample:
+    1) AZURE_AI_CHAT_ENDPOINT - Your endpoint URL, in the form
+        https://<your-deployment-name>.<your-azure-region>.models.ai.azure.com
+        where `your-deployment-name` is your unique AI Model deployment name, and
+        `your-azure-region` is the Azure region where your model is deployed.
+    2) AZURE_AI_CHAT_KEY - Your model key (a 32-character string). Keep it secret.
+"""
+
 import os
 from opentelemetry import trace
+# opentelemetry-sdk is required for the opentelemetry.sdk imports.
+# You can install it with command "pip install opentelemetry-sdk".
 from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import BatchSpanProcessor
-from opentelemetry.sdk.trace.export import ConsoleSpanExporter
-from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter
 from azure.ai.inference import ChatCompletionsClient
 from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason
 from azure.core.credentials import AzureKeyCredential
-from azure.core.tracing.ai.inference import AIInferenceApiInstrumentor
-from azure.core.settings import settings
 
+# [START trace_setting]
+from azure.core.settings import settings
+settings.tracing_implementation = "opentelemetry"
+# [END trace_setting]
 
 # Setup tracing to console
 exporter = ConsoleSpanExporter()
@@ -21,19 +47,8 @@
 tracer = trace.get_tracer(__name__)
 trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(exporter))
 
-# Use the following code to setup tracing to Application Insights
-# from azure.monitor.opentelemetry.exporter import AzureMonitorTraceExporter
-# trace.set_tracer_provider(TracerProvider())
-# tracer = trace.get_tracer(__name__)
-# span_processor = BatchSpanProcessor(
-#     AzureMonitorTraceExporter.from_connection_string(
-#         os.environ["APPLICATIONINSIGHTS_CONNECTION_STRING"]
-#     )
-# )
-# trace.get_tracer_provider().add_span_processor(span_processor)
-
-def chat_completion_streaming(key, endpoint, model_name):
+def chat_completion_streaming(key, endpoint):
     client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key))
     response = client.complete(
         stream=True,
@@ -41,7 +56,6 @@ def chat_completion_streaming(key, endpoint, model_name):
             SystemMessage(content="You are a helpful assistant."),
             UserMessage(content="Tell me about software engineering in five sentences."),
         ],
-        model=model_name,
     )
     for update in response:
         if update.choices:
@@ -49,6 +63,9 @@ def chat_completion_streaming(key, endpoint, model_name):
             pass
     client.close()
 
+# [START trace_function]
+from opentelemetry import trace
+tracer = trace.get_tracer(__name__)
 
 # The tracer.start_as_current_span decorator will trace the function call and enable adding additional attributes
 # to the span in the function implementation. Note that this will trace the function parameters and their values. 
@@ -65,6 +82,7 @@ def get_temperature(city: str) -> str: return "80" else: return "Unavailable" + # [END trace_function] def get_weather(city: str) -> str: @@ -76,7 +94,7 @@ def get_weather(city: str) -> str: return "Unavailable" -def chat_completion_with_function_call(key, endpoint, model_name): +def chat_completion_with_function_call(key, endpoint): import json from azure.ai.inference.models import ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition @@ -120,7 +138,7 @@ def chat_completion_with_function_call(key, endpoint, model_name): UserMessage(content="What is the weather and temperature in Seattle?"), ] - response = client.complete(messages=messages, model=model_name, tools=[weather_description, temperature_in_city]) + response = client.complete(messages=messages, tools=[weather_description, temperature_in_city]) if response.choices[0].finish_reason == CompletionsFinishReason.TOOL_CALLS: # Append the previous model response to the chat history @@ -137,31 +155,36 @@ def chat_completion_with_function_call(key, endpoint, model_name): # Provide the tool response to the model, by appending it to the chat history messages.append(ToolMessage(tool_call_id=tool_call.id, content=function_response)) # With the additional tools information on hand, get another response from the model - response = client.complete(messages=messages, model=model_name, tools=[weather_description, temperature_in_city]) + response = client.complete(messages=messages, tools=[weather_description, temperature_in_city]) print(f"Model response = {response.choices[0].message.content}") def main(): - # Setup Azure Core settings to use OpenTelemetry tracing - settings.tracing_implementation = "OpenTelemetry" - + # [START instrument_inferencing] + from azure.core.tracing.ai.inference import AIInferenceInstrumentor # Instrument AI Inference API - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() + # [END instrument_inferencing] - # Read AI Inference API configuration - endpoint = os.environ.get("AZUREAI_ENDPOINT_URL") - key = os.environ.get("AZUREAI_ENDPOINT_KEY") - model_name = os.environ.get("AZUREAI_MODEL_NAME") + try: + endpoint = os.environ["AZURE_AI_CHAT_ENDPOINT"] + key = os.environ["AZURE_AI_CHAT_KEY"] + except KeyError: + print("Missing environment variable 'AZURE_AI_CHAT_ENDPOINT' or 'AZURE_AI_CHAT_KEY'") + print("Set them before running this sample.") + exit() print("===== starting chat_completion_streaming() =====") - chat_completion_streaming(key, endpoint, model_name) + chat_completion_streaming(key, endpoint) print("===== chat_completion_streaming() done =====") print("===== starting chat_completion_with_function_call() =====") - chat_completion_with_function_call(key, endpoint, model_name) + chat_completion_with_function_call(key, endpoint) print("===== chat_completion_with_function_call() done =====") - AIInferenceApiInstrumentor().uninstrument() + # [START uninstrument_inferencing] + AIInferenceInstrumentor().uninstrument() + # [END uninstrument_inferencing] if __name__ == "__main__": diff --git a/sdk/ai/azure-ai-inference/setup.py b/sdk/ai/azure-ai-inference/setup.py index 54039451d91a..98eff8671e37 100644 --- a/sdk/ai/azure-ai-inference/setup.py +++ b/sdk/ai/azure-ai-inference/setup.py @@ -69,6 +69,6 @@ ], python_requires=">=3.8", extras_require={ - 'trace': ['azure-core-tracing-opentelemetry', 'opentelemetry-sdk', 'azure-monitor-opentelemetry-exporter'] + 'trace': ['azure-core-tracing-opentelemetry'] } ) diff --git 
a/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py index aeb8266abbc7..82e4b0665a09 100644 --- a/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py +++ b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py @@ -1,7 +1,7 @@ # ------------------------------------ # Copyright (c) Microsoft Corporation. # ------------------------------------ -import datetime +import numbers import json from opentelemetry.sdk.trace import Span @@ -28,7 +28,12 @@ def check_span_attributes(self, span, attributes): return False else: # Check if the attribute value matches the provided value - if attribute_value != "" and span.attributes[attribute_name] != attribute_value: + if attribute_value == "+": + if not isinstance(span.attributes[attribute_name], numbers.Number): + return False + if span.attributes[attribute_name] < 0: + return False + elif attribute_value != "" and span.attributes[attribute_name] != attribute_value: return False # Check if the attribute value in the span is not empty when the provided value is "" elif attribute_value == "" and not span.attributes[attribute_name]: diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py index fd630ffa706b..1fb76e363738 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py @@ -18,7 +18,7 @@ from devtools_testutils.aio import recorded_by_proxy_async from azure.core.exceptions import AzureError, ServiceRequestError from azure.core.credentials import AzureKeyCredential -from azure.core.tracing.ai.inference import AIInferenceApiInstrumentor +from azure.core.tracing.ai.inference import AIInferenceInstrumentor from memory_trace_exporter import MemoryTraceExporter from gen_ai_trace_verifier import GenAiTraceVerifier from opentelemetry import trace @@ -718,7 +718,7 @@ async def test_chat_completion_async_tracing_content_recording_disabled(self, ** self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_async_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() response = await client.complete( messages=[ sdk.models.SystemMessage(content="You are a helpful assistant."), @@ -754,4 +754,4 @@ async def test_chat_completion_async_tracing_content_recording_disabled(self, ** ] events_match = GenAiTraceVerifier().check_span_events(span, expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() \ No newline at end of file + AIInferenceInstrumentor().uninstrument() \ No newline at end of file diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index 16568e77f486..e0c27d66dbf9 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -18,7 +18,7 @@ from devtools_testutils import recorded_by_proxy from azure.core.exceptions import AzureError, ServiceRequestError from azure.core.credentials import AzureKeyCredential -from azure.core.tracing.ai.inference import AIInferenceApiInstrumentor +from azure.core.tracing.ai.inference import AIInferenceInstrumentor from memory_trace_exporter import MemoryTraceExporter from gen_ai_trace_verifier import GenAiTraceVerifier from 
opentelemetry import trace @@ -838,11 +838,11 @@ def test_instrumentation(self, **kwargs): client = self._create_chat_client(**kwargs) exception_caught = False try: - assert AIInferenceApiInstrumentor().is_instrumented() == False - AIInferenceApiInstrumentor().instrument() - assert AIInferenceApiInstrumentor().is_instrumented() == True - AIInferenceApiInstrumentor().uninstrument() - assert AIInferenceApiInstrumentor().is_instrumented() == False + assert AIInferenceInstrumentor().is_instrumented() == False + AIInferenceInstrumentor().instrument() + assert AIInferenceInstrumentor().is_instrumented() == True + AIInferenceInstrumentor().uninstrument() + assert AIInferenceInstrumentor().is_instrumented() == False except RuntimeError as e: exception_caught = True print(e) @@ -855,9 +855,9 @@ def test_instrumenting_twice_causes_exception(self, **kwargs): exception_caught = False instrumented_once = False try: - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() instrumented_once = True - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() except RuntimeError as e: exception_caught = True print(e) @@ -870,7 +870,7 @@ def test_uninstrumenting_uninstrumented_causes_exception(self, **kwargs): client = self._create_chat_client(**kwargs) exception_caught = False try: - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() except RuntimeError as e: exception_caught = True print(e) @@ -883,10 +883,10 @@ def test_uninstrumenting_twise_causes_exception(self, **kwargs): exception_caught = False uninstrumented_once = False try: - AIInferenceApiInstrumentor().instrument() - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().instrument() + AIInferenceInstrumentor().uninstrument() uninstrumented_once = True - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() except RuntimeError as e: exception_caught = True print(e) @@ -899,7 +899,7 @@ def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() response = client.complete( messages=[ sdk.models.SystemMessage(content="You are a helpful assistant."), @@ -914,12 +914,12 @@ def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): span = spans[0] expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) assert attributes_match == True @@ -935,14 +935,14 @@ def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): ] events_match = GenAiTraceVerifier().check_span_events(span, expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() 
@ServicePreparerChatCompletions() def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() response = client.complete( messages=[ sdk.models.SystemMessage(content="You are a helpful assistant."), @@ -957,12 +957,12 @@ def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): span = spans[0] expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) assert attributes_match == True @@ -992,14 +992,14 @@ def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): ] events_match = GenAiTraceVerifier().check_span_events(span, expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kwargs): self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() response = client.complete( messages=[ sdk.models.SystemMessage(content="You are a helpful assistant."), @@ -1021,12 +1021,12 @@ def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kw span = spans[0] expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) assert attributes_match == True @@ -1042,14 +1042,14 @@ def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kw ] events_match = GenAiTraceVerifier().check_span_events(span, expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwargs): self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() response = 
client.complete( messages=[ sdk.models.SystemMessage(content="You are a helpful assistant."), @@ -1071,12 +1071,12 @@ def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwa span = spans[0] expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) assert attributes_match == True @@ -1106,7 +1106,7 @@ def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwa ] events_match = GenAiTraceVerifier().check_span_events(span, expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() def test_chat_completion_with_function_call_tracing_content_recording_enabled(self, **kwargs): @@ -1117,7 +1117,7 @@ def test_chat_completion_with_function_call_tracing_content_recording_enabled(se self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() def get_weather(city: str) -> str: if city == "Seattle": @@ -1173,23 +1173,23 @@ def get_weather(city: str) -> str: assert len(spans) == 2 expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) assert attributes_match == True expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) assert attributes_match == True @@ -1268,7 +1268,7 @@ def get_weather(city: str) -> str: events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() def 
test_chat_completion_with_function_call_tracing_content_recording_disabled(self, **kwargs): @@ -1279,7 +1279,7 @@ def test_chat_completion_with_function_call_tracing_content_recording_disabled(s self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() def get_weather(city: str) -> str: if city == "Seattle": @@ -1335,23 +1335,23 @@ def get_weather(city: str) -> str: assert len(spans) == 2 expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) assert attributes_match == True expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) assert attributes_match == True @@ -1382,169 +1382,7 @@ def get_weather(city: str) -> str: events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() - - @ServicePreparerChatCompletions() - def test_chat_completion_with_function_call_streaming_tracing_content_recording_enabled(self, **kwargs): - import json - from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition - from azure.ai.inference import ChatCompletionsClient - - self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") - client = self._create_chat_client(**kwargs) - processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() - - def get_weather(city: str) -> str: - if city == "Seattle": - return "Nice weather" - elif city == "New York City": - return "Good weather" - else: - return "Unavailable" - - weather_description = ChatCompletionsToolDefinition( - function=FunctionDefinition( - name="get_weather", - description="Returns description of the weather in the specified city", - parameters={ - "type": "object", - "properties": { - "city": { - "type": "string", - "description": "The name of the city for which weather info is requested", - }, - }, - "required": ["city"], - }, - ) - ) - messages=[ - sdk.models.SystemMessage(content="You are a helpful assistant."), - sdk.models.UserMessage(content="What is the weather in 
Seattle?"), - ] - - response = client.complete(messages=messages, tools=[weather_description]) - - if response.choices[0].finish_reason == CompletionsFinishReason.TOOL_CALLS: - # Append the previous model response to the chat history - messages.append(AssistantMessage(tool_calls=response.choices[0].message.tool_calls)) - # The tool should be of type function call. - if response.choices[0].message.tool_calls is not None and len(response.choices[0].message.tool_calls) > 0: - for tool_call in response.choices[0].message.tool_calls: - if type(tool_call) is ChatCompletionsToolCall: - function_args = json.loads(tool_call.function.arguments.replace("'", '"')) - print(f"Calling function `{tool_call.function.name}` with arguments {function_args}") - callable_func = locals()[tool_call.function.name] - function_response = callable_func(**function_args) - print(f"Function response = {function_response}") - # Provide the tool response to the model, by appending it to the chat history - messages.append(ToolMessage(tool_call_id=tool_call.id, content=function_response)) - # With the additional tools information on hand, get another response from the model - response = client.complete(messages=messages, tools=[weather_description]) - processor.force_flush() - spans = exporter.get_spans_by_name_starts_with("chat ") - if len(spans) == 0: - spans = exporter.get_spans_by_name("chat") - assert len(spans) == 2 - expected_attributes = [('gen_ai.operation.name', 'chat'), - ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), - ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), - ('gen_ai.response.finish_reasons', ('tool_calls',))] - attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) - assert attributes_match == True - expected_attributes = [('gen_ai.operation.name', 'chat'), - ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), - ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), - ('gen_ai.response.finish_reasons', ('stop',))] - attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) - assert attributes_match == True - - expected_events = [ - { - "name": "gen_ai.system.message", - "timestamp": "", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}" - } - }, - { - "name": "gen_ai.user.message", - "timestamp": "", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" - } - }, - { - "name": "gen_ai.choice", - "timestamp": "", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"message\": {\"content\": \"\", \"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\":\\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}, \"finish_reason\": \"tool_calls\", \"index\": 0}" - } - } - ] - events_match = GenAiTraceVerifier().check_span_events(spans[0], expected_events) - assert events_match == True - - expected_events = [ - { - "name": "gen_ai.system.message", - "timestamp": "*", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You 
are a helpful assistant.\"}" - } - }, - { - "name": "gen_ai.user.message", - "timestamp": "*", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" - } - }, - { - "name": "gen_ai.assistant.message", - "timestamp": "*", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"role\": \"assistant\", \"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\": \\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}" - } - }, - { - "name": "gen_ai.tool.message", - "timestamp": "*", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"role\": \"tool\", \"tool_call_id\": \"*\", \"content\": \"Nice weather\"}" - } - }, - { - "name": "gen_ai.choice", - "timestamp": "*", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"message\": {\"content\": \"*\"}, \"finish_reason\": \"stop\", \"index\": 0}" - } - } - ] - events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) - assert events_match == True - - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() def test_chat_completion_with_function_call_streaming_tracing_content_recording_enabled(self, **kwargs): @@ -1555,7 +1393,7 @@ def test_chat_completion_with_function_call_streaming_tracing_content_recording_ self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() def get_weather(city: str) -> str: if city == "Seattle": @@ -1649,23 +1487,23 @@ def get_weather(city: str) -> str: assert len(spans) == 2 expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) assert attributes_match == True expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) assert attributes_match == True @@ -1744,7 +1582,7 @@ def get_weather(city: str) -> str: events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() + 
AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() def test_chat_completion_with_function_call_streaming_tracing_content_recording_disabled(self, **kwargs): @@ -1755,7 +1593,7 @@ def test_chat_completion_with_function_call_streaming_tracing_content_recording_ self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() def get_weather(city: str) -> str: if city == "Seattle": @@ -1849,23 +1687,23 @@ def get_weather(city: str) -> str: assert len(spans) == 2 expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) assert attributes_match == True expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) assert attributes_match == True @@ -1896,4 +1734,4 @@ def get_weather(city: str) -> str: events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() \ No newline at end of file + AIInferenceInstrumentor().uninstrument() \ No newline at end of file diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py index bd8ddc1e73b7..88064b3607a6 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py @@ -2,4 +2,4 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
# ------------------------------------ -from ._ai_inference_api_instrumentor import AIInferenceApiInstrumentor +from ._ai_inference_instrumentor import AIInferenceInstrumentor diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py similarity index 64% rename from sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py rename to sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py index 5156b77ee11b..883ecd63b2b9 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py @@ -2,10 +2,9 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- import os -from .azure_telemetry_instrumentor import AzureTelemetryInstrumentor -class AIInferenceApiInstrumentor(AzureTelemetryInstrumentor): +class AIInferenceInstrumentor: def __init__(self): super().__init__() @@ -20,16 +19,16 @@ def instrument(self): var_value = os.environ.get("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED") enable_content_tracing = self.str_to_bool(var_value) - from ._ai_inference_api_instrumentor_impl import _inject_inference_api - _inject_inference_api(enable_content_tracing) + from ._ai_inference_instrumentor_impl import _instrument_inference + _instrument_inference(enable_content_tracing) def uninstrument(self): if not self.is_instrumented(): raise RuntimeError("Not instrumented") - from ._ai_inference_api_instrumentor_impl import _restore_inference_api - _restore_inference_api() + from ._ai_inference_instrumentor_impl import _uninstrument_inference + _uninstrument_inference() def is_instrumented(self): - from ._ai_inference_api_instrumentor_impl import _is_instrumented + from ._ai_inference_instrumentor_impl import _is_instrumented return _is_instrumented() diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py similarity index 98% rename from sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py rename to sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py index c003da6e4132..17702c2de7eb 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py @@ -16,7 +16,6 @@ from azure.core.tracing import AbstractSpan from azure.core.tracing import SpanKind from azure.core.settings import settings -from azure.core.tracing.common import get_function_and_class_name from opentelemetry.trace import Status, StatusCode, Span _inference_traces_enabled: bool = False @@ -278,7 +277,7 @@ def inner(*args, **kwargs): if span_impl_type is None: return function(*args, **kwargs) - class_function_name = get_function_and_class_name(function, *args) + class_function_name = function.__qualname__ if class_function_name.startswith("ChatCompletionsClient.complete"): if 
kwargs.get('model') is None: @@ -343,7 +342,7 @@ async def inner(*args, **kwargs): if span_impl_type is None: return function(*args, **kwargs) - class_function_name = get_function_and_class_name(function, *args) + class_function_name = function.__qualname__ if class_function_name.startswith("ChatCompletionsClient.complete"): if kwargs.get('model') is None: @@ -436,7 +435,7 @@ def available_inference_apis_and_injectors(): yield from _generate_api_and_injector(_inference_api_list()) -def _inject_inference_api(enable_content_tracing: bool = False): +def _instrument_inference(enable_content_tracing: bool = False): """This function modifies the methods of the Inference API classes to inject logic before calling the original methods. The original methods are stored as _original attributes of the methods. """ @@ -452,7 +451,7 @@ def _inject_inference_api(enable_content_tracing: bool = False): setattr(api, method, injector(getattr(api, method), trace_type, name)) -def _restore_inference_api(): +def _uninstrument_inference(): """This function restores the original methods of the Inference API classes by assigning them back from the _original attributes of the modified methods. """ diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/azure_telemetry_instrumentor.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/azure_telemetry_instrumentor.py deleted file mode 100644 index 7950a442363e..000000000000 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/azure_telemetry_instrumentor.py +++ /dev/null @@ -1,20 +0,0 @@ -# --------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# --------------------------------------------------------- -from abc import ABC, abstractmethod - -class AzureTelemetryInstrumentor(ABC): - def __init__(self): - pass - - @abstractmethod - def instrument(self): - pass - - @abstractmethod - def uninstrument(self): - pass - - @abstractmethod - def is_instrumented(self): - pass \ No newline at end of file From d9652f5ec12bb87de5386ba1698e560c69673d33 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Thu, 26 Sep 2024 09:45:39 -0500 Subject: [PATCH 18/35] changed to readme based on review comments --- sdk/ai/azure-ai-inference/README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sdk/ai/azure-ai-inference/README.md b/sdk/ai/azure-ai-inference/README.md index d2e593b30646..bc5cd5b7a679 100644 --- a/sdk/ai/azure-ai-inference/README.md +++ b/sdk/ai/azure-ai-inference/README.md @@ -564,11 +564,12 @@ settings.tracing_implementation = "opentelemetry" Please refer to [azure-core-tracing-documentation](https://learn.microsoft.com/python/api/overview/azure/core-tracing-opentelemetry-readme) for more information. -### Trace Exporter(s) +### Exporting Traces with OpenTelemetry -In order for the traces to be captured, you need to setup the applicable trace exporters. The chosen exporter will be based on where you want the traces to be output. You can also implement your own exporter. You can find more information [here](https://learn.microsoft.com/en-us/python/api/overview/azure/core-tracing-opentelemetry-readme?view=azure-python-preview). +Azure AI Inference is instrumented with OpenTelemetry. In order to enable tracing you need to configure OpenTelemetry to export traces to your observability backend. 
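+For example, a minimal sketch that prints the emitted spans to the console could look like this. It assumes the `opentelemetry-sdk` package is installed; any other span exporter (OTLP, Azure Monitor, etc.) can be registered the same way:
+
+```python
+from opentelemetry import trace
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter
+
+# Register a tracer provider and route finished spans to the console exporter.
+trace.set_tracer_provider(TracerProvider())
+trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
+```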
+Refer to [Azure SDK tracing in Python](https://learn.microsoft.com/python/api/overview/azure/core-tracing-opentelemetry-readme?view=azure-python-preview) for more details. -Please refer to [this](https://learn.microsoft.com/en-us/azure/azure-monitor/app/create-workspace-resource?tabs=bicep) documentation for more information about how to create Azure Monitor resource for the Azure Monitor exporter. +Refer to [Azure Monitor OpenTelemetry documentation](https://learn.microsoft.com/azure/azure-monitor/app/opentelemetry-enable?tabs=python) for the details on how to send Azure AI Inference traces to Azure Monitor and create Azure Monitor resource. ### Instrumentation From 6da2a7d143bd15355ff02544992755dbab179265 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Thu, 26 Sep 2024 12:27:21 -0500 Subject: [PATCH 19/35] removed distributed_trace and some other updates --- sdk/ai/azure-ai-inference/README.md | 11 +++++------ .../azure-ai-inference/azure/ai/inference/_patch.py | 1 - .../azure/ai/inference/aio/_patch.py | 1 - .../samples/sample_chat_completions_with_defaults.py | 2 +- .../samples/sample_chat_completions_with_tracing.py | 8 ++++---- .../azure-ai-inference/tests/gen_ai_trace_verifier.py | 1 + .../azure-ai-inference/tests/memory_trace_exporter.py | 2 +- 7 files changed, 12 insertions(+), 14 deletions(-) diff --git a/sdk/ai/azure-ai-inference/README.md b/sdk/ai/azure-ai-inference/README.md index bc5cd5b7a679..85e493b1047b 100644 --- a/sdk/ai/azure-ai-inference/README.md +++ b/sdk/ai/azure-ai-inference/README.md @@ -538,10 +538,6 @@ For more information, see [Configure logging in the Azure libraries for Python]( To report issues with the client library, or request additional features, please open a GitHub issue [here](https://github.com/Azure/azure-sdk-for-python/issues) -## Next steps - -* Have a look at the [Samples](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/ai/azure-ai-inference/samples) folder, containing fully runnable Python code for doing inference using synchronous and asynchronous clients. - ## Tracing The Azure AI Inferencing API Tracing library provides tracing for Azure AI Inference client library for Python. Refer to Installation chapter above for installation instructions. @@ -561,7 +557,6 @@ settings.tracing_implementation = "opentelemetry" - Please refer to [azure-core-tracing-documentation](https://learn.microsoft.com/python/api/overview/azure/core-tracing-opentelemetry-readme) for more information. ### Exporting Traces with OpenTelemetry @@ -586,7 +581,7 @@ AIInferenceInstrumentor().instrument() -It is also possible to uninstrument the Azure AI Inferencing API by using the uninstrument call. After this call, the LLM traces will no longer be emitted by the Azure AI Inferencing API until instrument is called again. +It is also possible to uninstrument the Azure AI Inferencing API by using the uninstrument call. After this call, the traces will no longer be emitted by the Azure AI Inferencing API until instrument is called again. @@ -624,6 +619,10 @@ def get_temperature(city: str) -> str: +## Next steps + +* Have a look at the [Samples](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/ai/azure-ai-inference/samples) folder, containing fully runnable Python code for doing inference using synchronous and asynchronous clients. + ## Contributing This project welcomes contributions and suggestions. 
Most contributions require diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py index 362fa75e2a91..9860f70a7a68 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py @@ -454,7 +454,6 @@ def complete( :raises ~azure.core.exceptions.HttpResponseError: """ - @distributed_trace def complete( self, body: Union[JSON, IO[bytes]] = _Unset, diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py index ac31fdb88108..121401ce0f65 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py @@ -437,7 +437,6 @@ async def complete( :raises ~azure.core.exceptions.HttpResponseError: """ - @distributed_trace_async async def complete( self, body: Union[JSON, IO[bytes]] = _Unset, diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_defaults.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_defaults.py index 36f43a5601a4..011735a7e61f 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_defaults.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_defaults.py @@ -6,7 +6,7 @@ DESCRIPTION: This sample demonstrates how to get a chat completions response from the service using a synchronous client. The sample also shows how to - set default chat compoletions configuration in the client constructor, + set default chat completions configuration in the client constructor, which will be applied to all `complete` calls to the service. This sample assumes the AI model is hosted on a Serverless API or diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py index fdd107664c0a..8580f1a51dff 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py @@ -4,10 +4,10 @@ # ------------------------------------ """ DESCRIPTION: - This sample demonstrates how to get a chat completions response from - the service using a synchronous client. The sample also shows how to - set default chat compoletions configuration in the client constructor, - which will be applied to all `complete` calls to the service. + This sample demonstrates how to use tracing with the Inference client library. + Azure AI Inference is instrumented with OpenTelemetry. In order to enable tracing + you need to configure OpenTelemetry to export traces to your observability backend. + This sample shows how to capture the traces to a file. This sample assumes the AI model is hosted on a Serverless API or Managed Compute endpoint. For GitHub Models or Azure OpenAI endpoints, diff --git a/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py index 82e4b0665a09..29bb2ef57f47 100644 --- a/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py +++ b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py @@ -1,5 +1,6 @@ # ------------------------------------ # Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
# ------------------------------------ import numbers import json diff --git a/sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py b/sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py index 7563e65cfc87..7b609fbf5724 100644 --- a/sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py +++ b/sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py @@ -35,5 +35,5 @@ def get_spans_by_name_starts_with(self, name_prefix: str) -> List[Span]: def get_spans_by_name(self, name: str) -> List[Span]: return [span for span in self._trace_list if span.name == name] - def get_spans(self) -> List[Span]: + def get_spans(self) -> List[Span]: return [span for span in self._trace_list] \ No newline at end of file From 521f7f0c280cdf4d1a7fb8b2d5c3f33206a81894 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Thu, 26 Sep 2024 14:00:43 -0500 Subject: [PATCH 20/35] fixing pre python v3.10 issue --- .../tracing/ai/inference/_ai_inference_instrumentor_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py index 17702c2de7eb..b0543da2f361 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py @@ -10,7 +10,7 @@ import logging from urllib.parse import urlparse from enum import Enum -from typing import Any, Iterator, Callable, Optional, List, Tuple, Dict +from typing import Any, Iterator, Callable, Optional, List, Tuple, Dict, Union from azure.ai.inference.aio import ChatCompletionsClient from azure.ai.inference import models as _models from azure.core.tracing import AbstractSpan @@ -141,7 +141,7 @@ def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCom span.span_instance.add_event(name="gen_ai.choice", attributes=attributes) -def _add_response_chat_attributes(span: AbstractSpan, result: _models.ChatCompletions | _models.StreamingChatCompletionsUpdate) -> None: +def _add_response_chat_attributes(span: AbstractSpan, result: Union[_models.ChatCompletions, _models.StreamingChatCompletionsUpdate]) -> None: _set_attributes( span, From 8c800992d4679db9458c50707390114e32191ca9 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Thu, 26 Sep 2024 16:16:18 -0500 Subject: [PATCH 21/35] test fixes --- .../test_model_inference_async_client.py | 8 +-- .../tests/test_model_inference_client.py | 49 ++++++++++--------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py index 1fb76e363738..70648401659a 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py @@ -733,12 +733,12 @@ async def test_chat_completion_async_tracing_content_recording_disabled(self, ** span = spans[0] expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), + ('gen_ai.request.model', 'chat'), ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 
'mistral-large'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) assert attributes_match == True diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index e0c27d66dbf9..cabd63362ee1 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -861,6 +861,7 @@ def test_instrumenting_twice_causes_exception(self, **kwargs): except RuntimeError as e: exception_caught = True print(e) + AIInferenceInstrumentor().uninstrument() client.close() assert instrumented_once == True assert exception_caught == True @@ -915,9 +916,9 @@ def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] @@ -958,9 +959,9 @@ def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] @@ -1022,9 +1023,9 @@ def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kw expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] @@ -1072,9 +1073,9 @@ def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwa expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] @@ -1174,9 +1175,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + 
('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] @@ -1185,9 +1186,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] @@ -1336,9 +1337,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] @@ -1347,9 +1348,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] @@ -1488,9 +1489,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] @@ -1499,9 +1500,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] @@ -1688,9 +1689,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] @@ -1699,9 +1700,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), 
('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] From 514dea4fd6d6f353198c8b2ae61c67d8b2f6b7f4 Mon Sep 17 00:00:00 2001 From: Darren Cohen <39422044+dargilco@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:59:04 -0700 Subject: [PATCH 22/35] Fix some of the non-trace tests --- sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py | 4 ++-- sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py | 2 +- .../tests/test_model_inference_async_client.py | 4 ++-- .../azure-ai-inference/tests/test_model_inference_client.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py index 9860f70a7a68..ce60a5d0c9b3 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py @@ -102,8 +102,8 @@ def load_client( "The AI model information is missing a value for `model type`. Cannot create an appropriate client." ) - # TODO: Remove "completions" and "embedding" once Mistral Large and Cohere fixes their model type - if model_info.model_type in (_models.ModelType.CHAT, "completion"): + # TODO: Remove "completions", "chat-completions" and "embedding" once Mistral Large and Cohere fix their model type + if model_info.model_type in (_models.ModelType.CHAT, "completion", "chat-completion", "chat-completions"): chat_completion_client = ChatCompletionsClient(endpoint, credential, **kwargs) chat_completion_client._model_info = ( # pylint: disable=protected-access,attribute-defined-outside-init model_info diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py index 121401ce0f65..e0a25e7ef1a5 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py @@ -87,7 +87,7 @@ async def load_client( ) # TODO: Remove "completions" and "embedding" once Mistral Large and Cohere fixes their model type - if model_info.model_type in (_models.ModelType.CHAT, "completion"): + if model_info.model_type in (_models.ModelType.CHAT, "completion", "chat-completion", "chat-completions"): chat_completion_client = ChatCompletionsClient(endpoint, credential, **kwargs) chat_completion_client._model_info = ( # pylint: disable=protected-access,attribute-defined-outside-init model_info diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py index 1fb76e363738..2c5515fb243a 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py @@ -492,7 +492,7 @@ async def test_async_load_chat_completions_client(self, **kwargs): response1 = await client.get_model_info() self._print_model_info_result(response1) self._validate_model_info_result( - response1, "completion" + response1, "chat-completion" # TODO: This should be chat_completions based on REST API spec...
) # TODO: This should be ModelType.CHAT once the model is fixed await client.close() @@ -506,7 +506,7 @@ async def test_async_get_model_info_on_chat_client(self, **kwargs): assert client._model_info # pylint: disable=protected-access self._print_model_info_result(response1) self._validate_model_info_result( - response1, "completion" + response1, "chat-completion" ) # TODO: This should be ModelType.CHAT once the model is fixed # Get the model info again. No network calls should be made here, diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index e0c27d66dbf9..534d1c23224d 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -544,7 +544,7 @@ def test_load_chat_completions_client(self, **kwargs): response1 = client.get_model_info() self._print_model_info_result(response1) self._validate_model_info_result( - response1, "completion" + response1, "chat-completion" ) # TODO: This should be ModelType.CHAT once the model is fixed client.close() @@ -560,7 +560,7 @@ def test_get_model_info_on_chat_client(self, **kwargs): self._print_model_info_result(response1) self._validate_model_info_result( - response1, "completion" + response1, "chat-completion" # TODO: This should be chat_completions according to REST API spec... ) # TODO: This should be ModelType.CHAT once the model is fixed # Get the model info again. No network calls should be made here, From 83f85d64e7e5c5dc3db2a05c3656d01a5c08eb2b Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Fri, 27 Sep 2024 10:18:52 -0500 Subject: [PATCH 23/35] fixing issues reported by tools --- .vscode/cspell.json | 3 +++ .../azure-ai-inference/azure/ai/inference/_patch.py | 1 + .../azure/ai/inference/aio/_patch.py | 1 + .../samples/sample_chat_completions_with_tracing.py | 13 +++++++------ .../tests/test_model_inference_client.py | 2 +- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/.vscode/cspell.json b/.vscode/cspell.json index 628b0a8ee30a..888f65b392ac 100644 --- a/.vscode/cspell.json +++ b/.vscode/cspell.json @@ -401,6 +401,9 @@ "uamqp", "uksouth", "ukwest", + "uninstrument", + "uninstrumented", + "uninstrumenting", "unpad", "unpadder", "unpartial", diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py index 9860f70a7a68..362fa75e2a91 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py @@ -454,7 +454,6 @@ def complete( :raises ~azure.core.exceptions.HttpResponseError: """ + @distributed_trace def complete( self, body: Union[JSON, IO[bytes]] = _Unset, diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py index 121401ce0f65..ac31fdb88108 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py @@ -437,7 +437,6 @@ async def complete( :raises ~azure.core.exceptions.HttpResponseError: """ + @distributed_trace_async async def complete( self, body: Union[JSON, IO[bytes]] = _Unset, diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py index 8580f1a51dff..392d779d6377 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py +++
b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py @@ -30,8 +30,8 @@ from opentelemetry import trace # opentelemetry-sdk is required for the opentelemetry.sdk imports. # You can install it with command "pip install opentelemetry.sdk". -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter +#from opentelemetry.sdk.trace import TracerProvider +#from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter from azure.ai.inference import ChatCompletionsClient from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason from azure.core.credentials import AzureKeyCredential @@ -42,10 +42,11 @@ # [END trace_setting] # Setup tracing to console -exporter = ConsoleSpanExporter() -trace.set_tracer_provider(TracerProvider()) -tracer = trace.get_tracer(__name__) -trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(exporter)) +# Requires opentelemetry-sdk +#exporter = ConsoleSpanExporter() +#trace.set_tracer_provider(TracerProvider()) +#tracer = trace.get_tracer(__name__) +#trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(exporter)) def chat_completion_streaming(key, endpoint): diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index cabd63362ee1..1c4b4fe26f39 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -879,7 +879,7 @@ def test_uninstrumenting_uninstrumented_causes_exception(self, **kwargs): assert exception_caught == True @ServicePreparerChatCompletions() - def test_uninstrumenting_twise_causes_exception(self, **kwargs): + def test_uninstrumenting_twice_causes_exception(self, **kwargs): client = self._create_chat_client(**kwargs) exception_caught = False uninstrumented_once = False From e8dd67d50e5db14ca79ec2fd38b656376b33e886 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Fri, 27 Sep 2024 11:53:19 -0500 Subject: [PATCH 24/35] adding uninstrumentation to the beginning of tracing tests --- .../test_model_inference_async_client.py | 5 ++ .../tests/test_model_inference_client.py | 60 +++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py index 8619325504b0..db955f0cc010 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py @@ -715,6 +715,11 @@ def modify_env_var(self, name, new_value): @ServicePreparerChatCompletions() async def test_chat_completion_async_tracing_content_recording_disabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_async_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index 7f6d83c7acf2..3acbe369ce57 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -835,6 +835,11 @@ def 
modify_env_var(self, name, new_value): @ServicePreparerChatCompletions() def test_instrumentation(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass client = self._create_chat_client(**kwargs) exception_caught = False try: @@ -851,6 +856,11 @@ def test_instrumentation(self, **kwargs): @ServicePreparerChatCompletions() def test_instrumenting_twice_causes_exception(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass client = self._create_chat_client(**kwargs) exception_caught = False instrumented_once = False @@ -868,6 +878,11 @@ def test_instrumenting_twice_causes_exception(self, **kwargs): @ServicePreparerChatCompletions() def test_uninstrumenting_uninstrumented_causes_exception(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass client = self._create_chat_client(**kwargs) exception_caught = False try: @@ -880,6 +895,11 @@ def test_uninstrumenting_uninstrumented_causes_exception(self, **kwargs): @ServicePreparerChatCompletions() def test_uninstrumenting_twice_causes_exception(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass client = self._create_chat_client(**kwargs) exception_caught = False uninstrumented_once = False @@ -897,6 +917,11 @@ def test_uninstrumenting_twice_causes_exception(self, **kwargs): @ServicePreparerChatCompletions() def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() @@ -940,6 +965,11 @@ def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): @ServicePreparerChatCompletions() def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() @@ -997,6 +1027,11 @@ def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): @ServicePreparerChatCompletions() def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() @@ -1047,6 +1082,11 @@ def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kw @ServicePreparerChatCompletions() def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + 
except RuntimeError as e: + pass self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() @@ -1111,6 +1151,11 @@ def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwa @ServicePreparerChatCompletions() def test_chat_completion_with_function_call_tracing_content_recording_enabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass import json from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition from azure.ai.inference import ChatCompletionsClient @@ -1273,6 +1318,11 @@ def get_weather(city: str) -> str: @ServicePreparerChatCompletions() def test_chat_completion_with_function_call_tracing_content_recording_disabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass import json from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition from azure.ai.inference import ChatCompletionsClient @@ -1387,6 +1437,11 @@ def get_weather(city: str) -> str: @ServicePreparerChatCompletions() def test_chat_completion_with_function_call_streaming_tracing_content_recording_enabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass import json from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, FunctionCall, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition from azure.ai.inference import ChatCompletionsClient @@ -1587,6 +1642,11 @@ def get_weather(city: str) -> str: @ServicePreparerChatCompletions() def test_chat_completion_with_function_call_streaming_tracing_content_recording_disabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass import json from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, FunctionCall, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition from azure.ai.inference import ChatCompletionsClient From 0c286c3efca8b7d5fb12452550928a9e5f9eb14d Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Fri, 27 Sep 2024 15:24:20 -0500 Subject: [PATCH 25/35] updating readme and sample --- sdk/ai/azure-ai-inference/samples/README.md | 9 +-------- .../samples/sample_chat_completions_with_tracing.py | 3 ++- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/sdk/ai/azure-ai-inference/samples/README.md b/sdk/ai/azure-ai-inference/samples/README.md index ebc8990ceb7a..34fc9920f2fa 100644 --- a/sdk/ai/azure-ai-inference/samples/README.md +++ b/sdk/ai/azure-ai-inference/samples/README.md @@ -24,14 +24,7 @@ See [Prerequisites](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ * Clone or download this sample repository * Open a command prompt / terminal window in this samples folder -* Install the client library for Python 
with pip: - ```bash - pip install azure-ai-inference - ``` - or update an existing installation: - ```bash - pip install --upgrade azure-ai-inference - ``` +* Install the client library for Python with pip. See [Install the package](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/README.md#install-the-package) * If you plan to run the asynchronous client samples, install the additional package [aiohttp](https://pypi.org/project/aiohttp/): ```bash pip install aiohttp ``` diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py index 392d779d6377..ae97a00de6c2 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py @@ -70,7 +70,8 @@ def chat_completion_streaming(key, endpoint): # The tracer.start_as_current_span decorator will trace the function call and enable adding additional attributes # to the span in the function implementation. Note that this will trace the function parameters and their values. -@tracer.start_as_current_span("get_temperature") +# Uncomment the following line to add instrumentation for the function call. +#@tracer.start_as_current_span("get_temperature") def get_temperature(city: str) -> str: # Adding attributes to the current span From 1aaf87c6f8479e95fee57d2a499a9f00bdd4d160 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Fri, 27 Sep 2024 16:12:22 -0500 Subject: [PATCH 26/35] adding ignore related to tool issue --- .../samples/sample_chat_completions_with_tracing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py index ae97a00de6c2..cf712218092b 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py @@ -70,8 +70,7 @@ def chat_completion_streaming(key, endpoint): # The tracer.start_as_current_span decorator will trace the function call and enable adding additional attributes # to the span in the function implementation. Note that this will trace the function parameters and their values. -# Uncomment the following line to add instrumentation for the function call. -#@tracer.start_as_current_span("get_temperature") +@tracer.start_as_current_span("get_temperature") # type: ignore def get_temperature(city: str) -> str: # Adding attributes to the current span From 510a6cab4b39fc5921c0ec64e0c5c7a6c84e2511 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Mon, 30 Sep 2024 09:45:49 -0500 Subject: [PATCH 27/35] updating code snippet in readme --- sdk/ai/azure-ai-inference/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/ai/azure-ai-inference/README.md b/sdk/ai/azure-ai-inference/README.md index 85e493b1047b..da6b1a87226c 100644 --- a/sdk/ai/azure-ai-inference/README.md +++ b/sdk/ai/azure-ai-inference/README.md @@ -602,7 +602,7 @@ tracer = get_tracer(__name__) # The tracer.start_as_current_span decorator will trace the function call and enable adding additional attributes # to the span in the function implementation. Note that this will trace the function parameters and their values.
-@tracer.start_as_current_span("get_temperature") +@tracer.start_as_current_span("get_temperature") # type: ignore def get_temperature(city: str) -> str: # Adding attributes to the current span From fa8e8b0bb2d70c5833b59a294f05c9b5080519e3 Mon Sep 17 00:00:00 2001 From: Darren Cohen <39422044+dargilco@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:29:19 -0700 Subject: [PATCH 28/35] Add missing `@recorded_by_proxy` decorators to new tracing tests --- .../tests/test_model_inference_async_client.py | 1 + .../tests/test_model_inference_client.py | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py index db955f0cc010..5678c62fc7e6 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py @@ -714,6 +714,7 @@ def modify_env_var(self, name, new_value): return current_value @ServicePreparerChatCompletions() + @recorded_by_proxy_async async def test_chat_completion_async_tracing_content_recording_disabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index 3acbe369ce57..60ba93a83092 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -834,6 +834,7 @@ def modify_env_var(self, name, new_value): return current_value @ServicePreparerChatCompletions() + @recorded_by_proxy def test_instrumentation(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -855,6 +856,7 @@ def test_instrumentation(self, **kwargs): assert exception_caught == False @ServicePreparerChatCompletions() + @recorded_by_proxy def test_instrumenting_twice_causes_exception(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -877,6 +879,7 @@ def test_instrumenting_twice_causes_exception(self, **kwargs): assert exception_caught == True @ServicePreparerChatCompletions() + @recorded_by_proxy def test_uninstrumenting_uninstrumented_causes_exception(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -894,6 +897,7 @@ def test_uninstrumenting_uninstrumented_causes_exception(self, **kwargs): assert exception_caught == True @ServicePreparerChatCompletions() + @recorded_by_proxy def test_uninstrumenting_twice_causes_exception(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -916,6 +920,7 @@ def test_uninstrumenting_twice_causes_exception(self, **kwargs): assert exception_caught == True @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -964,6 +969,7 @@ def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -1026,6 +1032,7 @@ def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): 
AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -1081,6 +1088,7 @@ def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kw AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -1150,6 +1158,7 @@ def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwa AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_with_function_call_tracing_content_recording_enabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -1317,6 +1326,7 @@ def get_weather(city: str) -> str: AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_with_function_call_tracing_content_recording_disabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -1436,6 +1446,7 @@ def get_weather(city: str) -> str: AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_with_function_call_streaming_tracing_content_recording_enabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -1641,6 +1652,7 @@ def get_weather(city: str) -> str: AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_with_function_call_streaming_tracing_content_recording_disabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: From e410c311058989c8c37badb0667a56f2b4ccbeb1 Mon Sep 17 00:00:00 2001 From: Darren Cohen <39422044+dargilco@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:30:12 -0700 Subject: [PATCH 29/35] Push new recordings --- sdk/ai/azure-ai-inference/assets.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/ai/azure-ai-inference/assets.json b/sdk/ai/azure-ai-inference/assets.json index 08e25bc30c7d..fdb9e05b8246 100644 --- a/sdk/ai/azure-ai-inference/assets.json +++ b/sdk/ai/azure-ai-inference/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "python", "TagPrefix": "python/ai/azure-ai-inference", - "Tag": "python/ai/azure-ai-inference_498e85cbfd" + "Tag": "python/ai/azure-ai-inference_19a0adafc6" } From 18b3d92999eb2d6ae2f2930800c4ba28d837c58e Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Wed, 2 Oct 2024 14:28:57 -0500 Subject: [PATCH 30/35] fixing issues reported by tools --- .../core/tracing/ai/inference/__init__.py | 4 + .../inference/_ai_inference_instrumentor.py | 14 +- .../_ai_inference_instrumentor_impl.py | 279 +++++++++++------- .../dev_requirements.txt | 1 + .../azure-core-tracing-opentelemetry/setup.py | 3 +- 5 files changed, 182 insertions(+), 119 deletions(-) diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py index 88064b3607a6..9797b8c02824 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py 
+++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py @@ -3,3 +3,7 @@ # Licensed under the MIT License. # ------------------------------------ from ._ai_inference_instrumentor import AIInferenceInstrumentor + +__all__ = [ + "AIInferenceInstrumentor", +] diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py index 883ecd63b2b9..11113e7f6d48 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py @@ -5,21 +5,19 @@ class AIInferenceInstrumentor: - def __init__(self): - super().__init__() - def str_to_bool(self, s): - if s is None: - return False - return str(s).lower() == 'true' + if s is None: + return False + return str(s).lower() == "true" def instrument(self): if self.is_instrumented(): raise RuntimeError("Already instrumented") - + var_value = os.environ.get("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED") enable_content_tracing = self.str_to_bool(var_value) from ._ai_inference_instrumentor_impl import _instrument_inference + _instrument_inference(enable_content_tracing) def uninstrument(self): @@ -27,8 +25,10 @@ def uninstrument(self): raise RuntimeError("Not instrumented") from ._ai_inference_instrumentor_impl import _uninstrument_inference + _uninstrument_inference() def is_instrumented(self): from ._ai_inference_instrumentor_impl import _is_instrumented + return _is_instrumented() diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py index b0543da2f361..a72942c9c451 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py @@ -2,7 +2,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. 
# --------------------------------------------------------- -import asyncio import copy import functools import importlib @@ -11,19 +10,22 @@ from urllib.parse import urlparse from enum import Enum from typing import Any, Iterator, Callable, Optional, List, Tuple, Dict, Union -from azure.ai.inference.aio import ChatCompletionsClient +from opentelemetry.trace import StatusCode, Span + +# pylint: disable = no-name-in-module +from azure.core import CaseInsensitiveEnumMeta # type: ignore from azure.ai.inference import models as _models -from azure.core.tracing import AbstractSpan -from azure.core.tracing import SpanKind + +# pylint: disable = no-name-in-module +from azure.core.tracing import AbstractSpan, SpanKind # type: ignore from azure.core.settings import settings -from opentelemetry.trace import Status, StatusCode, Span _inference_traces_enabled: bool = False _trace_inference_content: bool = False INFERENCE_GEN_AI_SYSTEM_NAME = "az.ai.inference" -class TraceType(str, Enum): +class TraceType(str, Enum, metaclass=CaseInsensitiveEnumMeta): # pylint: disable=C4747 """An enumeration class to represent different types of traces.""" INFERENCE = "Inference" @@ -47,27 +49,27 @@ def _add_request_chat_message_event(span: AbstractSpan, **kwargs: Any) -> None: name = f"gen_ai.{message.get('role')}.message" span.span_instance.add_event( name=name, - attributes={ - "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(message) - } + attributes={"gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, "gen_ai.event.content": json.dumps(message)}, ) -def parse_url(url): - parsed = urlparse(url) - server_address = parsed.hostname - port = parsed.port - return server_address, port +def parse_url(url): + parsed = urlparse(url) + server_address = parsed.hostname + port = parsed.port + return server_address, port def _add_request_chat_attributes(span: AbstractSpan, *args: Any, **kwargs: Any) -> None: client = args[0] - endpoint = client._config.endpoint + endpoint = client._config.endpoint # pylint: disable=protected-access server_address, port = parse_url(endpoint) - model = 'chat' - if kwargs.get('model') is not None: - model = kwargs.get('model') + model = "chat" + if kwargs.get("model") is not None: + model_value = kwargs.get("model") + if model_value is not None: + model = model_value + _set_attributes( span, ("gen_ai.operation.name", "chat"), @@ -85,40 +87,48 @@ def _add_request_chat_attributes(span: AbstractSpan, *args: Any, **kwargs: Any) def remove_function_call_names_and_arguments(tool_calls: list) -> list: tool_calls_copy = copy.deepcopy(tool_calls) for tool_call in tool_calls_copy: - if 'function' in tool_call: - if 'name' in tool_call['function']: - del tool_call['function']['name'] - if 'arguments' in tool_call['function']: - del tool_call['function']['arguments'] - if not tool_call['function']: - del tool_call['function'] + if "function" in tool_call: + if "name" in tool_call["function"]: + del tool_call["function"]["name"] + if "arguments" in tool_call["function"]: + del tool_call["function"]["arguments"] + if not tool_call["function"]: + del tool_call["function"] return tool_calls_copy def get_finish_reasons(result): if hasattr(result, "choices") and result.choices: - return [getattr(choice, "finish_reason", None).value if getattr(choice, "finish_reason", None) is not None else "none" for choice in result.choices] - else: - return None + return [ + ( + getattr(choice, "finish_reason", None).value + if getattr(choice, "finish_reason", None) is not None + else "none" + ) + 
for choice in result.choices + ] + return None def get_finish_reason_for_choice(choice): - return getattr(choice, "finish_reason", None).value if getattr(choice, "finish_reason", None) is not None else "none" + return ( + getattr(choice, "finish_reason", None).value if getattr(choice, "finish_reason", None) is not None else "none" + ) def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCompletions) -> None: for choice in result.choices: if _trace_inference_content: - response: Dict[str, Any] = { + full_response: Dict[str, Any] = { "message": {"content": choice.message.content}, "finish_reason": get_finish_reason_for_choice(choice), "index": choice.index, } if choice.message.tool_calls: - response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls] - attributes={ + full_response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls] + attributes = { "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(response) + "gen_ai.event.content": json.dumps(full_response), } else: response: Dict[str, Any] = { @@ -127,35 +137,46 @@ def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCom } if choice.message.tool_calls: response["message"] = {} - tool_calls_function_names_and_arguments_removed = remove_function_call_names_and_arguments(choice.message.tool_calls) - response["message"]["tool_calls"] = [tool.as_dict() for tool in tool_calls_function_names_and_arguments_removed] - attributes={ + tool_calls_function_names_and_arguments_removed = remove_function_call_names_and_arguments( + choice.message.tool_calls + ) + response["message"]["tool_calls"] = [ + tool.as_dict() for tool in tool_calls_function_names_and_arguments_removed + ] + attributes = { "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(response) + "gen_ai.event.content": json.dumps(response), } else: - attributes={ + attributes = { "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(response) + "gen_ai.event.content": json.dumps(response), } span.span_instance.add_event(name="gen_ai.choice", attributes=attributes) -def _add_response_chat_attributes(span: AbstractSpan, result: Union[_models.ChatCompletions, _models.StreamingChatCompletionsUpdate]) -> None: +def _add_response_chat_attributes( + span: AbstractSpan, result: Union[_models.ChatCompletions, _models.StreamingChatCompletionsUpdate] +) -> None: _set_attributes( span, ("gen_ai.response.id", result.id), ("gen_ai.response.model", result.model), - ("gen_ai.usage.input_tokens", result.usage.prompt_tokens if hasattr(result, "usage") and result.usage else None), - ("gen_ai.usage.output_tokens", result.usage.completion_tokens if hasattr(result, "usage") and result.usage else None), + ( + "gen_ai.usage.input_tokens", + result.usage.prompt_tokens if hasattr(result, "usage") and result.usage else None, + ), + ( + "gen_ai.usage.output_tokens", + result.usage.completion_tokens if hasattr(result, "usage") and result.usage else None, + ), ) finish_reasons = get_finish_reasons(result) span.add_attribute("gen_ai.response.finish_reasons", finish_reasons) -def _add_request_span_attributes(span: AbstractSpan, span_name: str, args: Any, kwargs: Any) -> None: - global _trace_inference_content +def _add_request_span_attributes(span: AbstractSpan, _span_name: str, args: Any, kwargs: Any) -> None: _add_request_chat_attributes(span, *args, **kwargs) if _trace_inference_content: 
_add_request_chat_message_event(span, **kwargs) @@ -183,7 +204,9 @@ def _accumulate_response(item, accumulate: Dict[str, Any]) -> None: if item.delta.tool_calls is not None: for tool_call in item.delta.tool_calls: if tool_call.id: - accumulate["message"]["tool_calls"].append({"id": tool_call.id, "type": "", "function": {"name": "", "arguments": ""}}) + accumulate["message"]["tool_calls"].append( + {"id": tool_call.id, "type": "", "function": {"name": "", "arguments": ""}} + ) if tool_call.function: accumulate["message"]["tool_calls"][-1]["type"] = "function" if tool_call.function and tool_call.function.name: @@ -192,21 +215,24 @@ def _accumulate_response(item, accumulate: Dict[str, Any]) -> None: accumulate["message"]["tool_calls"][-1]["function"]["arguments"] += tool_call.function.arguments -def _wrapped_stream(stream_obj: _models.StreamingChatCompletions, span: AbstractSpan) -> _models.StreamingChatCompletions: +def _wrapped_stream( + stream_obj: _models.StreamingChatCompletions, span: AbstractSpan +) -> _models.StreamingChatCompletions: class StreamWrapper(_models.StreamingChatCompletions): def __init__(self, stream_obj): super().__init__(stream_obj._response) def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]: - global _trace_inference_content try: accumulate: Dict[str, Any] = {} + chunk = None for chunk in stream_obj: for item in chunk.choices: _accumulate_response(item, accumulate) yield chunk - _add_response_chat_attributes(span, chunk) + if chunk is not None: + _add_response_chat_attributes(span, chunk) except Exception as exc: # Set the span status to error @@ -226,22 +252,26 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]: accumulate["index"] = 0 # Delete message if content tracing is not enabled if not _trace_inference_content: - if 'message' in accumulate: - if 'content' in accumulate['message']: - del accumulate['message']['content'] - if not accumulate['message']: - del accumulate['message'] - if 'message' in accumulate: - if 'tool_calls' in accumulate['message']: - tool_calls_function_names_and_arguments_removed = remove_function_call_names_and_arguments(accumulate['message']['tool_calls']) - accumulate['message']['tool_calls'] = [tool for tool in tool_calls_function_names_and_arguments_removed] + if "message" in accumulate: + if "content" in accumulate["message"]: + del accumulate["message"]["content"] + if not accumulate["message"]: + del accumulate["message"] + if "message" in accumulate: + if "tool_calls" in accumulate["message"]: + tool_calls_function_names_and_arguments_removed = ( + remove_function_call_names_and_arguments(accumulate["message"]["tool_calls"]) + ) + accumulate["message"]["tool_calls"] = list( + tool_calls_function_names_and_arguments_removed + ) span.span_instance.add_event( name="gen_ai.choice", attributes={ "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(accumulate) - } + "gen_ai.event.content": json.dumps(accumulate), + }, ) span.finish() @@ -249,25 +279,26 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]: def _trace_sync_function( - function: Callable = None, + function: Callable, *, - args_to_ignore: Optional[List[str]] = None, - trace_type=TraceType.INFERENCE, - name: Optional[str] = None, + _args_to_ignore: Optional[List[str]] = None, + _trace_type=TraceType.INFERENCE, + _name: Optional[str] = None, ) -> Callable: """ Decorator that adds tracing to a synchronous function. - Args: - function (Callable): The function to be traced. 
- args_to_ignore (Optional[List[str]], optional): A list of argument names to be ignored in the trace. - Defaults to None. - trace_type (TraceType, optional): The type of the trace. Defaults to TraceType.INFERENCE. - name (str, optional): The name of the trace, will set to func name if not provided. - - - Returns: - Callable: The traced function. + :param function: The function to be traced. + :type function: Callable + :param args_to_ignore: A list of argument names to be ignored in the trace. + Defaults to None. + :type args_to_ignore: List[str], optional + :param trace_type: The type of the trace. Defaults to TraceType.INFERENCE. + :type trace_type: TraceType, optional + :param name: The name of the trace; will be set to the function name if not provided. + :type name: str, optional + :return: The traced function. + :rtype: Callable """ @functools.wraps(function) def inner(*args, **kwargs): @@ -280,10 +311,10 @@ def inner(*args, **kwargs): class_function_name = function.__qualname__ if class_function_name.startswith("ChatCompletionsClient.complete"): - if kwargs.get('model') is None: - span_name = f"chat" + if kwargs.get("model") is None: + span_name = "chat" else: - model = kwargs.get('model') + model = kwargs.get("model") span_name = f"chat {model}" span = span_impl_type(name=span_name, kind=SpanKind.CLIENT) @@ -310,29 +341,33 @@ def inner(*args, **kwargs): span.finish() return result + # Handle the default case (if the function name does not match) + return None # Ensure all paths return + return inner def _trace_async_function( - function: Callable = None, + function: Callable, *, - args_to_ignore: Optional[List[str]] = None, - trace_type=TraceType.INFERENCE, - name: Optional[str] = None, + _args_to_ignore: Optional[List[str]] = None, + _trace_type=TraceType.INFERENCE, + _name: Optional[str] = None, ) -> Callable: """ Decorator that adds tracing to an asynchronous function. - Args: - function (Callable): The function to be traced. - args_to_ignore (Optional[List[str]], optional): A list of argument names to be ignored in the trace. - Defaults to None. - trace_type (TraceType, optional): The type of the trace. Defaults to TraceType.INFERENCE. - name (str, optional): The name of the trace, will set to func name if not provided. - - - Returns: - Callable: The traced function. + :param function: The function to be traced. + :type function: Callable + :param args_to_ignore: A list of argument names to be ignored in the trace. + Defaults to None. + :type args_to_ignore: List[str], optional + :param trace_type: The type of the trace. Defaults to TraceType.INFERENCE. + :type trace_type: TraceType, optional + :param name: The name of the trace; will be set to the function name if not provided. + :type name: str, optional + :return: The traced function.
+ :rtype: Callable """ @functools.wraps(function) @@ -345,10 +380,10 @@ async def inner(*args, **kwargs): class_function_name = function.__qualname__ if class_function_name.startswith("ChatCompletionsClient.complete"): - if kwargs.get('model') is None: - span_name = f"chat" + if kwargs.get("model") is None: + span_name = "chat" else: - model = kwargs.get('model') + model = kwargs.get("model") span_name = f"chat {model}" span = span_impl_type(name=span_name, kind=SpanKind.CLIENT) @@ -378,24 +413,36 @@ async def inner(*args, **kwargs): return inner -def inject_async(f, trace_type, name): +def inject_async(f, _trace_type, _name): wrapper_fun = _trace_async_function(f) - wrapper_fun._original = f + wrapper_fun._original = f # pylint: disable=protected-access return wrapper_fun -def inject_sync(f, trace_type, name): +def inject_sync(f, _trace_type, _name): wrapper_fun = _trace_sync_function(f) - wrapper_fun._original = f + wrapper_fun._original = f # pylint: disable=protected-access return wrapper_fun def _inference_apis(): sync_apis = ( - ("azure.ai.inference", "ChatCompletionsClient", "complete", TraceType.INFERENCE, "inference_chat_completions_complete"), + ( + "azure.ai.inference", + "ChatCompletionsClient", + "complete", + TraceType.INFERENCE, + "inference_chat_completions_complete", + ), ) async_apis = ( - ("azure.ai.inference.aio", "ChatCompletionsClient", "complete", TraceType.INFERENCE, "inference_chat_completions_complete"), + ( + "azure.ai.inference.aio", + "ChatCompletionsClient", + "complete", + TraceType.INFERENCE, + "inference_chat_completions_complete", + ), ) return sync_apis, async_apis @@ -407,8 +454,8 @@ def _inference_api_list(): def _generate_api_and_injector(apis): - for apis, injector in apis: - for module_name, class_name, method_name, trace_type, name in apis: + for api, injector in apis: + for module_name, class_name, method_name, trace_type, name in api: try: module = importlib.import_module(module_name) api = getattr(module, class_name) @@ -417,11 +464,11 @@ def _generate_api_and_injector(apis): except AttributeError as e: # Log the attribute exception with the missing class information logging.warning( - f"AttributeError: The module '{module_name}' does not have the class '{class_name}'. {str(e)}" + "AttributeError: The module '%s' does not have the class '%s'. %s", module_name, class_name, str(e) ) - except Exception as e: + except Exception as e: # pylint: disable=broad-except # Log other exceptions as a warning, as we're not sure what they might be - logging.warning(f"An unexpected error occurred: {str(e)}") + logging.warning("An unexpected error occurred: '%s'", str(e)) def available_inference_apis_and_injectors(): @@ -429,16 +476,23 @@ def available_inference_apis_and_injectors(): Generates a sequence of tuples containing Inference API classes, method names, and corresponding injector functions. - Yields: - Tuples of (api_class, method_name, injector_function) + :return: A generator yielding tuples. + :rtype: tuple """ yield from _generate_api_and_injector(_inference_api_list()) def _instrument_inference(enable_content_tracing: bool = False): - """This function modifies the methods of the Inference API classes to inject logic before calling the original methods. + """This function modifies the methods of the Inference API classes to + inject logic before calling the original methods. The original methods are stored as _original attributes of the methods. + + :param enable_content_tracing: Indicates whether tracing of message content should be enabled. 
+ This also controls whether function call tool function names, + parameter names and parameter values are traced. + :type enable_content_tracing: bool """ + # pylint: disable=W0603 global _inference_traces_enabled global _trace_inference_content if _inference_traces_enabled: @@ -455,6 +509,7 @@ def _uninstrument_inference(): """This function restores the original methods of the Inference API classes by assigning them back from the _original attributes of the modified methods. """ + # pylint: disable=W0603 global _inference_traces_enabled global _trace_inference_content _trace_inference_content = False @@ -465,8 +520,10 @@ def _uninstrument_inference(): def _is_instrumented(): - """This function returns True if Inference API has already been instrumented - for tracing and False if the API has not been instrumented. + """This function returns True if the Inference library has already been instrumented + for tracing and False if it has not been instrumented. + + :return: A value indicating whether the Inference library is currently instrumented or not. + :rtype: bool """ - global _inference_traces_enabled return _inference_traces_enabled diff --git a/sdk/core/azure-core-tracing-opentelemetry/dev_requirements.txt b/sdk/core/azure-core-tracing-opentelemetry/dev_requirements.txt index 4397c64cc730..8c560bae2c94 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/dev_requirements.txt +++ b/sdk/core/azure-core-tracing-opentelemetry/dev_requirements.txt @@ -6,3 +6,4 @@ requests azure-storage-blob ../../servicebus/azure-servicebus ../../eventhub/azure-eventhub +../../ai/azure-ai-inference \ No newline at end of file diff --git a/sdk/core/azure-core-tracing-opentelemetry/setup.py b/sdk/core/azure-core-tracing-opentelemetry/setup.py index 929e1cb3fee6..aa49df834875 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/setup.py +++ b/sdk/core/azure-core-tracing-opentelemetry/setup.py @@ -64,6 +64,7 @@ python_requires=">=3.8", install_requires=[ "opentelemetry-api>=1.12.0", - "azure-core>=1.24.0", + "azure-core>=1.30.0", + "azure-ai-inference>=1.0.0b4" ], ) From 4a563540d1870a0305158615bc20a95d500e8b1a Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Wed, 2 Oct 2024 15:06:11 -0500 Subject: [PATCH 31/35] adding inference to shared requirements --- shared_requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/shared_requirements.txt b/shared_requirements.txt index e9fc401f0cd5..4cc574fd9157 100644 --- a/shared_requirements.txt +++ b/shared_requirements.txt @@ -57,6 +57,7 @@ opentelemetry-instrumentation-urllib3 opentelemetry-resource-detector-azure azure-nspkg azure-ai-nspkg +azure-ai-inference azure-cognitiveservices-nspkg azure-mgmt-nspkg azure-mixedreality-authentication From 58a754f410a96fab1a92de3e56c4e2135dd53741 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Wed, 2 Oct 2024 17:49:57 -0500 Subject: [PATCH 32/35] remove inference from setup --- sdk/core/azure-core-tracing-opentelemetry/setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdk/core/azure-core-tracing-opentelemetry/setup.py b/sdk/core/azure-core-tracing-opentelemetry/setup.py index aa49df834875..0513a8f12f8b 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/setup.py +++ b/sdk/core/azure-core-tracing-opentelemetry/setup.py @@ -64,7 +64,6 @@ python_requires=">=3.8", install_requires=[ "opentelemetry-api>=1.12.0", - "azure-core>=1.30.0", - "azure-ai-inference>=1.0.0b4" + "azure-core>=1.30.0" ], ) From 4ed67dc466e7fa8a41d79cdc69ef91781360137f Mon Sep 17 00:00:00 2001 From: Marko Hietala Date:
Thu, 3 Oct 2024 10:07:52 -0500 Subject: [PATCH 33/35] adding comma to setup --- sdk/core/azure-core-tracing-opentelemetry/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/core/azure-core-tracing-opentelemetry/setup.py b/sdk/core/azure-core-tracing-opentelemetry/setup.py index 0513a8f12f8b..929e1cb3fee6 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/setup.py +++ b/sdk/core/azure-core-tracing-opentelemetry/setup.py @@ -64,6 +64,6 @@ python_requires=">=3.8", install_requires=[ "opentelemetry-api>=1.12.0", - "azure-core>=1.30.0" + "azure-core>=1.24.0", ], ) From 5a0aa713889a15c9269267aa9f8a94175bc02af9 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Thu, 3 Oct 2024 10:53:06 -0500 Subject: [PATCH 34/35] updating version requirement for core --- sdk/core/azure-core-tracing-opentelemetry/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/core/azure-core-tracing-opentelemetry/setup.py b/sdk/core/azure-core-tracing-opentelemetry/setup.py index 929e1cb3fee6..600f3d09d35e 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/setup.py +++ b/sdk/core/azure-core-tracing-opentelemetry/setup.py @@ -64,6 +64,6 @@ python_requires=">=3.8", install_requires=[ "opentelemetry-api>=1.12.0", - "azure-core>=1.24.0", + "azure-core>=1.30.0", ], ) From 121497809fab0abf06979599d1b0dda2ac70c52d Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Mon, 7 Oct 2024 15:28:30 -0500 Subject: [PATCH 35/35] changes based on review comments --- .../azure/ai/inference/_patch.py | 4 +- .../azure/ai/inference/aio/_patch.py | 2 +- .../sample_chat_completions_with_tracing.py | 2 +- .../inference/_ai_inference_instrumentor.py | 36 ++++++++++++- .../_ai_inference_instrumentor_impl.py | 50 ++++++++++++------- 5 files changed, 69 insertions(+), 25 deletions(-) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py index ec641d8c6444..4ff53fa7360f 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py @@ -102,7 +102,7 @@ def load_client( "The AI model information is missing a value for `model type`. Cannot create an appropriate client." 
) - # TODO: Remove "completions", "chat-comletions" and "embedding" once Mistral Large and Cohere fixes their model type + # TODO: Remove "completions", "chat-completions" and "embedding" once Mistral Large and Cohere fix their model type if model_info.model_type in (_models.ModelType.CHAT, "completion", "chat-completion", "chat-completions"): chat_completion_client = ChatCompletionsClient(endpoint, credential, **kwargs) chat_completion_client._model_info = ( # pylint: disable=protected-access,attribute-defined-outside-init @@ -454,7 +454,7 @@ def complete( :raises ~azure.core.exceptions.HttpResponseError: """ - @distributed_trace + # pylint:disable=client-method-missing-tracing-decorator def complete( self, body: Union[JSON, IO[bytes]] = _Unset, diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py index 8b49d289bfd5..f8cdd4f892aa 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py @@ -630,7 +630,7 @@ async def complete( return _deserialize(_models._patch.ChatCompletions, response.json()) # pylint: disable=protected-access - @distributed_trace_async + # pylint:disable=client-method-missing-tracing-decorator-async async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: # pylint: disable=line-too-long """Returns information about the AI model. diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py index cf712218092b..8fb1c1c67123 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py @@ -29,7 +29,7 @@ import os from opentelemetry import trace # opentelemetry-sdk is required for the opentelemetry.sdk imports. -# You can install it with command "pip install opentelemetry.sdk". +# You can install it with command "pip install opentelemetry-sdk". #from opentelemetry.sdk.trace import TracerProvider #from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter from azure.ai.inference import ChatCompletionsClient diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py index 11113e7f6d48..150134ed610c 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py @@ -5,22 +5,48 @@ class AIInferenceInstrumentor: - def str_to_bool(self, s): + """ + A class for managing the trace instrumentation of AI Inference. + + This class allows enabling or disabling tracing for AI Inference + and provides functionality to check whether instrumentation is active. + """ + + def _str_to_bool(self, s): if s is None: return False return str(s).lower() == "true" def instrument(self): + """ + Enable instrumentation for AI Inference. + + Raises: + RuntimeError: If instrumentation is already enabled. + + This method checks the environment variable + 'AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED' to determine + whether to enable content tracing.
+ """ if self.is_instrumented(): raise RuntimeError("Already instrumented") var_value = os.environ.get("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED") - enable_content_tracing = self.str_to_bool(var_value) + enable_content_tracing = self._str_to_bool(var_value) from ._ai_inference_instrumentor_impl import _instrument_inference _instrument_inference(enable_content_tracing) def uninstrument(self): + """ + Disable instrumentation for AI Inference. + + Raises: + RuntimeError: If instrumentation is not currently enabled. + + This method removes any active instrumentation, stopping the tracing + of AI Inference. + """ if not self.is_instrumented(): raise RuntimeError("Not instrumented") @@ -29,6 +55,12 @@ def uninstrument(self): _uninstrument_inference() def is_instrumented(self): + """ + Check if instrumentation for AI Inference is currently enabled. + + :return: True if instrumentation is active, False otherwise. + :rtype: bool + """ from ._ai_inference_instrumentor_impl import _is_instrumented return _is_instrumented() diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py index a72942c9c451..54a219c5aa31 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py @@ -84,6 +84,7 @@ def _add_request_chat_attributes(span: AbstractSpan, *args: Any, **kwargs: Any) span.add_attribute("server.port", port) +# When content tracing is not enabled, function calls, function parameter names and values are not traced. 
def remove_function_call_names_and_arguments(tool_calls: list) -> list: tool_calls_copy = copy.deepcopy(tool_calls) for tool_call in tool_calls_copy: @@ -99,14 +100,24 @@ def remove_function_call_names_and_arguments(tool_calls: list) -> list: def get_finish_reasons(result): if hasattr(result, "choices") and result.choices: - return [ - ( - getattr(choice, "finish_reason", None).value - if getattr(choice, "finish_reason", None) is not None - else "none" - ) - for choice in result.choices - ] + finish_reasons = [] + for choice in result.choices: + finish_reason = getattr(choice, "finish_reason", None) + + if finish_reason is None: + # If finish_reason is None, default to "none" + finish_reasons.append("none") + elif hasattr(finish_reason, "value"): + # If finish_reason has a 'value' attribute (i.e., it's an enum), use it + finish_reasons.append(finish_reason.value) + elif isinstance(finish_reason, str): + # If finish_reason is a string, use it directly + finish_reasons.append(finish_reason) + else: + # For any other type, you might want to handle it or default to "none" + finish_reasons.append("none") + + return finish_reasons return None @@ -143,15 +154,11 @@ def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCom response["message"]["tool_calls"] = [ tool.as_dict() for tool in tool_calls_function_names_and_arguments_removed ] - attributes = { - "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(response), - } - else: - attributes = { - "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(response), - } + + attributes = { + "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, + "gen_ai.event.content": json.dumps(response), + } span.span_instance.add_event(name="gen_ai.choice", attributes=attributes) @@ -332,7 +339,8 @@ def inner(*args, **kwargs): # Set the span status to error if isinstance(span.span_instance, Span): span.span_instance.set_status(StatusCode.ERROR, description=str(exc)) - module = exc.__module__ if exc.__module__ != "builtins" else "" + module = getattr(exc, "__module__", "") + module = module if module != "builtins" else "" error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__ _set_attributes(span, ("error.type", error_type)) span.finish() @@ -401,7 +409,8 @@ async def inner(*args, **kwargs): # Set the span status to error if isinstance(span.span_instance, Span): span.span_instance.set_status(StatusCode.ERROR, description=str(exc)) - module = exc.__module__ if exc.__module__ != "builtins" else "" + module = getattr(exc, "__module__", "") + module = module if module != "builtins" else "" error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__ _set_attributes(span, ("error.type", error_type)) span.finish() @@ -410,6 +419,9 @@ async def inner(*args, **kwargs): span.finish() return result + # Handle the default case (if the function name does not match) + return None # Ensure all paths return + return inner
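The changes above all hang off one wrap-and-stash pattern: inject_sync and inject_async replace ChatCompletionsClient.complete with a traced wrapper and keep the original callable on the wrapper's _original attribute, which is exactly what _uninstrument_inference swaps back later. A stripped-down sketch of that mechanism follows, using generic names rather than the SDK's actual helpers:

import functools

class Client:
    def complete(self, prompt: str) -> str:
        return f"echo: {prompt}"

def _wrap(cls, method_name: str) -> None:
    original = getattr(cls, method_name)

    @functools.wraps(original)
    def wrapper(*args, **kwargs):
        # A real injector would open a span here and record request/response
        # attributes around this call.
        return original(*args, **kwargs)

    wrapper._original = original  # stash the original for later restoration
    setattr(cls, method_name, wrapper)

def _unwrap(cls, method_name: str) -> None:
    wrapper = getattr(cls, method_name)
    setattr(cls, method_name, wrapper._original)

_wrap(Client, "complete")
assert Client().complete("hi") == "echo: hi"  # behavior unchanged, now traceable
_unwrap(Client, "complete")
assert not hasattr(Client.complete, "_original")  # original fully restored

Stashing the original on the wrapper itself, rather than in a separate registry, keeps uninstrumenting a simple attribute swap and is what makes double-instrumenting detectable.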
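For reference, a minimal end-to-end usage sketch of the instrumentor added in this series. It is not part of the patch: the console exporter and the AZURE_AI_CHAT_ENDPOINT / AZURE_AI_CHAT_KEY variable names are illustrative assumptions, and any configured OpenTelemetry exporter or credential source would work as well:

import os

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import UserMessage
from azure.core.credentials import AzureKeyCredential
from azure.core.settings import settings
from azure.core.tracing.ai.inference import AIInferenceInstrumentor

# Tell azure-core to emit OpenTelemetry spans and route them to stdout
# (an illustrative exporter choice, not a requirement of the patch).
settings.tracing_implementation = "opentelemetry"
trace.set_tracer_provider(TracerProvider())
trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))

# instrument() reads this variable once, at instrumentation time; message
# content and tool-call details are only recorded when it is "true".
os.environ["AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED"] = "true"

instrumentor = AIInferenceInstrumentor()
instrumentor.instrument()  # raises RuntimeError if already instrumented
assert instrumentor.is_instrumented()

client = ChatCompletionsClient(
    endpoint=os.environ["AZURE_AI_CHAT_ENDPOINT"],  # assumed variable name
    credential=AzureKeyCredential(os.environ["AZURE_AI_CHAT_KEY"]),  # assumed variable name
)
# complete() is now wrapped: each call produces a client span named
# "chat" or "chat {model}" carrying gen_ai.* attributes and events.
response = client.complete(messages=[UserMessage(content="How many feet are in a mile?")])
print(response.choices[0].message.content)

instrumentor.uninstrument()  # restores the original complete(); raises if not instrumented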