From 06cef9167ef9ea41cfc1a4e0de8da29e6fe42f27 Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Wed, 14 Aug 2024 14:39:33 -0400
Subject: [PATCH 01/35] adding inference trace injection

---
 .../azure-core/azure/core/tracing/__init__.py |   1 +
 .../tracing/_generative_ai_trace_injectors.py |  48 +++
 .../core/tracing/_inference_api_injector.py   | 370 ++++++++++++++++++
 3 files changed, 419 insertions(+)
 create mode 100644 sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py
 create mode 100644 sdk/core/azure-core/azure/core/tracing/_inference_api_injector.py

diff --git a/sdk/core/azure-core/azure/core/tracing/__init__.py b/sdk/core/azure-core/azure/core/tracing/__init__.py
index ecf6fe6da8df..703d89e31a23 100644
--- a/sdk/core/azure-core/azure/core/tracing/__init__.py
+++ b/sdk/core/azure-core/azure/core/tracing/__init__.py
@@ -8,5 +8,6 @@
     HttpSpanMixin,
     Link,
 )
+from ._generative_ai_trace_injectors import start_generative_ai_traces, stop_generative_ai_traces, GenerativeAIPackage
 
 __all__ = ["AbstractSpan", "SpanKind", "HttpSpanMixin", "Link"]
diff --git a/sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py b/sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py
new file mode 100644
index 000000000000..91ced9f54509
--- /dev/null
+++ b/sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py
@@ -0,0 +1,48 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+from enum import Enum
+
+class GenerativeAIPackage(str, Enum):
+    """An enumeration class to represent the packages that provide generative AI traces."""
+
+    INFERENCE = "azure.ai.inference"
+
+
+def start_generative_ai_traces(package_name: GenerativeAIPackage, enable_content_tracing: bool=False):
+    """This function starts generative AI traces for the requested package.
+
+    Args:
+        package_name (GenerativeAIPackage): The package for which generative AI tracing is to be started.
+        enable_content_tracing (bool, optional): Configures whether the message content gets traced as part of the generative AI traces for the specific package
+            for which tracing is being started, as specified in the package_name parameter.
+            Note that this value is package-specific; in other words, the value passed in will only apply to the specific
+            package for which the traces are requested to be started and will not have an impact on any traces previously started
+            for other packages.
+            Defaults to False.
+
+    Raises:
+        RuntimeError: If traces for the requested package have already been started.
+        ValueError: The specified package does not support generative AI traces.
+    """
+    if package_name == GenerativeAIPackage.INFERENCE:
+        from ._inference_api_injector import _inject_inference_api
+        _inject_inference_api(enable_content_tracing)
+    else:
+        raise ValueError("The specified package does not support generative AI traces")
+
+
+def stop_generative_ai_traces(package_name: GenerativeAIPackage):
+    """This function stops tracing for the generative AI package.
+
+    Args:
+        package_name (GenerativeAIPackage): The package for which tracing is to be stopped.
+
+    Raises:
+        ValueError: The specified package does not support generative AI traces.
+    """
+    if package_name == GenerativeAIPackage.INFERENCE:
+        from ._inference_api_injector import _restore_inference_api
+        _restore_inference_api()
+    else:
+        raise ValueError("The specified package does not support generative AI traces")
diff --git a/sdk/core/azure-core/azure/core/tracing/_inference_api_injector.py b/sdk/core/azure-core/azure/core/tracing/_inference_api_injector.py
new file mode 100644
index 000000000000..3b369ec5373c
--- /dev/null
+++ b/sdk/core/azure-core/azure/core/tracing/_inference_api_injector.py
@@ -0,0 +1,370 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+import asyncio
+import functools
+import importlib
+import json
+import logging
+from enum import Enum
+from typing import Any, Iterator, Callable, Optional, List
+from azure.ai.inference.aio import ChatCompletionsClient
+from azure.ai.inference import models as _models
+from azure.core.tracing import AbstractSpan
+from azure.core.tracing import SpanKind
+from azure.core.settings import settings
+from .common import get_function_and_class_name
+
+_inference_traces_enabled: bool = False
+_trace_inference_content: bool = False
+
+class TraceType(str, Enum):
+    """An enumeration class to represent different types of traces."""
+
+    INFERENCE = "Inference"
+
+
+def _set_attributes(span: AbstractSpan, *attrs: tuple[str, Any]) -> None:
+    for attr in attrs:
+        key, value = attr
+        if value is not None:
+            span.add_attribute(key, value)
+
+
+def _add_request_chat_message_event(span: AbstractSpan, **kwargs: Any) -> None:
+    for message in kwargs.get("messages", []):
+        try:
+            message = message.as_dict()
+        except AttributeError:
+            pass
+
+        if message.get("role"):
+            name = f"gen_ai.{message.get('role')}.message"
+            span.span_instance.add_event(
+                name=name,
+                attributes={
+                    "get_ai.system": "openai",
+                    "gen_ai.event.content": json.dumps(message)
+                }
+            )
+
+
+def _add_request_chat_attributes(span: AbstractSpan, **kwargs: Any) -> None:
+    _set_attributes(
+        span,
+        ("gen_ai.system", "openai"),
+        ("gen_ai.request.model", kwargs.get("model")),
+        ("gen_ai.request.max_tokens", kwargs.get("max_tokens")),
+        ("gen_ai.request.temperature", kwargs.get("temperature")),
+        ("gen_ai.request.top_p", kwargs.get("top_p")),
+    )
+
+
+def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCompletions) -> None:
+    global _trace_inference_content
+    for choice in result.choices:
+        if _trace_inference_content:
+            response: dict[str, Any] = {
+                "message": {"content": choice.message.content},
+                "finish_reason": str(choice.finish_reason),
+                "index": choice.index,
+            }
+            attributes={
+                "get_ai.system": "openai",
+                "gen_ai.event.content": json.dumps(response)
+            }
+        else:
+            response: dict[str, Any] = {
+                "finish_reason": str(choice.finish_reason),
+                "index": choice.index,
+            }
+            attributes={
+                "get_ai.system": "openai",
+            }
+        if choice.message.tool_calls:
+            response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls]
+        span.span_instance.add_event(name="gen_ai.choice", attributes=attributes)
+
+
+def _add_response_chat_attributes(span: AbstractSpan, result: _models.ChatCompletions | _models.StreamingChatCompletionsUpdate) -> None:
+    _set_attributes(
+        span,
+        ("gen_ai.response.id", result.id),
+        ("gen_ai.response.model", result.model),
+        ("gen_ai.response.finish_reason", str(result.choices[-1].finish_reason)),
+        ("gen_ai.usage.completion_tokens", result.usage.completion_tokens if hasattr(result, "usage") and result.usage else None),
+        ("gen_ai.usage.prompt_tokens", result.usage.prompt_tokens if hasattr(result, "usage") and result.usage else None),
+    )
+
+
+def _add_request_span_attributes(span: AbstractSpan, span_name: str, kwargs: Any) -> None:
+    global _trace_inference_content
+    if span_name.startswith("ChatCompletionsClient.complete"):
+        _add_request_chat_attributes(span, **kwargs)
+        if _trace_inference_content:
+            _add_request_chat_message_event(span, **kwargs)
+    # TODO add more models here
+
+
+def _add_response_span_attributes(span: AbstractSpan, result: object) -> None:
+    if isinstance(result, _models.ChatCompletions):
+        _add_response_chat_attributes(span, result)
+        _add_response_chat_message_event(span, result)
+    # TODO add more models here
+
+
+def _accumulate_response(item, accumulate: dict[str, Any]) -> None:
+    if item.finish_reason:
+        accumulate["finish_reason"] = item.finish_reason
+    if item.index:
+        accumulate["index"] = item.index
+    if item.delta.content:
+        accumulate.setdefault("message", {})
+        accumulate["message"].setdefault("content", "")
+        accumulate["message"]["content"] += item.delta.content
+    if item.delta.tool_calls:
+        accumulate.setdefault("message", {})
+        accumulate["message"].setdefault("tool_calls", [])
+        for tool_call in item.delta.tool_calls:
+            if tool_call.id:
+                accumulate["message"]["tool_calls"].append({"id": tool_call.id, "type": "", "function": {"name": "", "arguments": ""}})
+            if tool_call.type:
+                accumulate["message"]["tool_calls"][-1]["type"] = tool_call.type
+            if tool_call.function and tool_call.function.name:
+                accumulate["message"]["tool_calls"][-1]["function"]["name"] = tool_call.function.name
+            if tool_call.function and tool_call.function.arguments:
+                accumulate["message"]["tool_calls"][-1]["function"]["arguments"] += tool_call.function.arguments
+
+
+def _wrapped_stream(stream_obj: _models.StreamingChatCompletions, span: AbstractSpan) -> _models.StreamingChatCompletions:
+    class StreamWrapper(_models.StreamingChatCompletions):
+        def __init__(self, stream_obj):
+            super().__init__(stream_obj._response)
+
+        def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
+            global _trace_inference_content
+            try:
+                accumulate: dict[str, Any] = {}
+                for chunk in stream_obj:
+                    for item in chunk.choices:
+                        _accumulate_response(item, accumulate)
+                    yield chunk
+
+                if _trace_inference_content:
+                    span.span_instance.add_event(
+                        name="gen_ai.choice",
+                        attributes={
+                            "get_ai.system": "openai",
+                            "gen_ai.event.content": json.dumps(accumulate)
+                        }
+                    )
+                _add_response_chat_attributes(span, chunk)
+
+            except Exception as exc:
+                _set_attributes(span, ("error.type", exc.__class__.__name__))
+                raise
+
+            finally:
+                if stream_obj._done is False:
+                    if accumulate.get("finish_reason") is None:
+                        accumulate["finish_reason"] = "error"
+                    if _trace_inference_content:
+                        span.span_instance.add_event(
+                            name="gen_ai.choice",
+                            attributes={
+                                "get_ai.system": "openai",
+                                "gen_ai.event.content": json.dumps(accumulate)
+                            }
+                        )
+                span.finish()
+
+    return StreamWrapper(stream_obj)
+
+
+def _trace_sync_function(
+    func: Callable = None,
+    *,
+    args_to_ignore: Optional[List[str]] = None,
+    trace_type=TraceType.INFERENCE,
+    name: Optional[str] = None,
+) -> Callable:
+    """
+    Decorator that adds tracing to a synchronous function.
+
+    Args:
+        func (Callable): The function to be traced.
+        args_to_ignore (Optional[List[str]], optional): A list of argument names to be ignored in the trace.
+            Defaults to None.
+        trace_type (TraceType, optional): The type of the trace. Defaults to TraceType.INFERENCE.
+        name (str, optional): The name of the trace, will set to func name if not provided.
+
+
+    Returns:
+        Callable: The traced function.
+    """
+
+    @functools.wraps(func)
+    def inner(*args, **kwargs):
+
+        span_impl_type = settings.tracing_implementation()
+        if span_impl_type is None:
+            return func(*args, **kwargs)
+
+        span_name = get_function_and_class_name(func, *args)
+        span = span_impl_type(name=span_name, kind=SpanKind.INTERNAL)
+        try:
+            # tracing events not supported in azure-core-tracing-opentelemetry
+            # so need to access the span instance directly
+            with span_impl_type.change_context(span.span_instance):
+                _add_request_span_attributes(span, span_name, kwargs)
+                result = func(*args, **kwargs)
+                if kwargs.get("stream") is True:
+                    return _wrapped_stream(result, span)
+                _add_response_span_attributes(span, result)
+
+        except Exception as exc:
+            _set_attributes(span, ("error.type", exc.__class__.__name__))
+            span.finish()
+            raise
+
+        span.finish()
+        return result
+
+    return inner
+
+
+def _trace_async_function(
+    func: Callable = None,
+    *,
+    args_to_ignore: Optional[List[str]] = None,
+    trace_type=TraceType.INFERENCE,
+    name: Optional[str] = None,
+) -> Callable:
+    """
+    Decorator that adds tracing to an asynchronous function.
+
+    Args:
+        func (Callable): The function to be traced.
+        args_to_ignore (Optional[List[str]], optional): A list of argument names to be ignored in the trace.
+            Defaults to None.
+        trace_type (TraceType, optional): The type of the trace. Defaults to TraceType.INFERENCE.
+        name (str, optional): The name of the trace, will set to func name if not provided.
+
+
+    Returns:
+        Callable: The traced function.
+    """
+
+    @functools.wraps(func)
+    async def inner(*args, **kwargs):
+
+        span_impl_type = settings.tracing_implementation()
+        if span_impl_type is None:
+            return func(*args, **kwargs)
+
+        span_name = get_function_and_class_name(func, *args)
+        span = span_impl_type(name=span_name, kind=SpanKind.INTERNAL)
+        try:
+            # tracing events not supported in azure-core-tracing-opentelemetry
+            # so need to access the span instance directly
+            with span_impl_type.change_context(span.span_instance):
+                _add_request_span_attributes(span, span_name, kwargs)
+                result = await func(*args, **kwargs)
+                if kwargs.get("stream") is True:
+                    return _wrapped_stream(result, span)
+                _add_response_span_attributes(span, result)
+
+        except Exception as exc:
+            _set_attributes(span, ("error.type", exc.__class__.__name__))
+            span.finish()
+            raise
+
+        span.finish()
+        return result
+
+    return inner
+
+
+def inject_async(f, trace_type, name):
+    wrapper_fun = _trace_async_function(f)
+    wrapper_fun._original = f
+    return wrapper_fun
+
+
+def inject_sync(f, trace_type, name):
+    wrapper_fun = _trace_sync_function(f)
+    wrapper_fun._original = f
+    return wrapper_fun
+
+
+def _inference_apis():
+    sync_apis = (
+        ("azure.ai.inference", "ChatCompletionsClient", "complete", TraceType.INFERENCE, "inference_chat_completions_complete"),
+    )
+    async_apis = ()
+    return sync_apis, async_apis
+
+
+def _inference_api_list():
+    sync_apis, async_apis = _inference_apis()
+    yield sync_apis, inject_sync
+    yield async_apis, inject_async
+
+
+def _generate_api_and_injector(apis):
+    for apis, injector in apis:
+        for module_name, class_name, method_name, trace_type, name in apis:
+            try:
+                module = importlib.import_module(module_name)
+                api = getattr(module, class_name)
+                if hasattr(api, method_name):
+                    yield api, method_name, trace_type, injector, name
+            except AttributeError as e:
+                # Log the attribute exception with the missing class information
+                logging.warning(
+                    f"AttributeError: The module '{module_name}' does not have the class '{class_name}'. {str(e)}"
+                )
+            except Exception as e:
+                # Log other exceptions as a warning, as we're not sure what they might be
+                logging.warning(f"An unexpected error occurred: {str(e)}")
+
+
+def available_inference_apis_and_injectors():
+    """
+    Generates a sequence of tuples containing Inference API classes, method names, and
+    corresponding injector functions.
+
+    Yields:
+        Tuples of (api_class, method_name, injector_function)
+    """
+    yield from _generate_api_and_injector(_inference_api_list())
+
+
+def _inject_inference_api(enable_content_tracing: bool = False):
+    """This function modifies the methods of the Inference API classes to inject logic before calling the original methods.
+    The original methods are stored as _original attributes of the methods.
+    """
+    global _inference_traces_enabled
+    global _trace_inference_content
+    if _inference_traces_enabled:
+        raise RuntimeError("Traces already started for azure.ai.inference")
+    _inference_traces_enabled = True
+    _trace_inference_content = enable_content_tracing
+    for api, method, trace_type, injector, name in available_inference_apis_and_injectors():
+        # Check if the method of the api class has already been modified
+        if not hasattr(getattr(api, method), "_original"):
+            setattr(api, method, injector(getattr(api, method), trace_type, name))
+
+
+def _restore_inference_api():
+    """This function restores the original methods of the Inference API classes
+    by assigning them back from the _original attributes of the modified methods.
+    """
+    global _inference_traces_enabled
+    global _trace_inference_content
+    _trace_inference_content = False
+    for api, method, _, _, _ in available_inference_apis_and_injectors():
+        if hasattr(getattr(api, method), "_original"):
+            setattr(api, method, getattr(getattr(api, method), "_original"))
+    _inference_traces_enabled = False
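For orientation, the API introduced by this first patch is consumed as follows. This is an illustrative sketch only: it assumes the OpenTelemetry tracing plugin for azure-core is installed and that exporters are configured elsewhere, and the traced client call is a placeholder.

    from azure.core.settings import settings
    from azure.core.tracing import GenerativeAIPackage, start_generative_ai_traces, stop_generative_ai_traces

    # Select the OpenTelemetry span implementation for azure-core.
    settings.tracing_implementation = "opentelemetry"

    # Patch ChatCompletionsClient.complete so each call emits a span;
    # message content stays out of the spans unless explicitly enabled.
    start_generative_ai_traces(GenerativeAIPackage.INFERENCE, enable_content_tracing=False)
    try:
        ...  # calls made through azure.ai.inference clients are now traced
    finally:
        stop_generative_ai_traces(GenerativeAIPackage.INFERENCE)
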
From 9dc2cf957febe69a7cc63b6678e4ac84d2a4cea0 Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Fri, 16 Aug 2024 12:47:39 -0400
Subject: [PATCH 02/35] changing the interface based on feedback

---
 .../azure-core/azure/core/tracing/__init__.py |  2 +-
 .../tracing/_generative_ai_trace_injectors.py | 48 ------------------
 .../tracing/_inference_api_instrumentor.py    | 34 +++++++++++++
 ...py => _inference_api_instrumentor_impl.py} | 32 ++++++++-----
 .../tracing/azure_telemetry_instrumentor.py   | 20 ++++++++
 5 files changed, 75 insertions(+), 61 deletions(-)
 delete mode 100644 sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py
 create mode 100644 sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor.py
 rename sdk/core/azure-core/azure/core/tracing/{_inference_api_injector.py => _inference_api_instrumentor_impl.py} (94%)
 create mode 100644 sdk/core/azure-core/azure/core/tracing/azure_telemetry_instrumentor.py

diff --git a/sdk/core/azure-core/azure/core/tracing/__init__.py b/sdk/core/azure-core/azure/core/tracing/__init__.py
index 703d89e31a23..741b9e07b267 100644
--- a/sdk/core/azure-core/azure/core/tracing/__init__.py
+++ b/sdk/core/azure-core/azure/core/tracing/__init__.py
@@ -8,6 +8,6 @@
     HttpSpanMixin,
     Link,
 )
-from ._generative_ai_trace_injectors import start_generative_ai_traces, stop_generative_ai_traces, GenerativeAIPackage
+from ._inference_api_instrumentor import InferenceApiInstrumentor
 
 __all__ = ["AbstractSpan", "SpanKind", "HttpSpanMixin", "Link"]
diff --git a/sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py b/sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py
deleted file mode 100644
index 91ced9f54509..000000000000
--- a/sdk/core/azure-core/azure/core/tracing/_generative_ai_trace_injectors.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-from enum import Enum
-
-class GenerativeAIPackage(str, Enum):
-    """An enumeration class to represent the packages that provide generative AI traces."""
-
-    INFERENCE = "azure.ai.inference"
-
-
-def start_generative_ai_traces(package_name: GenerativeAIPackage, enable_content_tracing: bool=False):
-    """This function starts generative AI traces for the requested package.
-
-    Args:
-        package_name (GenerativeAIPackage): The package for which generative AI tracing is to be started.
-        enable_content_tracing (bool, optional): Configures whether the message content gets traced as part of the generative AI traces for the specific package
-            for which tracing is being started, as specified in the package_name parameter.
-            Note that this value is package-specific; in other words, the value passed in will only apply to the specific
-            package for which the traces are requested to be started and will not have an impact on any traces previously started
-            for other packages.
-            Defaults to False.
-
-    Raises:
-        RuntimeError: If traces for the requested package have already been started.
-        ValueError: The specified package does not support generative AI traces.
-    """
-    if package_name == GenerativeAIPackage.INFERENCE:
-        from ._inference_api_injector import _inject_inference_api
-        _inject_inference_api(enable_content_tracing)
-    else:
-        raise ValueError("The specified package does not support generative AI traces")
-
-
-def stop_generative_ai_traces(package_name: GenerativeAIPackage):
-    """This function stops tracing for the generative AI package.
-
-    Args:
-        package_name (GenerativeAIPackage): The package for which tracing is to be stopped.
-
-    Raises:
-        ValueError: The specified package does not support generative AI traces.
-    """
-    if package_name == GenerativeAIPackage.INFERENCE:
-        from ._inference_api_injector import _restore_inference_api
-        _restore_inference_api()
-    else:
-        raise ValueError("The specified package does not support generative AI traces")
diff --git a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor.py b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor.py
new file mode 100644
index 000000000000..e47cb26784cd
--- /dev/null
+++ b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor.py
@@ -0,0 +1,34 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+import os
+from .azure_telemetry_instrumentor import AzureTelemetryInstrumentor
+
+class InferenceApiInstrumentor(AzureTelemetryInstrumentor):
+    def __init__(self):
+        super().__init__()
+
+    def str_to_bool(self, s):
+        if s is None:
+            return False
+        return str(s).lower() == 'true'
+
+    def instrument(self):
+        if self.is_instrumented():
+            raise RuntimeError("Already instrumented")
+
+        var_value = os.environ.get("AZURE_INFERENCE_API_ENABLE_CONTENT_TRACING")
+        enable_content_tracing = self.str_to_bool(var_value)
+        from ._inference_api_instrumentor_impl import _inject_inference_api
+        _inject_inference_api(enable_content_tracing)
+
+    def uninstrument(self):
+        if not self.is_instrumented():
+            raise RuntimeError("Not instrumented")
+
+        from ._inference_api_instrumentor_impl import _restore_inference_api
+        _restore_inference_api()
+
+    def is_instrumented(self):
+        from ._inference_api_instrumentor_impl import _is_instrumented
+        return _is_instrumented()
diff --git a/sdk/core/azure-core/azure/core/tracing/_inference_api_injector.py b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
similarity index 94%
rename from sdk/core/azure-core/azure/core/tracing/_inference_api_injector.py
rename to sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
index 3b369ec5373c..f2ceaeb18af2 100644
--- a/sdk/core/azure-core/azure/core/tracing/_inference_api_injector.py
+++ b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
@@ -183,7 +183,7 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
 
 
 def _trace_sync_function(
-    func: Callable = None,
+    function: Callable = None,
     *,
     args_to_ignore: Optional[List[str]] = None,
     trace_type=TraceType.INFERENCE,
@@ -193,7 +193,7 @@
     Decorator that adds tracing to a synchronous function.
 
     Args:
-        func (Callable): The function to be traced.
+        function (Callable): The function to be traced.
         args_to_ignore (Optional[List[str]], optional): A list of argument names to be ignored in the trace.
             Defaults to None.
         trace_type (TraceType, optional): The type of the trace. Defaults to TraceType.INFERENCE.
@@ -204,21 +204,21 @@
         Callable: The traced function.
     """
 
-    @functools.wraps(func)
+    @functools.wraps(function)
     def inner(*args, **kwargs):
 
         span_impl_type = settings.tracing_implementation()
         if span_impl_type is None:
-            return func(*args, **kwargs)
+            return function(*args, **kwargs)
 
-        span_name = get_function_and_class_name(func, *args)
+        span_name = get_function_and_class_name(function, *args)
         span = span_impl_type(name=span_name, kind=SpanKind.INTERNAL)
         try:
             # tracing events not supported in azure-core-tracing-opentelemetry
             # so need to access the span instance directly
             with span_impl_type.change_context(span.span_instance):
                 _add_request_span_attributes(span, span_name, kwargs)
-                result = func(*args, **kwargs)
+                result = function(*args, **kwargs)
                 if kwargs.get("stream") is True:
                     return _wrapped_stream(result, span)
                 _add_response_span_attributes(span, result)
@@ -235,7 +235,7 @@
 
 
 def _trace_async_function(
-    func: Callable = None,
+    function: Callable = None,
     *,
     args_to_ignore: Optional[List[str]] = None,
     trace_type=TraceType.INFERENCE,
@@ -245,7 +245,7 @@
     Decorator that adds tracing to an asynchronous function.
 
     Args:
-        func (Callable): The function to be traced.
+        function (Callable): The function to be traced.
         args_to_ignore (Optional[List[str]], optional): A list of argument names to be ignored in the trace.
             Defaults to None.
         trace_type (TraceType, optional): The type of the trace. Defaults to TraceType.INFERENCE.
@@ -256,21 +256,21 @@
         Callable: The traced function.
     """
 
-    @functools.wraps(func)
+    @functools.wraps(function)
     async def inner(*args, **kwargs):
 
         span_impl_type = settings.tracing_implementation()
         if span_impl_type is None:
-            return func(*args, **kwargs)
+            return function(*args, **kwargs)
 
-        span_name = get_function_and_class_name(func, *args)
+        span_name = get_function_and_class_name(function, *args)
         span = span_impl_type(name=span_name, kind=SpanKind.INTERNAL)
         try:
             # tracing events not supported in azure-core-tracing-opentelemetry
             # so need to access the span instance directly
             with span_impl_type.change_context(span.span_instance):
                 _add_request_span_attributes(span, span_name, kwargs)
-                result = await func(*args, **kwargs)
+                result = await function(*args, **kwargs)
                 if kwargs.get("stream") is True:
                     return _wrapped_stream(result, span)
                 _add_response_span_attributes(span, result)
@@ -368,3 +368,11 @@ def _restore_inference_api():
         if hasattr(getattr(api, method), "_original"):
             setattr(api, method, getattr(getattr(api, method), "_original"))
     _inference_traces_enabled = False
+
+
+def _is_instrumented():
+    """This function returns True if Inference API has already been instrumented
+    for tracing and False if the API has not been instrumented.
+    """
+    global _inference_traces_enabled
+    return _inference_traces_enabled
diff --git a/sdk/core/azure-core/azure/core/tracing/azure_telemetry_instrumentor.py b/sdk/core/azure-core/azure/core/tracing/azure_telemetry_instrumentor.py
new file mode 100644
index 000000000000..7950a442363e
--- /dev/null
+++ b/sdk/core/azure-core/azure/core/tracing/azure_telemetry_instrumentor.py
@@ -0,0 +1,20 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+from abc import ABC, abstractmethod
+
+class AzureTelemetryInstrumentor(ABC):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def instrument(self):
+        pass
+
+    @abstractmethod
+    def uninstrument(self):
+        pass
+
+    @abstractmethod
+    def is_instrumented(self):
+        pass
\ No newline at end of file
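The reworked surface replaces the module-level start/stop functions with an instrumentor object built on the new AzureTelemetryInstrumentor abstract base. A minimal sketch of the intended call pattern (illustrative only; content recording is now driven by an environment variable rather than a parameter):

    from azure.core.tracing import InferenceApiInstrumentor

    instrumentor = InferenceApiInstrumentor()
    if not instrumentor.is_instrumented():
        # Reads AZURE_INFERENCE_API_ENABLE_CONTENT_TRACING at this point.
        instrumentor.instrument()
    ...
    instrumentor.uninstrument()
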
From 58a032b851466d707c4a72f780dc3501c56ecea7 Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Fri, 16 Aug 2024 16:25:17 -0400
Subject: [PATCH 03/35] updates

---
 sdk/core/azure-core/azure/core/tracing/__init__.py  |  2 +-
 ...umentor.py => _ai_inference_api_instrumentor.py} |  2 +-
 .../tracing/_inference_api_instrumentor_impl.py     | 13 +++++++------
 3 files changed, 9 insertions(+), 8 deletions(-)
 rename sdk/core/azure-core/azure/core/tracing/{_inference_api_instrumentor.py => _ai_inference_api_instrumentor.py} (95%)

diff --git a/sdk/core/azure-core/azure/core/tracing/__init__.py b/sdk/core/azure-core/azure/core/tracing/__init__.py
index 741b9e07b267..79738b9fd650 100644
--- a/sdk/core/azure-core/azure/core/tracing/__init__.py
+++ b/sdk/core/azure-core/azure/core/tracing/__init__.py
@@ -8,6 +8,6 @@
     HttpSpanMixin,
     Link,
 )
-from ._inference_api_instrumentor import InferenceApiInstrumentor
+from ._ai_inference_api_instrumentor import AiInferenceApiInstrumentor
 
 __all__ = ["AbstractSpan", "SpanKind", "HttpSpanMixin", "Link"]
diff --git a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor.py b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
similarity index 95%
rename from sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor.py
rename to sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
index e47cb26784cd..5402e233f2f6 100644
--- a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor.py
+++ b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
@@ -4,7 +4,7 @@
 import os
 from .azure_telemetry_instrumentor import AzureTelemetryInstrumentor
 
-class InferenceApiInstrumentor(AzureTelemetryInstrumentor):
+class AiInferenceApiInstrumentor(AzureTelemetryInstrumentor):
     def __init__(self):
         super().__init__()
 
diff --git a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
index f2ceaeb18af2..22a0330dabd9 100644
--- a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
+++ b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
@@ -18,6 +18,7 @@
 
 _inference_traces_enabled: bool = False
 _trace_inference_content: bool = False
+INFERENCE_GEN_AI_SYSTEM_NAME = "azure.ai.inference"
 
 class TraceType(str, Enum):
     """An enumeration class to represent different types of traces."""
@@ -44,7 +45,7 @@ def _add_request_chat_message_event(span: AbstractSpan, **kwargs: Any) -> None:
             span.span_instance.add_event(
                 name=name,
                 attributes={
-                    "get_ai.system": "openai",
+                    "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                     "gen_ai.event.content": json.dumps(message)
                 }
             )
@@ -53,7 +54,7 @@ def _add_request_chat_message_event(span: AbstractSpan, **kwargs: Any) -> None:
 def _add_request_chat_attributes(span: AbstractSpan, **kwargs: Any) -> None:
     _set_attributes(
         span,
-        ("gen_ai.system", "openai"),
+        ("gen_ai.system", INFERENCE_GEN_AI_SYSTEM_NAME),
         ("gen_ai.request.model", kwargs.get("model")),
         ("gen_ai.request.max_tokens", kwargs.get("max_tokens")),
         ("gen_ai.request.temperature", kwargs.get("temperature")),
@@ -71,7 +72,7 @@ def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCompletions) -> None:
                 "index": choice.index,
             }
             attributes={
-                "get_ai.system": "openai",
+                "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                 "gen_ai.event.content": json.dumps(response)
             }
         else:
@@ -80,7 +81,7 @@
                 "index": choice.index,
             }
             attributes={
-                "get_ai.system": "openai",
+                "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
             }
         if choice.message.tool_calls:
             response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls]
@@ -155,7 +156,7 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
             span.span_instance.add_event(
                 name="gen_ai.choice",
                 attributes={
-                    "get_ai.system": "openai",
+                    "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                     "gen_ai.event.content": json.dumps(accumulate)
                 }
             )
@@ -173,7 +174,7 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
             span.span_instance.add_event(
                 name="gen_ai.choice",
                 attributes={
-                    "get_ai.system": "openai",
+                    "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                    "gen_ai.event.content": json.dumps(accumulate)
                 }
             )
From ec1cd166e1739332f9c78e25991d5bd113574f23 Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Tue, 20 Aug 2024 13:52:43 -0400
Subject: [PATCH 04/35] changing name of environment variable

---
 .../azure/core/tracing/_ai_inference_api_instrumentor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
index 5402e233f2f6..b6b2ae08caad 100644
--- a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
+++ b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
@@ -17,7 +17,7 @@ def instrument(self):
         if self.is_instrumented():
             raise RuntimeError("Already instrumented")
 
-        var_value = os.environ.get("AZURE_INFERENCE_API_ENABLE_CONTENT_TRACING")
+        var_value = os.environ.get("AZUREAI_INFERENCE_API_ENABLE_CONTENT_TRACING")
         enable_content_tracing = self.str_to_bool(var_value)
         from ._inference_api_instrumentor_impl import _inject_inference_api
         _inject_inference_api(enable_content_tracing)
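Patch 04 only renames the opt-in switch; the truthiness test is unchanged. An illustrative sketch of the toggle at this point in the series (the variable is read once, when instrument() runs, and only the literal "true", case-insensitively, enables content recording):

    import os
    from azure.core.tracing import AiInferenceApiInstrumentor

    # Any other value, or leaving the variable unset, means False.
    os.environ["AZUREAI_INFERENCE_API_ENABLE_CONTENT_TRACING"] = "true"
    AiInferenceApiInstrumentor().instrument()
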
From 327007684beb06a23dec9852629045c21de46f52 Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Fri, 6 Sep 2024 16:08:42 -0500
Subject: [PATCH 05/35] changes based on review comments and some other changes

---
 .../tracing/_ai_inference_api_instrumentor.py |   5 +-
 .../_inference_api_instrumentor_impl.py       | 166 +++++++++++-------
 2 files changed, 109 insertions(+), 62 deletions(-)

diff --git a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
index b6b2ae08caad..1964e3a83793 100644
--- a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
+++ b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
@@ -3,7 +3,8 @@
 # ---------------------------------------------------------
 import os
 from .azure_telemetry_instrumentor import AzureTelemetryInstrumentor
-
+
+
 class AiInferenceApiInstrumentor(AzureTelemetryInstrumentor):
     def __init__(self):
         super().__init__()
@@ -17,7 +18,7 @@ def instrument(self):
         if self.is_instrumented():
             raise RuntimeError("Already instrumented")
 
-        var_value = os.environ.get("AZUREAI_INFERENCE_API_ENABLE_CONTENT_TRACING")
+        var_value = os.environ.get("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED")
         enable_content_tracing = self.str_to_bool(var_value)
         from ._inference_api_instrumentor_impl import _inject_inference_api
         _inject_inference_api(enable_content_tracing)
diff --git a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
index 22a0330dabd9..36e14936e174 100644
--- a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
+++ b/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
@@ -7,18 +7,21 @@
 import importlib
 import json
 import logging
+from urllib.parse import urlparse
 from enum import Enum
-from typing import Any, Iterator, Callable, Optional, List
+from typing import Any, Iterator, Callable, Optional, List, Tuple, Dict
 from azure.ai.inference.aio import ChatCompletionsClient
 from azure.ai.inference import models as _models
 from azure.core.tracing import AbstractSpan
 from azure.core.tracing import SpanKind
 from azure.core.settings import settings
 from .common import get_function_and_class_name
+from opentelemetry.trace import Status, StatusCode, Span
 
 _inference_traces_enabled: bool = False
 _trace_inference_content: bool = False
-INFERENCE_GEN_AI_SYSTEM_NAME = "azure.ai.inference"
+INFERENCE_GEN_AI_SYSTEM_NAME = "az.ai.inference"
+
 
 class TraceType(str, Enum):
     """An enumeration class to represent different types of traces."""
@@ -26,7 +29,7 @@ class TraceType(str, Enum):
     INFERENCE = "Inference"
 
 
-def _set_attributes(span: AbstractSpan, *attrs: tuple[str, Any]) -> None:
+def _set_attributes(span: AbstractSpan, *attrs: Tuple[str, Any]) -> None:
     for attr in attrs:
         key, value = attr
         if value is not None:
@@ -45,67 +48,87 @@ def _add_request_chat_message_event(span: AbstractSpan, **kwargs: Any) -> None:
             span.span_instance.add_event(
                 name=name,
                 attributes={
-                    "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
+                    "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                     "gen_ai.event.content": json.dumps(message)
                 }
             )
 
 
-def _add_request_chat_attributes(span: AbstractSpan, **kwargs: Any) -> None:
+def parse_url(url):
+    parsed = urlparse(url)
+    server_address = parsed.hostname
+    port = parsed.port
+    return server_address, port
+
+
+def _add_request_chat_attributes(span: AbstractSpan, *args: Any, **kwargs: Any) -> None:
+    client = args[0]
+    endpoint = client._config.endpoint
+    server_address, port = parse_url(endpoint)
     _set_attributes(
         span,
+        ("gen_ai.operation.name", "chat"),
         ("gen_ai.system", INFERENCE_GEN_AI_SYSTEM_NAME),
         ("gen_ai.request.model", kwargs.get("model")),
         ("gen_ai.request.max_tokens", kwargs.get("max_tokens")),
         ("gen_ai.request.temperature", kwargs.get("temperature")),
         ("gen_ai.request.top_p", kwargs.get("top_p")),
+        ("server.address", server_address),
     )
+    if port is not None and port != 443:
+        span.add_attribute("server.port", port)
 
 
 def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCompletions) -> None:
-    global _trace_inference_content
     for choice in result.choices:
         if _trace_inference_content:
-            response: dict[str, Any] = {
+            response: Dict[str, Any] = {
                 "message": {"content": choice.message.content},
                 "finish_reason": str(choice.finish_reason),
                 "index": choice.index,
             }
             attributes={
-                "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
+                "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                 "gen_ai.event.content": json.dumps(response)
             }
         else:
-            response: dict[str, Any] = {
+            response: Dict[str, Any] = {
                 "finish_reason": str(choice.finish_reason),
                 "index": choice.index,
             }
             attributes={
-                "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
+                "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
             }
         if choice.message.tool_calls:
             response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls]
         span.span_instance.add_event(name="gen_ai.choice", attributes=attributes)
 
 
+def get_finish_reasons(result):
+    if hasattr(result, "choices") and result.choices:
+        return [getattr(choice, "finish_reason", None).value if getattr(choice, "finish_reason", None) is not None else "none" for choice in result.choices]
+    else:
+        return None
+
+
 def _add_response_chat_attributes(span: AbstractSpan, result: _models.ChatCompletions | _models.StreamingChatCompletionsUpdate) -> None:
+
     _set_attributes(
         span,
         ("gen_ai.response.id", result.id),
         ("gen_ai.response.model", result.model),
-        ("gen_ai.response.finish_reason", str(result.choices[-1].finish_reason)),
-        ("gen_ai.usage.completion_tokens", result.usage.completion_tokens if hasattr(result, "usage") and result.usage else None),
-        ("gen_ai.usage.prompt_tokens", result.usage.prompt_tokens if hasattr(result, "usage") and result.usage else None),
+        ("gen_ai.usage.input_tokens", result.usage.prompt_tokens if hasattr(result, "usage") and result.usage else None),
+        ("gen_ai.usage.output_tokens", result.usage.completion_tokens if hasattr(result, "usage") and result.usage else None),
     )
+    finish_reasons = get_finish_reasons(result)
+    span.add_attribute("gen_ai.response.finish_reasons", finish_reasons)
 
 
-def _add_request_span_attributes(span: AbstractSpan, span_name: str, kwargs: Any) -> None:
+def _add_request_span_attributes(span: AbstractSpan, span_name: str, args: Any, kwargs: Any) -> None:
     global _trace_inference_content
-    if span_name.startswith("ChatCompletionsClient.complete"):
-        _add_request_chat_attributes(span, **kwargs)
-        if _trace_inference_content:
-            _add_request_chat_message_event(span, **kwargs)
-    # TODO add more models here
+    _add_request_chat_attributes(span, *args, **kwargs)
+    if _trace_inference_content:
+        _add_request_chat_message_event(span, **kwargs)
 
 
 def _add_response_span_attributes(span: AbstractSpan, result: object) -> None:
@@ -115,7 +138,7 @@ def _add_response_span_attributes(span: AbstractSpan, result: object) -> None:
     # TODO add more models here
 
 
-def _accumulate_response(item, accumulate: dict[str, Any]) -> None:
+def _accumulate_response(item, accumulate: Dict[str, Any]) -> None:
     if item.finish_reason:
         accumulate["finish_reason"] = item.finish_reason
     if item.index:
@@ -146,7 +169,7 @@ def __init__(self, stream_obj):
         def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
             global _trace_inference_content
             try:
-                accumulate: dict[str, Any] = {}
+                accumulate: Dict[str, Any] = {}
                 for chunk in stream_obj:
                     for item in chunk.choices:
                         _accumulate_response(item, accumulate)
@@ -155,14 +179,19 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
                     span.span_instance.add_event(
                         name="gen_ai.choice",
                         attributes={
-                            "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
+                            "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                             "gen_ai.event.content": json.dumps(accumulate)
                         }
                     )
                 _add_response_chat_attributes(span, chunk)
 
             except Exception as exc:
-                _set_attributes(span, ("error.type", exc.__class__.__name__))
+                # Set the span status to error
+                if isinstance(span.span_instance, Span):
+                    span.span_instance.set_status(StatusCode.ERROR, description=str(exc))
+                module = exc.__module__ if exc.__module__ != "builtins" else ""
+                error_type = f"{module}.{exc.__qualname__}" if module else exc.__qualname__
+                _set_attributes(span, ("error.type", error_type))
                 raise
 
             finally:
@@ -174,7 +202,7 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
                     span.span_instance.add_event(
                         name="gen_ai.choice",
                         attributes={
-                            "get_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
+                            "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME,
                             "gen_ai.event.content": json.dumps(accumulate)
                         }
                     )
@@ -212,25 +240,34 @@ def inner(*args, **kwargs):
         if span_impl_type is None:
             return function(*args, **kwargs)
 
-        span_name = get_function_and_class_name(function, *args)
-        span = span_impl_type(name=span_name, kind=SpanKind.INTERNAL)
-        try:
-            # tracing events not supported in azure-core-tracing-opentelemetry
-            # so need to access the span instance directly
-            with span_impl_type.change_context(span.span_instance):
-                _add_request_span_attributes(span, span_name, kwargs)
-                result = function(*args, **kwargs)
-                if kwargs.get("stream") is True:
-                    return _wrapped_stream(result, span)
-                _add_response_span_attributes(span, result)
-
-        except Exception as exc:
-            _set_attributes(span, ("error.type", exc.__class__.__name__))
-            span.finish()
-            raise
+        class_function_name = get_function_and_class_name(function, *args)
 
-        span.finish()
-        return result
+        if class_function_name.startswith("ChatCompletionsClient.complete"):
+            # span_name = {gen_ai.operation.name} {gen_ai.request.model}
+            span_name = f"chat {kwargs.get('model')}"
+            span = span_impl_type(name=span_name, kind=SpanKind.CLIENT)
+            try:
+                # tracing events not supported in azure-core-tracing-opentelemetry
+                # so need to access the span instance directly
+                with span_impl_type.change_context(span.span_instance):
+                    _add_request_span_attributes(span, span_name, args, kwargs)
+                    result = function(*args, **kwargs)
+                    if kwargs.get("stream") is True:
+                        return _wrapped_stream(result, span)
+                    _add_response_span_attributes(span, result)
+
+            except Exception as exc:
+                # Set the span status to error
+                if isinstance(span.span_instance, Span):
+                    span.span_instance.set_status(StatusCode.ERROR, description=str(exc))
+                module = exc.__module__ if exc.__module__ != "builtins" else ""
+                error_type = f"{module}.{exc.__qualname__}" if module else exc.__qualname__
+                _set_attributes(span, ("error.type", error_type))
+                span.finish()
+                raise
+
+            span.finish()
+            return result
 
     return inner
@@ -264,25 +301,34 @@ async def inner(*args, **kwargs):
         if span_impl_type is None:
             return function(*args, **kwargs)
 
-        span_name = get_function_and_class_name(function, *args)
-        span = span_impl_type(name=span_name, kind=SpanKind.INTERNAL)
-        try:
-            # tracing events not supported in azure-core-tracing-opentelemetry
-            # so need to access the span instance directly
-            with span_impl_type.change_context(span.span_instance):
-                _add_request_span_attributes(span, span_name, kwargs)
-                result = await function(*args, **kwargs)
-                if kwargs.get("stream") is True:
-                    return _wrapped_stream(result, span)
-                _add_response_span_attributes(span, result)
-
-        except Exception as exc:
-            _set_attributes(span, ("error.type", exc.__class__.__name__))
-            span.finish()
-            raise
+        class_function_name = get_function_and_class_name(function, *args)
 
-        span.finish()
-        return result
+        if class_function_name.startswith("ChatCompletionsClient.complete"):
+            # span_name = {gen_ai.operation.name} {gen_ai.request.model}
+            span_name = f"chat {kwargs.get('model')}"
+            span = span_impl_type(name=span_name, kind=SpanKind.CLIENT)
+            try:
+                # tracing events not supported in azure-core-tracing-opentelemetry
+                # so need to access the span instance directly
+                with span_impl_type.change_context(span.span_instance):
+                    _add_request_span_attributes(span, span_name, args, kwargs)
+                    result = await function(*args, **kwargs)
+                    if kwargs.get("stream") is True:
+                        return _wrapped_stream(result, span)
+                    _add_response_span_attributes(span, result)
+
+            except Exception as exc:
+                # Set the span status to error
+                if isinstance(span.span_instance, Span):
+                    span.span_instance.set_status(StatusCode.ERROR, description=str(exc))
+                module = exc.__module__ if exc.__module__ != "builtins" else ""
+                error_type = f"{module}.{exc.__qualname__}" if module else exc.__qualname__
+                _set_attributes(span, ("error.type", error_type))
+                span.finish()
+                raise
+
+            span.finish()
+            return result
 
     return inner
From 7cbbc0b7e450afc43c9323dee2fccbf977e39087 Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Fri, 6 Sep 2024 16:19:16 -0500
Subject: [PATCH 06/35] file name change

---
 .../azure/core/tracing/_ai_inference_api_instrumentor.py    | 6 +++---
 ...entor_impl.py => _ai_inference_api_instrumentor_impl.py} | 0
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename sdk/core/azure-core/azure/core/tracing/{_inference_api_instrumentor_impl.py => _ai_inference_api_instrumentor_impl.py} (100%)

diff --git a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
index 1964e3a83793..09e25113902e 100644
--- a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
+++ b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
@@ -20,16 +20,16 @@ def instrument(self):
 
         var_value = os.environ.get("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED")
         enable_content_tracing = self.str_to_bool(var_value)
-        from ._inference_api_instrumentor_impl import _inject_inference_api
+        from ._ai_inference_api_instrumentor_impl import _inject_inference_api
         _inject_inference_api(enable_content_tracing)
 
     def uninstrument(self):
         if not self.is_instrumented():
             raise RuntimeError("Not instrumented")
 
-        from ._inference_api_instrumentor_impl import _restore_inference_api
+        from ._ai_inference_api_instrumentor_impl import _restore_inference_api
         _restore_inference_api()
 
     def is_instrumented(self):
-        from ._inference_api_instrumentor_impl import _is_instrumented
+        from ._ai_inference_api_instrumentor_impl import _is_instrumented
         return _is_instrumented()
diff --git a/sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py
similarity index 100%
rename from sdk/core/azure-core/azure/core/tracing/_inference_api_instrumentor_impl.py
rename to sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py
From 941a9ae1f87e2726569691d26d587a2b2c88a7c0 Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Tue, 10 Sep 2024 10:27:50 -0500
Subject: [PATCH 07/35] fixing exception handling

---
 .../core/tracing/_ai_inference_api_instrumentor_impl.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py
index 36e14936e174..6bc7773500bd 100644
--- a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py
+++ b/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py
@@ -190,7 +190,7 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]:
                 if isinstance(span.span_instance, Span):
                     span.span_instance.set_status(StatusCode.ERROR, description=str(exc))
                 module = exc.__module__ if exc.__module__ != "builtins" else ""
-                error_type = f"{module}.{exc.__qualname__}" if module else exc.__qualname__
+                error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__
                 _set_attributes(span, ("error.type", error_type))
                 raise
 
@@ -261,7 +261,7 @@ def inner(*args, **kwargs):
                 if isinstance(span.span_instance, Span):
                     span.span_instance.set_status(StatusCode.ERROR, description=str(exc))
                 module = exc.__module__ if exc.__module__ != "builtins" else ""
-                error_type = f"{module}.{exc.__qualname__}" if module else exc.__qualname__
+                error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__
                 _set_attributes(span, ("error.type", error_type))
                 span.finish()
                 raise
@@ -322,7 +322,7 @@ async def inner(*args, **kwargs):
                 if isinstance(span.span_instance, Span):
                     span.span_instance.set_status(StatusCode.ERROR, description=str(exc))
                 module = exc.__module__ if exc.__module__ != "builtins" else ""
-                error_type = f"{module}.{exc.__qualname__}" if module else exc.__qualname__
+                error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__
                 _set_attributes(span, ("error.type", error_type))
                 span.finish()
                 raise
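The fully-qualified exception-name logic that patches 05 and 07 converge on can be exercised in isolation. A standalone sketch of the same expression (note that __qualname__ exists on exception classes, not instances, which is why patch 07 switches to type(exc).__name__):

    import json

    def _error_type(exc: Exception) -> str:
        # Built-in exceptions are reported bare; everything else is module-qualified.
        module = exc.__module__ if exc.__module__ != "builtins" else ""
        return f"{module}.{type(exc).__name__}" if module else type(exc).__name__

    assert _error_type(ValueError("boom")) == "ValueError"
    assert _error_type(json.JSONDecodeError("boom", "", 0)) == "json.decoder.JSONDecodeError"
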
From bcc6e7459ec77941b4dcac61ff6bc8854ab823fa Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Tue, 10 Sep 2024 11:02:24 -0500
Subject: [PATCH 08/35] relocating inference trace instrumentation

---
 .../azure/core/tracing/ai/__init__.py               | 1 +
 .../azure/core/tracing/ai/inference/__init__.py     | 5 +++++
 .../tracing/ai/inference}/_ai_inference_api_instrumentor.py | 0
 .../ai/inference}/_ai_inference_api_instrumentor_impl.py | 2 +-
 .../tracing/ai/inference}/azure_telemetry_instrumentor.py | 0
 sdk/core/azure-core-tracing-opentelemetry/setup.py  | 1 +
 6 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/__init__.py
 create mode 100644 sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py
 rename sdk/core/{azure-core/azure/core/tracing => azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference}/_ai_inference_api_instrumentor.py (100%)
 rename sdk/core/{azure-core/azure/core/tracing => azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference}/_ai_inference_api_instrumentor_impl.py (99%)
 rename sdk/core/{azure-core/azure/core/tracing => azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference}/azure_telemetry_instrumentor.py (100%)

diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/__init__.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/__init__.py
new file mode 100644
index 000000000000..d55ccad1f573
--- /dev/null
+++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/__init__.py
@@ -0,0 +1 @@
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py
new file mode 100644
index 000000000000..f28d7b1d1317
--- /dev/null
+++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py
@@ -0,0 +1,5 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+from ._ai_inference_api_instrumentor import AiInferenceApiInstrumentor
diff --git a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py
similarity index 100%
rename from sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor.py
rename to sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py
diff --git a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py
similarity index 99%
rename from sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py
rename to sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py
index 6bc7773500bd..a7a25ce59d69 100644
--- a/sdk/core/azure-core/azure/core/tracing/_ai_inference_api_instrumentor_impl.py
+++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py
@@ -15,7 +15,7 @@
 from azure.core.tracing import AbstractSpan
 from azure.core.tracing import SpanKind
 from azure.core.settings import settings
-from .common import get_function_and_class_name
+from azure.core.tracing.common import get_function_and_class_name
 from opentelemetry.trace import Status, StatusCode, Span
 
 _inference_traces_enabled: bool = False
diff --git a/sdk/core/azure-core/azure/core/tracing/azure_telemetry_instrumentor.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/azure_telemetry_instrumentor.py
similarity index 100%
rename from sdk/core/azure-core/azure/core/tracing/azure_telemetry_instrumentor.py
rename to sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/azure_telemetry_instrumentor.py
diff --git a/sdk/core/azure-core-tracing-opentelemetry/setup.py b/sdk/core/azure-core-tracing-opentelemetry/setup.py
index ae0a5baf512a..929e1cb3fee6 100644
--- a/sdk/core/azure-core-tracing-opentelemetry/setup.py
+++ b/sdk/core/azure-core-tracing-opentelemetry/setup.py
@@ -55,6 +55,7 @@
     zip_safe=False,
    packages=[
         "azure.core.tracing.ext.opentelemetry_span",
+        "azure.core.tracing.ai.inference",
     ],
     include_package_data=True,
     package_data={
From 709923c7d7a4b4ca70c88e834df9f3c9c4ca779c Mon Sep 17 00:00:00 2001
From: Marko Hietala
Date: Tue, 10 Sep 2024 11:26:13 -0500
Subject: [PATCH 09/35] reverting change in azure core tracing

---
 sdk/core/azure-core/azure/core/tracing/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sdk/core/azure-core/azure/core/tracing/__init__.py b/sdk/core/azure-core/azure/core/tracing/__init__.py
index 79738b9fd650..ecf6fe6da8df 100644
--- a/sdk/core/azure-core/azure/core/tracing/__init__.py
+++ b/sdk/core/azure-core/azure/core/tracing/__init__.py
@@ -8,6 +8,5 @@
     HttpSpanMixin,
     Link,
 )
-from ._ai_inference_api_instrumentor import AiInferenceApiInstrumentor
 
 __all__ = ["AbstractSpan", "SpanKind", "HttpSpanMixin", "Link"]
a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py index a7a25ce59d69..333d16144894 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py @@ -3,6 +3,7 @@ # --------------------------------------------------------- import asyncio +import copy import functools import importlib import json @@ -65,11 +66,14 @@ def _add_request_chat_attributes(span: AbstractSpan, *args: Any, **kwargs: Any) client = args[0] endpoint = client._config.endpoint server_address, port = parse_url(endpoint) + model = INFERENCE_GEN_AI_SYSTEM_NAME + if kwargs.get('model') is not None: + model = kwargs.get('model') _set_attributes( span, ("gen_ai.operation.name", "chat"), ("gen_ai.system", INFERENCE_GEN_AI_SYSTEM_NAME), - ("gen_ai.request.model", kwargs.get("model")), + ("gen_ai.request.model", model), ("gen_ai.request.max_tokens", kwargs.get("max_tokens")), ("gen_ai.request.temperature", kwargs.get("temperature")), ("gen_ai.request.top_p", kwargs.get("top_p")), @@ -79,38 +83,65 @@ def _add_request_chat_attributes(span: AbstractSpan, *args: Any, **kwargs: Any) span.add_attribute("server.port", port) +def remove_function_call_names_and_arguments(tool_calls: list) -> list: + tool_calls_copy = copy.deepcopy(tool_calls) + for tool_call in tool_calls_copy: + if 'function' in tool_call: + if 'name' in tool_call['function']: + del tool_call['function']['name'] + if 'arguments' in tool_call['function']: + del tool_call['function']['arguments'] + if not tool_call['function']: + del tool_call['function'] + return tool_calls_copy + + +def get_finish_reasons(result): + if hasattr(result, "choices") and result.choices: + return [getattr(choice, "finish_reason", None).value if getattr(choice, "finish_reason", None) is not None else "none" for choice in result.choices] + else: + return None + + +def get_finish_reason_for_choice(choice): + return getattr(choice, "finish_reason", None).value if getattr(choice, "finish_reason", None) is not None else "none" + + def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCompletions) -> None: for choice in result.choices: if _trace_inference_content: response: Dict[str, Any] = { "message": {"content": choice.message.content}, - "finish_reason": str(choice.finish_reason), + "finish_reason": get_finish_reason_for_choice(choice), "index": choice.index, } + if choice.message.tool_calls: + response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls] attributes={ "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, "gen_ai.event.content": json.dumps(response) } else: response: Dict[str, Any] = { - "finish_reason": str(choice.finish_reason), + "finish_reason": get_finish_reason_for_choice(choice), "index": choice.index, } - attributes={ - "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - } - if choice.message.tool_calls: - response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls] + if choice.message.tool_calls: + response["message"] = {} + tool_calls_function_names_and_arguments_removed = remove_function_call_names_and_arguments(choice.message.tool_calls) + response["message"]["tool_calls"] = [tool.as_dict() for tool in 
tool_calls_function_names_and_arguments_removed] + attributes={ + "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, + "gen_ai.event.content": json.dumps(response) + } + else: + attributes={ + "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, + "gen_ai.event.content": json.dumps(response) + } span.span_instance.add_event(name="gen_ai.choice", attributes=attributes) -def get_finish_reasons(result): - if hasattr(result, "choices") and result.choices: - return [getattr(choice, "finish_reason", None).value if getattr(choice, "finish_reason", None) is not None else "none" for choice in result.choices] - else: - return None - - def _add_response_chat_attributes(span: AbstractSpan, result: _models.ChatCompletions | _models.StreamingChatCompletionsUpdate) -> None: _set_attributes( @@ -150,15 +181,16 @@ def _accumulate_response(item, accumulate: Dict[str, Any]) -> None: if item.delta.tool_calls: accumulate.setdefault("message", {}) accumulate["message"].setdefault("tool_calls", []) - for tool_call in item.delta.tool_calls: - if tool_call.id: - accumulate["message"]["tool_calls"].append({"id": tool_call.id, "type": "", "function": {"name": "", "arguments": ""}}) - if tool_call.type: - accumulate["message"]["tool_calls"][-1]["type"] = tool_call.type - if tool_call.function and tool_call.function.name: - accumulate["message"]["tool_calls"][-1]["function"]["name"] = tool_call.function.name - if tool_call.function and tool_call.function.arguments: - accumulate["message"]["tool_calls"][-1]["function"]["arguments"] += tool_call.function.arguments + if item.delta.tool_calls is not None: + for tool_call in item.delta.tool_calls: + if tool_call.id: + accumulate["message"]["tool_calls"].append({"id": tool_call.id, "type": "", "function": {"name": "", "arguments": ""}}) + if tool_call.function: + accumulate["message"]["tool_calls"][-1]["type"] = "function" + if tool_call.function and tool_call.function.name: + accumulate["message"]["tool_calls"][-1]["function"]["name"] = tool_call.function.name + if tool_call.function and tool_call.function.arguments: + accumulate["message"]["tool_calls"][-1]["function"]["arguments"] += tool_call.function.arguments def _wrapped_stream(stream_obj: _models.StreamingChatCompletions, span: AbstractSpan) -> _models.StreamingChatCompletions: @@ -175,14 +207,6 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]: _accumulate_response(item, accumulate) yield chunk - if _trace_inference_content: - span.span_instance.add_event( - name="gen_ai.choice", - attributes={ - "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(accumulate) - } - ) _add_response_chat_attributes(span, chunk) except Exception as exc: @@ -198,14 +222,24 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]: if stream_obj._done is False: if accumulate.get("finish_reason") is None: accumulate["finish_reason"] = "error" - if _trace_inference_content: - span.span_instance.add_event( - name="gen_ai.choice", - attributes={ - "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(accumulate) - } - ) + else: + # Only one choice expected with streaming + accumulate["index"] = 0 + # Delete message if content tracing is not enabled + if not _trace_inference_content: + if 'message' in accumulate: + if 'content' in accumulate['message']: + del accumulate['message']['content'] + if not accumulate['message']: + del accumulate['message'] + + span.span_instance.add_event( + name="gen_ai.choice", + attributes={ + "gen_ai.system": 
INFERENCE_GEN_AI_SYSTEM_NAME, + "gen_ai.event.content": json.dumps(accumulate) + } + ) span.finish() return StreamWrapper(stream_obj) @@ -243,8 +277,10 @@ def inner(*args, **kwargs): class_function_name = get_function_and_class_name(function, *args) if class_function_name.startswith("ChatCompletionsClient.complete"): - # span_name = {gen_ai.operation.name} {gen_ai.request.model} - span_name = f"chat {kwargs.get('model')}" + model = INFERENCE_GEN_AI_SYSTEM_NAME + if kwargs.get('model') is not None: + model = kwargs.get('model') + span_name = f"chat {model}" span = span_impl_type(name=span_name, kind=SpanKind.CLIENT) try: # tracing events not supported in azure-core-tracing-opentelemetry @@ -305,7 +341,10 @@ async def inner(*args, **kwargs): if class_function_name.startswith("ChatCompletionsClient.complete"): # span_name = {gen_ai.operation.name} {gen_ai.request.model} - span_name = f"chat {kwargs.get('model')}" + model = INFERENCE_GEN_AI_SYSTEM_NAME + if kwargs.get('model') is not None: + model = kwargs.get('model') + span_name = f"chat {model}" span = span_impl_type(name=span_name, kind=SpanKind.CLIENT) try: # tracing events not supported in azure-core-tracing-opentelemetry From 198b9cd8c17fb8a3ab2cd116eab03630201d6bc8 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Tue, 17 Sep 2024 09:50:58 -0500 Subject: [PATCH 11/35] changing span and model name for cases when model info not available --- .../_ai_inference_api_instrumentor_impl.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py index 333d16144894..69de1bda9365 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py @@ -66,7 +66,7 @@ def _add_request_chat_attributes(span: AbstractSpan, *args: Any, **kwargs: Any) client = args[0] endpoint = client._config.endpoint server_address, port = parse_url(endpoint) - model = INFERENCE_GEN_AI_SYSTEM_NAME + model = 'chat' if kwargs.get('model') is not None: model = kwargs.get('model') _set_attributes( @@ -277,10 +277,12 @@ def inner(*args, **kwargs): class_function_name = get_function_and_class_name(function, *args) if class_function_name.startswith("ChatCompletionsClient.complete"): - model = INFERENCE_GEN_AI_SYSTEM_NAME - if kwargs.get('model') is not None: + if kwargs.get('model') is None: + span_name = f"chat" + else: model = kwargs.get('model') - span_name = f"chat {model}" + span_name = f"chat {model}" + span = span_impl_type(name=span_name, kind=SpanKind.CLIENT) try: # tracing events not supported in azure-core-tracing-opentelemetry @@ -340,11 +342,12 @@ async def inner(*args, **kwargs): class_function_name = get_function_and_class_name(function, *args) if class_function_name.startswith("ChatCompletionsClient.complete"): - # span_name = {gen_ai.operation.name} {gen_ai.request.model} - model = INFERENCE_GEN_AI_SYSTEM_NAME - if kwargs.get('model') is not None: + if kwargs.get('model') is None: + span_name = f"chat" + else: model = kwargs.get('model') - span_name = f"chat {model}" + span_name = f"chat {model}" + span = span_impl_type(name=span_name, kind=SpanKind.CLIENT) try: # tracing events not supported in azure-core-tracing-opentelemetry 
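
To illustrate the span-naming rule this patch settles on: the span name follows the OpenTelemetry gen-ai convention `{gen_ai.operation.name} {gen_ai.request.model}` when a model keyword is supplied, and falls back to the bare operation name `chat` when model info is not available. A standalone sketch of that rule (the helper name is hypothetical, not part of the patch):

```python
# Mirrors the patched logic above: "chat {model}" when the caller passed a
# model keyword, bare "chat" when model info is not available.
def chat_span_name(**kwargs) -> str:
    model = kwargs.get("model")
    return "chat" if model is None else f"chat {model}"

assert chat_span_name(model="mistral-large") == "chat mistral-large"
assert chat_span_name() == "chat"
```
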
From cd8bba21a179ee8fdc7dcefcc08e754eed329cd0 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Tue, 17 Sep 2024 13:01:24 -0500 Subject: [PATCH 12/35] some fixes --- .../core/tracing/ai/inference/__init__.py | 2 +- .../_ai_inference_api_instrumentor.py | 2 +- .../_ai_inference_api_instrumentor_impl.py | 20 +++++++++++-------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py index f28d7b1d1317..bd8ddc1e73b7 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py @@ -2,4 +2,4 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # ------------------------------------ -from ._ai_inference_api_instrumentor import AiInferenceApiInstrumentor +from ._ai_inference_api_instrumentor import AIInferenceApiInstrumentor diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py index 09e25113902e..5156b77ee11b 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py @@ -5,7 +5,7 @@ from .azure_telemetry_instrumentor import AzureTelemetryInstrumentor -class AiInferenceApiInstrumentor(AzureTelemetryInstrumentor): +class AIInferenceApiInstrumentor(AzureTelemetryInstrumentor): def __init__(self): super().__init__() diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py index 69de1bda9365..d41ab2ad74ca 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py @@ -232,14 +232,18 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]: del accumulate['message']['content'] if not accumulate['message']: del accumulate['message'] - - span.span_instance.add_event( - name="gen_ai.choice", - attributes={ - "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(accumulate) - } - ) + if 'message' in accumulate: + if 'tool_calls' in accumulate['message']: + tool_calls_function_names_and_arguments_removed = remove_function_call_names_and_arguments(accumulate['message']['tool_calls']) + accumulate['message']['tool_calls'] = [tool for tool in tool_calls_function_names_and_arguments_removed] + + span.span_instance.add_event( + name="gen_ai.choice", + attributes={ + "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, + "gen_ai.event.content": json.dumps(accumulate) + } + ) span.finish() return StreamWrapper(stream_obj) From b28a3fe2e826c79054dd3fdafc51d283fe7fa6db Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Fri, 20 Sep 2024 16:55:16 -0500 Subject: [PATCH 13/35] adding sync trace tests --- .../azure-ai-inference/dev_requirements.txt | 4 +- .../tests/gen_ai_trace_verifier.py | 103 ++ .../tests/memory_trace_exporter.py | 39 + 
.../tests/test_model_inference_client.py | 1110 +++++++++++++++++ 4 files changed, 1255 insertions(+), 1 deletion(-) create mode 100644 sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py create mode 100644 sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py diff --git a/sdk/ai/azure-ai-inference/dev_requirements.txt b/sdk/ai/azure-ai-inference/dev_requirements.txt index 105486471444..4f5b55a5a48a 100644 --- a/sdk/ai/azure-ai-inference/dev_requirements.txt +++ b/sdk/ai/azure-ai-inference/dev_requirements.txt @@ -1,3 +1,5 @@ -e ../../../tools/azure-sdk-tools ../../core/azure-core -aiohttp \ No newline at end of file +../../core/azure-core-tracing-opentelemetry +aiohttp +opentelemetry-sdk \ No newline at end of file diff --git a/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py new file mode 100644 index 000000000000..aeb8266abbc7 --- /dev/null +++ b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py @@ -0,0 +1,103 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# ------------------------------------ +import datetime +import json +from opentelemetry.sdk.trace import Span + + +class GenAiTraceVerifier: + + def check_span_attributes(self, span, attributes): + # Convert the list of tuples to a dictionary for easier lookup + attribute_dict = dict(attributes) + + for attribute_name in span.attributes.keys(): + # Check if the attribute name exists in the input attributes + if attribute_name not in attribute_dict: + return False + + attribute_value = attribute_dict[attribute_name] + if isinstance(attribute_value, list): + # Check if the attribute value in the span matches the provided list + if span.attributes[attribute_name] != attribute_value: + return False + elif isinstance(attribute_value, tuple): + # Check if the attribute value in the span matches the provided list + if span.attributes[attribute_name] != attribute_value: + return False + else: + # Check if the attribute value matches the provided value + if attribute_value != "" and span.attributes[attribute_name] != attribute_value: + return False + # Check if the attribute value in the span is not empty when the provided value is "" + elif attribute_value == "" and not span.attributes[attribute_name]: + return False + + return True + + def is_valid_json(self, my_string): + try: + json_object = json.loads(my_string) + except ValueError as e1: + return False + except TypeError as e2: + return False + return True + + def check_json_string(self, expected_json, actual_json): + if self.is_valid_json(expected_json) and self.is_valid_json(actual_json): + return self.check_event_attributes(json.loads(expected_json), json.loads(actual_json)) + else: + return False + + def check_event_attributes(self, expected_dict, actual_dict): + if set(expected_dict.keys()) != set(actual_dict.keys()): + return False + for key, expected_val in expected_dict.items(): + if key not in actual_dict: + return False + actual_val = actual_dict[key] + + if self.is_valid_json(expected_val): + if not self.is_valid_json(actual_val): + return False + if not self.check_json_string(expected_val, actual_val): + return False + elif isinstance(expected_val, dict): + if not isinstance(actual_val, dict): + return False + if not self.check_event_attributes(expected_val, actual_val): + return False + elif isinstance(expected_val, list): + if not isinstance(actual_val, list): + return False + if len(expected_val) != len(actual_val): + return False + for expected_list, 
actual_list in zip(expected_val, actual_val): + if not self.check_event_attributes(expected_list, actual_list): + return False + elif isinstance(expected_val, str) and expected_val == "*": + if actual_val == "": + return False + elif expected_val != actual_val: + return False + return True + + def check_span_events(self, span, expected_events): + span_events = list(span.events) # Create a list of events from the span + + for expected_event in expected_events: + for actual_event in span_events: + if expected_event['name'] == actual_event.name: + if not self.check_event_attributes(expected_event['attributes'], actual_event.attributes): + return False + span_events.remove(actual_event) # Remove the matched event from the span_events + break + else: + return False # If no match found for an expected event + + if len(span_events) > 0: # If there are any additional events in the span_events + return False + + return True diff --git a/sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py b/sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py new file mode 100644 index 000000000000..7563e65cfc87 --- /dev/null +++ b/sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py @@ -0,0 +1,39 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +from opentelemetry.sdk.trace import Span +from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult +from typing import List, Sequence + + +class MemoryTraceExporter(SpanExporter): + + def __init__(self): + self._trace_list = [] + + def export(self, spans: Sequence[Span]) -> SpanExportResult: + for span in spans: + self._trace_list.append(span) + return SpanExportResult.SUCCESS + + def shutdown(self) -> None: + self._trace_list.clear() + + def get_trace_list(self) -> List[Span]: + return self._trace_list + + def contains(self, text: str) -> bool: + for span in self._trace_list: + if text in str(span): + return True + return False + + def get_spans_by_name_starts_with(self, name_prefix: str) -> List[Span]: + return [span for span in self._trace_list if span.name.startswith(name_prefix)] + + def get_spans_by_name(self, name: str) -> List[Span]: + return [span for span in self._trace_list if span.name == name] + + def get_spans(self) -> List[Span]: + return [span for span in self._trace_list] \ No newline at end of file diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index 89d1e4d90b3d..852bd908d5c3 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -3,6 +3,7 @@ # Licensed under the MIT License. 
# ------------------------------------ import os +import datetime import json import azure.ai.inference as sdk @@ -13,14 +14,36 @@ ServicePreparerEmbeddings, ) from azure.core.pipeline.transport import RequestsTransport +from azure.core.settings import settings from devtools_testutils import recorded_by_proxy from azure.core.exceptions import AzureError, ServiceRequestError from azure.core.credentials import AzureKeyCredential +from azure.core.tracing.ai.inference import AIInferenceApiInstrumentor +from memory_trace_exporter import MemoryTraceExporter +from gen_ai_trace_verifier import GenAiTraceVerifier +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +CONTENT_TRACING_ENV_VARIABLE = "AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED" +content_tracing_initial_value = os.getenv(CONTENT_TRACING_ENV_VARIABLE) # The test class name needs to start with "Test" to get collected by pytest class TestModelClient(ModelClientTestBase): + @classmethod + def teardown_class(cls): + mode = 'a' if os.path.exists("teardown.txt") else 'w' + + with open("teardown.txt", mode) as file: + # Get current timestamp + timestamp = datetime.datetime.now() + text = "Setting " + CONTENT_TRACING_ENV_VARIABLE + " to " + str(content_tracing_initial_value) + # Add timestamp to the start of the text and write to file + file.write(f'{timestamp}: {text}\n') + if content_tracing_initial_value is not None: + os.environ[CONTENT_TRACING_ENV_VARIABLE] = content_tracing_initial_value + # ********************************************************************************** # # UNIT TESTS @@ -795,3 +818,1090 @@ def test_embeddings_on_chat_completion_endpoint(self, **kwargs): assert "not found" in e.message.lower() or "not allowed" in e.message.lower() client.close() assert exception_caught + + + # ********************************************************************************** + # + # TRACING TESTS - CHAT COMPLETIONS + # + # ********************************************************************************** + + def setup_memory_trace_exporter(self) -> MemoryTraceExporter: + # Setup Azure Core settings to use OpenTelemetry tracing + settings.tracing_implementation = "OpenTelemetry" + trace.set_tracer_provider(TracerProvider()) + tracer = trace.get_tracer(__name__) + memoryExporter = MemoryTraceExporter() + span_processor = SimpleSpanProcessor(memoryExporter) + trace.get_tracer_provider().add_span_processor(span_processor) + return span_processor, memoryExporter + + def modify_env_var(self, name, new_value): + current_value = os.getenv(name) + os.environ[name] = new_value + return current_value + + @ServicePreparerChatCompletions() + def test_instrumentation(self, **kwargs): + client = self._create_chat_client(**kwargs) + exception_caught = False + try: + assert AIInferenceApiInstrumentor().is_instrumented() == False + AIInferenceApiInstrumentor().instrument() + assert AIInferenceApiInstrumentor().is_instrumented() == True + AIInferenceApiInstrumentor().uninstrument() + assert AIInferenceApiInstrumentor().is_instrumented() == False + except RuntimeError as e: + exception_caught = True + print(e) + client.close() + assert exception_caught == False + + @ServicePreparerChatCompletions() + def test_instrumenting_twice_causes_exception(self, **kwargs): + client = self._create_chat_client(**kwargs) + exception_caught = False + instrumented_once = False + try: + AIInferenceApiInstrumentor().instrument() + instrumented_once = True + 
AIInferenceApiInstrumentor().instrument() + except RuntimeError as e: + exception_caught = True + print(e) + client.close() + assert instrumented_once == True + assert exception_caught == True + + @ServicePreparerChatCompletions() + def test_uninstrumenting_uninstrumented_causes_exception(self, **kwargs): + client = self._create_chat_client(**kwargs) + exception_caught = False + try: + AIInferenceApiInstrumentor().uninstrument() + except RuntimeError as e: + exception_caught = True + print(e) + client.close() + assert exception_caught == True + + @ServicePreparerChatCompletions() + def test_uninstrumenting_twise_causes_exception(self, **kwargs): + client = self._create_chat_client(**kwargs) + exception_caught = False + uninstrumented_once = False + try: + AIInferenceApiInstrumentor().instrument() + AIInferenceApiInstrumentor().uninstrument() + uninstrumented_once = True + AIInferenceApiInstrumentor().uninstrument() + except RuntimeError as e: + exception_caught = True + print(e) + client.close() + assert uninstrumented_once == True + assert exception_caught == True + + @ServicePreparerChatCompletions() + def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + response = client.complete( + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the capital of France?"), + ], + ) + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 1 + span = spans[0] + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match == True + + expected_events = [ + { + 'name': 'gen_ai.choice', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"finish_reason": "stop", "index": 0}' + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(span, expected_events) + assert events_match == True + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + response = client.complete( + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the capital of France?"), + ], + ) + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 1 + span = spans[0] + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + 
('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match == True + + expected_events = [ + { + 'name': 'gen_ai.system.message', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"role": "system", "content": "You are a helpful assistant."}' + } + }, + { + 'name': 'gen_ai.user.message', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"role": "user", "content": "What is the capital of France?"}' + } + }, + { + 'name': 'gen_ai.choice', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"message": {"content": "*"}, "finish_reason": "stop", "index": 0}' + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(span, expected_events) + assert events_match == True + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kwargs): + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + response = client.complete( + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the capital of France?"), + ], + stream=True + ) + response_content = "" + for update in response: + if update.choices: + response_content = response_content + update.choices[0].delta.content + client.close() + + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 1 + span = spans[0] + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match == True + + expected_events = [ + { + 'name': 'gen_ai.choice', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"finish_reason": "stop", "index": 0}' + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(span, expected_events) + assert events_match == True + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwargs): + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + response = client.complete( + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the capital of France?"), + ], + stream=True + ) + response_content = "" + for update in response: + if update.choices: + response_content = response_content + update.choices[0].delta.content + client.close() + + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = 
exporter.get_spans_by_name("chat") + assert len(spans) == 1 + span = spans[0] + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match == True + + expected_events = [ + { + 'name': 'gen_ai.system.message', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"role": "system", "content": "You are a helpful assistant."}' + } + }, + { + 'name': 'gen_ai.user.message', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"role": "user", "content": "What is the capital of France?"}' + } + }, + { + 'name': 'gen_ai.choice', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"message": {"content": "*"}, "finish_reason": "stop", "index": 0}' + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(span, expected_events) + assert events_match == True + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_with_function_call_tracing_content_recording_enabled(self, **kwargs): + import json + from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition + from azure.ai.inference import ChatCompletionsClient + + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + + def get_weather(city: str) -> str: + if city == "Seattle": + return "Nice weather" + elif city == "New York City": + return "Good weather" + else: + return "Unavailable" + + weather_description = ChatCompletionsToolDefinition( + function=FunctionDefinition( + name="get_weather", + description="Returns description of the weather in the specified city", + parameters={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city for which weather info is requested", + }, + }, + "required": ["city"], + }, + ) + ) + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the weather in Seattle?"), + ] + + response = client.complete(messages=messages, tools=[weather_description]) + + if response.choices[0].finish_reason == CompletionsFinishReason.TOOL_CALLS: + # Append the previous model response to the chat history + messages.append(AssistantMessage(tool_calls=response.choices[0].message.tool_calls)) + # The tool should be of type function call. 
+ if response.choices[0].message.tool_calls is not None and len(response.choices[0].message.tool_calls) > 0: + for tool_call in response.choices[0].message.tool_calls: + if type(tool_call) is ChatCompletionsToolCall: + function_args = json.loads(tool_call.function.arguments.replace("'", '"')) + print(f"Calling function `{tool_call.function.name}` with arguments {function_args}") + callable_func = locals()[tool_call.function.name] + function_response = callable_func(**function_args) + print(f"Function response = {function_response}") + # Provide the tool response to the model, by appending it to the chat history + messages.append(ToolMessage(tool_call_id=tool_call.id, content=function_response)) + # With the additional tools information on hand, get another response from the model + response = client.complete(messages=messages, tools=[weather_description]) + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 2 + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('tool_calls',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) + assert attributes_match == True + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) + assert attributes_match == True + + expected_events = [ + { + "name": "gen_ai.system.message", + "timestamp": "", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}" + } + }, + { + "name": "gen_ai.user.message", + "timestamp": "", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" + } + }, + { + "name": "gen_ai.choice", + "timestamp": "", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"message\": {\"content\": \"\", \"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\":\\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}, \"finish_reason\": \"tool_calls\", \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[0], expected_events) + assert events_match == True + + expected_events = [ + { + "name": "gen_ai.system.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}" + } + }, + { + "name": "gen_ai.user.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" + } + }, + { + "name": "gen_ai.assistant.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": 
"az.ai.inference", + "gen_ai.event.content": "{\"role\": \"assistant\", \"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\": \\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}" + } + }, + { + "name": "gen_ai.tool.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"tool\", \"tool_call_id\": \"*\", \"content\": \"Nice weather\"}" + } + }, + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"message\": {\"content\": \"*\"}, \"finish_reason\": \"stop\", \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) + assert events_match == True + + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_with_function_call_tracing_content_recording_disabled(self, **kwargs): + import json + from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition + from azure.ai.inference import ChatCompletionsClient + + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + + def get_weather(city: str) -> str: + if city == "Seattle": + return "Nice weather" + elif city == "New York City": + return "Good weather" + else: + return "Unavailable" + + weather_description = ChatCompletionsToolDefinition( + function=FunctionDefinition( + name="get_weather", + description="Returns description of the weather in the specified city", + parameters={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city for which weather info is requested", + }, + }, + "required": ["city"], + }, + ) + ) + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the weather in Seattle?"), + ] + + response = client.complete(messages=messages, tools=[weather_description]) + + if response.choices[0].finish_reason == CompletionsFinishReason.TOOL_CALLS: + # Append the previous model response to the chat history + messages.append(AssistantMessage(tool_calls=response.choices[0].message.tool_calls)) + # The tool should be of type function call. 
+ if response.choices[0].message.tool_calls is not None and len(response.choices[0].message.tool_calls) > 0: + for tool_call in response.choices[0].message.tool_calls: + if type(tool_call) is ChatCompletionsToolCall: + function_args = json.loads(tool_call.function.arguments.replace("'", '"')) + print(f"Calling function `{tool_call.function.name}` with arguments {function_args}") + callable_func = locals()[tool_call.function.name] + function_response = callable_func(**function_args) + print(f"Function response = {function_response}") + # Provide the tool response to the model, by appending it to the chat history + messages.append(ToolMessage(tool_call_id=tool_call.id, content=function_response)) + # With the additional tools information on hand, get another response from the model + response = client.complete(messages=messages, tools=[weather_description]) + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 2 + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('tool_calls',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) + assert attributes_match == True + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) + assert attributes_match == True + + expected_events = [ + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"finish_reason\": \"tool_calls\", \"index\": 0, \"message\": {\"tool_calls\": [{\"function\": {\"call_id\": null}, \"id\": \"*\", \"type\": \"function\"}]}}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[0], expected_events) + assert events_match == True + + expected_events = [ + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"finish_reason\": \"stop\", \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) + assert events_match == True + + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_with_function_call_streaming_tracing_content_recording_enabled(self, **kwargs): + import json + from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition + from azure.ai.inference import ChatCompletionsClient + + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + + def get_weather(city: str) -> str: + if city == "Seattle": + return "Nice weather" + elif city == "New York 
City": + return "Good weather" + else: + return "Unavailable" + + weather_description = ChatCompletionsToolDefinition( + function=FunctionDefinition( + name="get_weather", + description="Returns description of the weather in the specified city", + parameters={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city for which weather info is requested", + }, + }, + "required": ["city"], + }, + ) + ) + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the weather in Seattle?"), + ] + + response = client.complete(messages=messages, tools=[weather_description]) + + if response.choices[0].finish_reason == CompletionsFinishReason.TOOL_CALLS: + # Append the previous model response to the chat history + messages.append(AssistantMessage(tool_calls=response.choices[0].message.tool_calls)) + # The tool should be of type function call. + if response.choices[0].message.tool_calls is not None and len(response.choices[0].message.tool_calls) > 0: + for tool_call in response.choices[0].message.tool_calls: + if type(tool_call) is ChatCompletionsToolCall: + function_args = json.loads(tool_call.function.arguments.replace("'", '"')) + print(f"Calling function `{tool_call.function.name}` with arguments {function_args}") + callable_func = locals()[tool_call.function.name] + function_response = callable_func(**function_args) + print(f"Function response = {function_response}") + # Provide the tool response to the model, by appending it to the chat history + messages.append(ToolMessage(tool_call_id=tool_call.id, content=function_response)) + # With the additional tools information on hand, get another response from the model + response = client.complete(messages=messages, tools=[weather_description]) + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 2 + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('tool_calls',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) + assert attributes_match == True + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) + assert attributes_match == True + + expected_events = [ + { + "name": "gen_ai.system.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}" + } + }, + { + "name": "gen_ai.user.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" + } + }, + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + 
"gen_ai.event.content": "{\"message\": {\"content\": \"\", \"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\":\\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}, \"finish_reason\": \"tool_calls\", \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[0], expected_events) + assert events_match == True + + expected_events = [ + { + "name": "gen_ai.system.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}" + } + }, + { + "name": "gen_ai.user.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" + } + }, + { + "name": "gen_ai.assistant.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"assistant\", \"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\": \\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}" + } + }, + { + "name": "gen_ai.tool.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"tool\", \"tool_call_id\": \"*\", \"content\": \"Nice weather\"}" + } + }, + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"message\": {\"content\": \"*\"}, \"finish_reason\": \"stop\", \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) + assert events_match == True + + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_with_function_call_streaming_tracing_content_recording_enabled(self, **kwargs): + import json + from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, FunctionCall, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition + from azure.ai.inference import ChatCompletionsClient + + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + + def get_weather(city: str) -> str: + if city == "Seattle": + return "Nice weather" + elif city == "New York City": + return "Good weather" + else: + return "Unavailable" + + weather_description = ChatCompletionsToolDefinition( + function=FunctionDefinition( + name="get_weather", + description="Returns description of the weather in the specified city", + parameters={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city for which weather info is requested", + }, + }, + "required": ["city"], + }, + ) + ) + messages=[ + sdk.models.SystemMessage(content="You are a helpful AI assistant."), + sdk.models.UserMessage(content="What is the weather in Seattle?"), + ] + + response = client.complete( + messages=messages, + tools=[weather_description], + stream=True) + + # At this point we expect a function tool call in the model response + tool_call_id: str = "" + function_name: str = "" + function_args: str = "" + for update in response: + if update.choices[0].delta.tool_calls is not None: + if 
update.choices[0].delta.tool_calls[0].function.name is not None: + function_name = update.choices[0].delta.tool_calls[0].function.name + if update.choices[0].delta.tool_calls[0].id is not None: + tool_call_id = update.choices[0].delta.tool_calls[0].id + function_args += update.choices[0].delta.tool_calls[0].function.arguments or "" + + # Append the previous model response to the chat history + messages.append( + AssistantMessage( + tool_calls=[ + ChatCompletionsToolCall( + id=tool_call_id, + function=FunctionCall( + name=function_name, + arguments=function_args + ) + ) + ] + ) + ) + + # Make the function call + callable_func = locals()[function_name] + function_args_mapping = json.loads(function_args.replace("'", '"')) + function_response = callable_func(**function_args_mapping) + + # Append the function response as a tool message to the chat history + messages.append( + ToolMessage( + tool_call_id=tool_call_id, + content=function_response + ) + ) + + # With the additional tools information on hand, get another streaming response from the model + response = client.complete( + messages=messages, + tools=[weather_description], + stream=True + ) + + content = "" + for update in response: + content = content + update.choices[0].delta.content + + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 2 + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('tool_calls',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) + assert attributes_match == True + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) + assert attributes_match == True + + expected_events = [ + { + "name": "gen_ai.system.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful AI assistant.\"}" + } + }, + { + "name": "gen_ai.user.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" + } + }, + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"finish_reason\": \"tool_calls\", \"message\": {\"tool_calls\": [{\"id\": \"*\", \"type\": \"function\", \"function\": {\"name\": \"get_weather\", \"arguments\": \"{\\\"city\\\": \\\"Seattle\\\"}\"}}]}, \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[0], expected_events) + assert events_match == True + + expected_events = [ + { + "name": "gen_ai.system.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a 
helpful AI assistant.\"}" + } + }, + { + "name": "gen_ai.user.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" + } + }, + { + "name": "gen_ai.assistant.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"assistant\", \"tool_calls\": [{\"id\": \"*\", \"function\": {\"name\": \"get_weather\", \"arguments\": \"{\\\"city\\\": \\\"Seattle\\\"}\"}, \"type\": \"function\"}]}" + } + }, + { + "name": "gen_ai.tool.message", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"role\": \"tool\", \"tool_call_id\": \"*\", \"content\": \"Nice weather\"}" + } + }, + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"message\": {\"content\": \"*\"}, \"finish_reason\": \"stop\", \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) + assert events_match == True + + AIInferenceApiInstrumentor().uninstrument() + + @ServicePreparerChatCompletions() + def test_chat_completion_with_function_call_streaming_tracing_content_recording_disabled(self, **kwargs): + import json + from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, FunctionCall, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition + from azure.ai.inference import ChatCompletionsClient + + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") + client = self._create_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + + def get_weather(city: str) -> str: + if city == "Seattle": + return "Nice weather" + elif city == "New York City": + return "Good weather" + else: + return "Unavailable" + + weather_description = ChatCompletionsToolDefinition( + function=FunctionDefinition( + name="get_weather", + description="Returns description of the weather in the specified city", + parameters={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city for which weather info is requested", + }, + }, + "required": ["city"], + }, + ) + ) + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the weather in Seattle?"), + ] + + response = client.complete( + messages=messages, + tools=[weather_description], + stream=True) + + # At this point we expect a function tool call in the model response + tool_call_id: str = "" + function_name: str = "" + function_args: str = "" + for update in response: + if update.choices[0].delta.tool_calls is not None: + if update.choices[0].delta.tool_calls[0].function.name is not None: + function_name = update.choices[0].delta.tool_calls[0].function.name + if update.choices[0].delta.tool_calls[0].id is not None: + tool_call_id = update.choices[0].delta.tool_calls[0].id + function_args += update.choices[0].delta.tool_calls[0].function.arguments or "" + + # Append the previous model response to the chat history + messages.append( + AssistantMessage( + tool_calls=[ + ChatCompletionsToolCall( + id=tool_call_id, + function=FunctionCall( + name=function_name, + arguments=function_args + ) + ) + ] + ) + ) + + # Make the function call + callable_func = 
locals()[function_name] + function_args_mapping = json.loads(function_args.replace("'", '"')) + function_response = callable_func(**function_args_mapping) + + # Append the function response as a tool message to the chat history + messages.append( + ToolMessage( + tool_call_id=tool_call_id, + content=function_response + ) + ) + + # With the additional tools information on hand, get another streaming response from the model + response = client.complete( + messages=messages, + tools=[weather_description], + stream=True + ) + + content = "" + for update in response: + content = content + update.choices[0].delta.content + + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 2 + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('tool_calls',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) + assert attributes_match == True + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) + assert attributes_match == True + + expected_events = [ + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"finish_reason\": \"tool_calls\", \"message\": {\"tool_calls\": [{\"id\": \"*\", \"type\": \"function\"}]}, \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[0], expected_events) + assert events_match == True + + expected_events = [ + { + "name": "gen_ai.choice", + "timestamp": "*", + "attributes": { + "gen_ai.system": "az.ai.inference", + "gen_ai.event.content": "{\"finish_reason\": \"stop\", \"index\": 0}" + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) + assert events_match == True + + AIInferenceApiInstrumentor().uninstrument() \ No newline at end of file From b549b38ae3a49775a5b8168cabf85bd811f1a734 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Mon, 23 Sep 2024 10:02:59 -0500 Subject: [PATCH 14/35] fix and async trace test --- .../test_model_inference_async_client.py | 78 +++++++++++++++++++ .../_ai_inference_api_instrumentor_impl.py | 6 +- 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py index 5c1717dcd764..fd630ffa706b 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py @@ -14,14 +14,28 @@ ServicePreparerEmbeddings, ) from azure.core.pipeline.transport import AioHttpTransport +from azure.core.settings import settings from devtools_testutils.aio import recorded_by_proxy_async from azure.core.exceptions import AzureError, ServiceRequestError from 
azure.core.credentials import AzureKeyCredential +from azure.core.tracing.ai.inference import AIInferenceApiInstrumentor +from memory_trace_exporter import MemoryTraceExporter +from gen_ai_trace_verifier import GenAiTraceVerifier +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +CONTENT_TRACING_ENV_VARIABLE = "AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED" +content_tracing_initial_value = os.getenv(CONTENT_TRACING_ENV_VARIABLE) # The test class name needs to start with "Test" to get collected by pytest class TestModelAsyncClient(ModelClientTestBase): + @classmethod + def teardown_class(cls): + if content_tracing_initial_value is not None: + os.environ[CONTENT_TRACING_ENV_VARIABLE] = content_tracing_initial_value + # ********************************************************************************** # # EMBEDDINGS REGRESSION TESTS - NO SERVICE RESPONSE REQUIRED @@ -677,3 +691,67 @@ async def test_async_embeddings_with_auth_failure(self, **kwargs): assert "auth token validation failed" in e.message.lower() await client.close() assert exception_caught + + # ********************************************************************************** + # + # TRACING TESTS - CHAT COMPLETIONS + # + # ********************************************************************************** + + def setup_memory_trace_exporter(self) -> MemoryTraceExporter: + # Setup Azure Core settings to use OpenTelemetry tracing + settings.tracing_implementation = "OpenTelemetry" + trace.set_tracer_provider(TracerProvider()) + tracer = trace.get_tracer(__name__) + memoryExporter = MemoryTraceExporter() + span_processor = SimpleSpanProcessor(memoryExporter) + trace.get_tracer_provider().add_span_processor(span_processor) + return span_processor, memoryExporter + + def modify_env_var(self, name, new_value): + current_value = os.getenv(name) + os.environ[name] = new_value + return current_value + + @ServicePreparerChatCompletions() + async def test_chat_completion_async_tracing_content_recording_disabled(self, **kwargs): + self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") + client = self._create_async_chat_client(**kwargs) + processor, exporter = self.setup_memory_trace_exporter() + AIInferenceApiInstrumentor().instrument() + response = await client.complete( + messages=[ + sdk.models.SystemMessage(content="You are a helpful assistant."), + sdk.models.UserMessage(content="What is the capital of France?"), + ], + ) + processor.force_flush() + spans = exporter.get_spans_by_name_starts_with("chat ") + if len(spans) == 0: + spans = exporter.get_spans_by_name("chat") + assert len(spans) == 1 + span = spans[0] + expected_attributes = [('gen_ai.operation.name', 'chat'), + ('gen_ai.system', 'az.ai.inference'), + ('gen_ai.request.model', ''), + ('server.address', ''), + ('gen_ai.response.id', ''), + ('gen_ai.response.model', ''), + ('gen_ai.usage.input_tokens', ''), + ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.finish_reasons', ('stop',))] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match == True + + expected_events = [ + { + 'name': 'gen_ai.choice', + 'attributes': { + 'gen_ai.system': 'az.ai.inference', + 'gen_ai.event.content': '{"finish_reason": "stop", "index": 0}' + } + } + ] + events_match = GenAiTraceVerifier().check_span_events(span, expected_events) + assert events_match == True + AIInferenceApiInstrumentor().uninstrument() \ No newline at end of 
file diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py index d41ab2ad74ca..c003da6e4132 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py @@ -357,7 +357,7 @@ async def inner(*args, **kwargs): # tracing events not supported in azure-core-tracing-opentelemetry # so need to access the span instance directly with span_impl_type.change_context(span.span_instance): - _add_request_span_attributes(span, span_name, kwargs) + _add_request_span_attributes(span, span_name, args, kwargs) result = await function(*args, **kwargs) if kwargs.get("stream") is True: return _wrapped_stream(result, span) @@ -395,7 +395,9 @@ def _inference_apis(): sync_apis = ( ("azure.ai.inference", "ChatCompletionsClient", "complete", TraceType.INFERENCE, "inference_chat_completions_complete"), ) - async_apis = () + async_apis = ( + ("azure.ai.inference.aio", "ChatCompletionsClient", "complete", TraceType.INFERENCE, "inference_chat_completions_complete"), + ) return sync_apis, async_apis From 469d32ce3b2f0dc343520380d7b1f6c43eef5294 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Mon, 23 Sep 2024 16:24:30 -0500 Subject: [PATCH 15/35] updating readme and setup --- sdk/ai/azure-ai-inference/README.md | 91 +++++++++++++++++++ sdk/ai/azure-ai-inference/setup.py | 3 + .../tests/test_model_inference_client.py | 20 ++-- 3 files changed, 100 insertions(+), 14 deletions(-) diff --git a/sdk/ai/azure-ai-inference/README.md b/sdk/ai/azure-ai-inference/README.md index faeab5703529..25d60dfe32e8 100644 --- a/sdk/ai/azure-ai-inference/README.md +++ b/sdk/ai/azure-ai-inference/README.md @@ -57,6 +57,14 @@ To update an existing installation of the package, use: pip install --upgrade azure-ai-inference ``` +If you want to install Azure AI Inferencing package with support for OpenTelemetry based tracing, use the following command: + +```bash +pip install azure-ai-inference[trace] +``` + + + ## Key concepts ### Create and authenticate a client directly, using API key or GitHub token @@ -451,6 +459,89 @@ TBD To generate embeddings for additional phrases, simply call `client.embed` multiple times using the same `client`. --> +## Tracing + +The Azure AI Inferencing API Tracing library provides tracing for Azure AI Inference client library for Python. Refer to Installation chapter above for installation instructions. + +### Setup + +The environment variable AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED controls whether the actual message contents will be included in the traces or not. By default, the message contents are not include as part of the trace. Set the value of the environment variable to true (case insensitive) for the message contents to be included as part of the trace. Any other value will cause the message contents not to be traced. + +You also need to configure the tracing implementation in your code, like so: + +``` +from azure.core.settings import settings +settings.tracing_implementation = "opentelemetry" +``` + +### Trace Exporter(s) + +In order for the traces to be captured, you need to setup the applicable trace exporters. The chosen exporter will be based on where you want the traces to be output. You can also implement your own exporter. 
The first example below shows how to setup an exporter to Azure Monitor. +Please refer to [this](https://learn.microsoft.com/en-us/azure/azure-monitor/app/create-workspace-resource?tabs=bicep) documentation for more information about how to created Azure Monitor resource. +Configure the APPLICATIONINSIGHTS_CONNECTION_STRING based on your Azure Monitor resource. + +``` +# Setup tracing to Azure Monitor +from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter +trace.set_tracer_provider(TracerProvider()) +tracer = trace.get_tracer(__name__) +span_processor = BatchSpanProcessor( + AzureMonitorTraceExporter.from_connection_string( + os.environ["APPLICATIONINSIGHTS_CONNECTION_STRING"] + ) +) +trace.get_tracer_provider().add_span_processor(span_processor) +``` + +The following example shows how to setup tracing to console output. + +``` +# Setup tracing to console +exporter = ConsoleSpanExporter() +trace.set_tracer_provider(TracerProvider()) +tracer = trace.get_tracer(__name__) +trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(exporter)) +``` +### Instrumentation + +Use the AIInferenceInstrumentor to instrument the Azure AI Inferencing API for LLM tracing, this will cause the LLM traces to be emitted from Azure AI Inferencing API. + +``` +from azure.core.tracing import AIInferenceApiInstrumentor +# Instrument AI Inference API +AIInferenceApiInstrumentor().instrument() +``` + +It is also possible to uninstrument the Azure AI Inferencing API by using the uninstrument call. After this call, the LLM traces will no longer be emitted by the Azure AI Inferencing API until instrument is called again. + +``` +AIInferenceApiInstrumentor().uninstrument() +``` + +### Tracing Your Own Functions +The @tracer.start_as_current_span decorator can be used to trace your own functions. This will trace the function parameters and their values. You can also add further attributes to the span in the function implementation as demonstrated below. Note that you will have to setup the tracer in your code before using the decorator. + +``` +# The @tracer.start_as_current_span decorator will +# trace the function call and enable adding additional attributes +# to the span in the function implementation. 
+@tracer.start_as_current_span("get_temperature") +def get_temperature(city: str) -> str: + + # Adding attributes to the current span + span = trace.get_current_span() + span.set_attribute("requested_city", city) + + if city == "Seattle": + return "75" + elif city == "New York City": + return "80" + else: + return "Unavailable" + + +``` + ## Troubleshooting ### Exceptions diff --git a/sdk/ai/azure-ai-inference/setup.py b/sdk/ai/azure-ai-inference/setup.py index c7b5395a3f9f..54039451d91a 100644 --- a/sdk/ai/azure-ai-inference/setup.py +++ b/sdk/ai/azure-ai-inference/setup.py @@ -68,4 +68,7 @@ "typing-extensions>=4.6.0", ], python_requires=">=3.8", + extras_require={ + 'trace': ['azure-core-tracing-opentelemetry', 'opentelemetry-sdk', 'azure-monitor-opentelemetry-exporter'] + } ) diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index 852bd908d5c3..16568e77f486 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -33,14 +33,6 @@ class TestModelClient(ModelClientTestBase): @classmethod def teardown_class(cls): - mode = 'a' if os.path.exists("teardown.txt") else 'w' - - with open("teardown.txt", mode) as file: - # Get current timestamp - timestamp = datetime.datetime.now() - text = "Setting " + CONTENT_TRACING_ENV_VARIABLE + " to " + str(content_tracing_initial_value) - # Add timestamp to the start of the text and write to file - file.write(f'{timestamp}: {text}\n') if content_tracing_initial_value is not None: os.environ[CONTENT_TRACING_ENV_VARIABLE] = content_tracing_initial_value @@ -1205,7 +1197,7 @@ def get_weather(city: str) -> str: expected_events = [ { "name": "gen_ai.system.message", - "timestamp": "", + "timestamp": "*", "attributes": { "gen_ai.system": "az.ai.inference", "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}" @@ -1213,7 +1205,7 @@ def get_weather(city: str) -> str: }, { "name": "gen_ai.user.message", - "timestamp": "", + "timestamp": "*", "attributes": { "gen_ai.system": "az.ai.inference", "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" @@ -1221,7 +1213,7 @@ def get_weather(city: str) -> str: }, { "name": "gen_ai.choice", - "timestamp": "", + "timestamp": "*", "attributes": { "gen_ai.system": "az.ai.inference", "gen_ai.event.content": "{\"message\": {\"content\": \"\", \"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\":\\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}, \"finish_reason\": \"tool_calls\", \"index\": 0}" @@ -1481,7 +1473,7 @@ def get_weather(city: str) -> str: expected_events = [ { "name": "gen_ai.system.message", - "timestamp": "*", + "timestamp": "", "attributes": { "gen_ai.system": "az.ai.inference", "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}" @@ -1489,7 +1481,7 @@ def get_weather(city: str) -> str: }, { "name": "gen_ai.user.message", - "timestamp": "*", + "timestamp": "", "attributes": { "gen_ai.system": "az.ai.inference", "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" @@ -1497,7 +1489,7 @@ def get_weather(city: str) -> str: }, { "name": "gen_ai.choice", - "timestamp": "*", + "timestamp": "", "attributes": { "gen_ai.system": "az.ai.inference", "gen_ai.event.content": "{\"message\": {\"content\": \"\", 
\"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\":\\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}, \"finish_reason\": \"tool_calls\", \"index\": 0}" From f1424a13026bde10c40c41db450b4e183c4a8f0a Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Mon, 23 Sep 2024 16:32:08 -0500 Subject: [PATCH 16/35] adding tracing sample --- .../sample_chat_completions_with_tracing.py | 168 ++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py new file mode 100644 index 000000000000..8ac19ab1d56f --- /dev/null +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py @@ -0,0 +1,168 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +import os +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.sdk.trace.export import ConsoleSpanExporter +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from azure.ai.inference import ChatCompletionsClient +from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason +from azure.core.credentials import AzureKeyCredential +from azure.core.tracing.ai.inference import AIInferenceApiInstrumentor +from azure.core.settings import settings + + +# Setup tracing to console +exporter = ConsoleSpanExporter() +trace.set_tracer_provider(TracerProvider()) +tracer = trace.get_tracer(__name__) +trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(exporter)) + +# Use the following code to setup tracing to Application Insights +# from azure.monitor.opentelemetry.exporter import AzureMonitorTraceExporter +# trace.set_tracer_provider(TracerProvider()) +# tracer = trace.get_tracer(__name__) +# span_processor = BatchSpanProcessor( +# AzureMonitorTraceExporter.from_connection_string( +# os.environ["APPLICATIONINSIGHTS_CONNECTION_STRING"] +# ) +# ) +# trace.get_tracer_provider().add_span_processor(span_processor) + + +def chat_completion_streaming(key, endpoint, model_name): + client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) + response = client.complete( + stream=True, + messages=[ + SystemMessage(content="You are a helpful assistant."), + UserMessage(content="Tell me about software engineering in five sentences."), + ], + model=model_name, + ) + for update in response: + if update.choices: + print(update.choices[0].delta.content or "", end="") + pass + client.close() + + +# The tracer.start_as_current_span decorator will trace the function call and enable adding additional attributes +# to the span in the function implementation. Note that this will trace the function parameters and their values. 
+@tracer.start_as_current_span("get_temperature") +def get_temperature(city: str) -> str: + + # Adding attributes to the current span + span = trace.get_current_span() + span.set_attribute("requested_city", city) + + if city == "Seattle": + return "75" + elif city == "New York City": + return "80" + else: + return "Unavailable" + + +def get_weather(city: str) -> str: + if city == "Seattle": + return "Nice weather" + elif city == "New York City": + return "Good weather" + else: + return "Unavailable" + + +def chat_completion_with_function_call(key, endpoint, model_name): + import json + from azure.ai.inference.models import ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition + + weather_description = ChatCompletionsToolDefinition( + function=FunctionDefinition( + name="get_weather", + description="Returns description of the weather in the specified city", + parameters={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city for which weather info is requested", + }, + }, + "required": ["city"], + }, + ) + ) + + temperature_in_city = ChatCompletionsToolDefinition( + function=FunctionDefinition( + name="get_temperature", + description="Returns the current temperature for the specified city", + parameters={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city for which temperature info is requested", + }, + }, + "required": ["city"], + }, + ) + ) + + client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) + messages=[ + SystemMessage(content="You are a helpful assistant."), + UserMessage(content="What is the weather and temperature in Seattle?"), + ] + + response = client.complete(messages=messages, model=model_name, tools=[weather_description, temperature_in_city]) + + if response.choices[0].finish_reason == CompletionsFinishReason.TOOL_CALLS: + # Append the previous model response to the chat history + messages.append(AssistantMessage(tool_calls=response.choices[0].message.tool_calls)) + # The tool should be of type function call. 
+ if response.choices[0].message.tool_calls is not None and len(response.choices[0].message.tool_calls) > 0: + for tool_call in response.choices[0].message.tool_calls: + if type(tool_call) is ChatCompletionsToolCall: + function_args = json.loads(tool_call.function.arguments.replace("'", '"')) + print(f"Calling function `{tool_call.function.name}` with arguments {function_args}") + callable_func = globals()[tool_call.function.name] + function_response = callable_func(**function_args) + print(f"Function response = {function_response}") + # Provide the tool response to the model, by appending it to the chat history + messages.append(ToolMessage(tool_call_id=tool_call.id, content=function_response)) + # With the additional tools information on hand, get another response from the model + response = client.complete(messages=messages, model=model_name, tools=[weather_description, temperature_in_city]) + + print(f"Model response = {response.choices[0].message.content}") + + +def main(): + # Setup Azure Core settings to use OpenTelemetry tracing + settings.tracing_implementation = "OpenTelemetry" + + # Instrument AI Inference API + AIInferenceApiInstrumentor().instrument() + + # Read AI Inference API configuration + endpoint = os.environ.get("AZUREAI_ENDPOINT_URL") + key = os.environ.get("AZUREAI_ENDPOINT_KEY") + model_name = os.environ.get("AZUREAI_MODEL_NAME") + + print("===== starting chat_completion_streaming() =====") + chat_completion_streaming(key, endpoint, model_name) + print("===== chat_completion_streaming() done =====") + + print("===== starting chat_completion_with_function_call() =====") + chat_completion_with_function_call(key, endpoint, model_name) + print("===== chat_completion_with_function_call() done =====") + AIInferenceApiInstrumentor().uninstrument() + + +if __name__ == "__main__": + main() From 92da09a7b15aa060e44424540385067cc4535a2c Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Wed, 25 Sep 2024 13:50:03 -0500 Subject: [PATCH 17/35] changes based on review comments --- sdk/ai/azure-ai-inference/README.md | 164 +++++---- sdk/ai/azure-ai-inference/samples/README.md | 1 + .../sample_chat_completions_with_tracing.py | 93 +++-- sdk/ai/azure-ai-inference/setup.py | 2 +- .../tests/gen_ai_trace_verifier.py | 9 +- .../test_model_inference_async_client.py | 6 +- .../tests/test_model_inference_client.py | 338 +++++------------- .../core/tracing/ai/inference/__init__.py | 2 +- ...entor.py => _ai_inference_instrumentor.py} | 13 +- ....py => _ai_inference_instrumentor_impl.py} | 9 +- .../inference/azure_telemetry_instrumentor.py | 20 -- 11 files changed, 250 insertions(+), 407 deletions(-) rename sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/{_ai_inference_api_instrumentor.py => _ai_inference_instrumentor.py} (64%) rename sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/{_ai_inference_api_instrumentor_impl.py => _ai_inference_instrumentor_impl.py} (98%) delete mode 100644 sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/azure_telemetry_instrumentor.py diff --git a/sdk/ai/azure-ai-inference/README.md b/sdk/ai/azure-ai-inference/README.md index 25d60dfe32e8..d2e593b30646 100644 --- a/sdk/ai/azure-ai-inference/README.md +++ b/sdk/ai/azure-ai-inference/README.md @@ -459,89 +459,6 @@ TBD To generate embeddings for additional phrases, simply call `client.embed` multiple times using the same `client`. 
--> -## Tracing - -The Azure AI Inferencing API Tracing library provides tracing for Azure AI Inference client library for Python. Refer to Installation chapter above for installation instructions. - -### Setup - -The environment variable AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED controls whether the actual message contents will be included in the traces or not. By default, the message contents are not include as part of the trace. Set the value of the environment variable to true (case insensitive) for the message contents to be included as part of the trace. Any other value will cause the message contents not to be traced. - -You also need to configure the tracing implementation in your code, like so: - -``` -from azure.core.settings import settings -settings.tracing_implementation = "opentelemetry" -``` - -### Trace Exporter(s) - -In order for the traces to be captured, you need to setup the applicable trace exporters. The chosen exporter will be based on where you want the traces to be output. You can also implement your own exporter. The first example below shows how to setup an exporter to Azure Monitor. -Please refer to [this](https://learn.microsoft.com/en-us/azure/azure-monitor/app/create-workspace-resource?tabs=bicep) documentation for more information about how to created Azure Monitor resource. -Configure the APPLICATIONINSIGHTS_CONNECTION_STRING based on your Azure Monitor resource. - -``` -# Setup tracing to Azure Monitor -from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter -trace.set_tracer_provider(TracerProvider()) -tracer = trace.get_tracer(__name__) -span_processor = BatchSpanProcessor( - AzureMonitorTraceExporter.from_connection_string( - os.environ["APPLICATIONINSIGHTS_CONNECTION_STRING"] - ) -) -trace.get_tracer_provider().add_span_processor(span_processor) -``` - -The following example shows how to setup tracing to console output. - -``` -# Setup tracing to console -exporter = ConsoleSpanExporter() -trace.set_tracer_provider(TracerProvider()) -tracer = trace.get_tracer(__name__) -trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(exporter)) -``` -### Instrumentation - -Use the AIInferenceInstrumentor to instrument the Azure AI Inferencing API for LLM tracing, this will cause the LLM traces to be emitted from Azure AI Inferencing API. - -``` -from azure.core.tracing import AIInferenceApiInstrumentor -# Instrument AI Inference API -AIInferenceApiInstrumentor().instrument() -``` - -It is also possible to uninstrument the Azure AI Inferencing API by using the uninstrument call. After this call, the LLM traces will no longer be emitted by the Azure AI Inferencing API until instrument is called again. - -``` -AIInferenceApiInstrumentor().uninstrument() -``` - -### Tracing Your Own Functions -The @tracer.start_as_current_span decorator can be used to trace your own functions. This will trace the function parameters and their values. You can also add further attributes to the span in the function implementation as demonstrated below. Note that you will have to setup the tracer in your code before using the decorator. - -``` -# The @tracer.start_as_current_span decorator will -# trace the function call and enable adding additional attributes -# to the span in the function implementation. 

-@tracer.start_as_current_span("get_temperature")
-def get_temperature(city: str) -> str:
-
-    # Adding attributes to the current span
-    span = trace.get_current_span()
-    span.set_attribute("requested_city", city)
-
-    if city == "Seattle":
-        return "75"
-    elif city == "New York City":
-        return "80"
-    else:
-        return "Unavailable"
-
-
-```
-
 ## Troubleshooting
 
 ### Exceptions
@@ -625,6 +542,87 @@ To report issues with the client library, or request additional features, please
 
 * Have a look at the [Samples](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/ai/azure-ai-inference/samples)
 folder, containing fully runnable Python code for doing inference using synchronous and asynchronous clients.
 
+## Tracing
+
+The Azure AI Inferencing API Tracing library provides tracing for the Azure AI Inference client library for Python. Refer to the Installation chapter above for installation instructions.
+
+### Setup
+
+The environment variable AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED controls whether the actual message contents will be recorded in the traces or not. By default, the message contents are not recorded as part of the trace. When message content recording is disabled, the function names, function parameter names, and function parameter values associated with any function call tools are also not recorded in the trace. Set the value of the environment variable to "true" (case insensitive) for the message contents to be recorded as part of the trace. Any other value will cause the message contents not to be recorded.
+
+You also need to configure the tracing implementation in your code by setting `AZURE_SDK_TRACING_IMPLEMENTATION` to `opentelemetry` or configuring it in the code with the following snippet:
+
+
+
+```python
+from azure.core.settings import settings
+settings.tracing_implementation = "opentelemetry"
+```
+
+
+
+
+Please refer to [azure-core-tracing-documentation](https://learn.microsoft.com/python/api/overview/azure/core-tracing-opentelemetry-readme) for more information.
+
+### Trace Exporter(s)
+
+In order for the traces to be captured, you need to set up the applicable trace exporters. The chosen exporter will be based on where you want the traces to be output. You can also implement your own exporter. You can find more information [here](https://learn.microsoft.com/en-us/python/api/overview/azure/core-tracing-opentelemetry-readme?view=azure-python-preview).
+
+Please refer to [this](https://learn.microsoft.com/en-us/azure/azure-monitor/app/create-workspace-resource?tabs=bicep) documentation for more information about how to create an Azure Monitor resource for the Azure Monitor exporter.
+
+### Instrumentation
+
+Use the AIInferenceInstrumentor to instrument the Azure AI Inferencing API for LLM tracing; this will cause LLM traces to be emitted from the Azure AI Inferencing API.
+
+
+
+```python
+from azure.core.tracing.ai.inference import AIInferenceInstrumentor
+# Instrument AI Inference API
+AIInferenceInstrumentor().instrument()
+```
+
+
+
+
+It is also possible to uninstrument the Azure AI Inferencing API by using the uninstrument call. After this call, the LLM traces will no longer be emitted by the Azure AI Inferencing API until instrument is called again.
+
+
+
+```python
+AIInferenceInstrumentor().uninstrument()
+```
+
+
+
+### Tracing Your Own Functions
+The @tracer.start_as_current_span decorator can be used to trace your own functions. This will trace the function parameters and their values. You can also add further attributes to the span in the function implementation as demonstrated below. Note that you will have to set up the tracer in your code before using the decorator. More information is available [here](https://opentelemetry.io/docs/languages/python/).
+
+
+
+```python
+from opentelemetry import trace
+tracer = trace.get_tracer(__name__)
+
+# The tracer.start_as_current_span decorator will trace the function call and enable adding additional attributes
+# to the span in the function implementation. Note that this will trace the function parameters and their values.
+@tracer.start_as_current_span("get_temperature")
+def get_temperature(city: str) -> str:
+
+    # Adding attributes to the current span
+    span = trace.get_current_span()
+    span.set_attribute("requested_city", city)
+
+    if city == "Seattle":
+        return "75"
+    elif city == "New York City":
+        return "80"
+    else:
+        return "Unavailable"
+```
+
+
+
 ## Contributing
 
 This project welcomes contributions and suggestions. Most contributions require
diff --git a/sdk/ai/azure-ai-inference/samples/README.md b/sdk/ai/azure-ai-inference/samples/README.md
index 777ce3baf477..ebc8990ceb7a 100644
--- a/sdk/ai/azure-ai-inference/samples/README.md
+++ b/sdk/ai/azure-ai-inference/samples/README.md
@@ -105,6 +105,7 @@ similarly for the other samples.
 |[sample_get_model_info.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_get_model_info.py) | Get AI model information using the chat completions client. Similarly can be done with all other clients. |
 |[sample_chat_completions_with_model_extras.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_model_extras.py) | Chat completions with additional model-specific parameters. |
 |[sample_chat_completions_azure_openai.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py) | Chat completions against Azure OpenAI endpoint. |
+|[sample_chat_completions_with_tracing.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py) | Chat completions with traces enabled. Includes streaming and non-streaming chat operations. The non-streaming chat uses a function call tool and also demonstrates how to add traces to client code so that they are included in the emitted traces. |
 
 ### Text embeddings
 
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py
index 8ac19ab1d56f..fdd107664c0a 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py
@@ -1,19 +1,45 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+"""
+DESCRIPTION:
+    This sample demonstrates how to get a chat completions response from
+    the service using a synchronous client, with tracing enabled. It shows
+    both a streaming chat completion and a chat completion that uses a
+    function call tool, with the traces exported to the console.
+
+    This sample assumes the AI model is hosted on a Serverless API or
+    Managed Compute endpoint. For GitHub Models or Azure OpenAI endpoints,
+    the client constructor needs to be modified. See package documentation:
+    https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/README.md#key-concepts
+
+USAGE:
+    python sample_chat_completions_with_tracing.py
+
+    Set these two environment variables before running the sample:
+    1) AZURE_AI_CHAT_ENDPOINT - Your endpoint URL, in the form
+        https://<your-deployment-name>.<your-azure-region>.models.ai.azure.com
+        where `your-deployment-name` is your unique AI Model deployment name, and
+        `your-azure-region` is the Azure region where your model is deployed.
+    2) AZURE_AI_CHAT_KEY - Your model key (a 32-character string). Keep it secret.
+"""
+
 import os
 from opentelemetry import trace
+# opentelemetry-sdk is required for the opentelemetry.sdk imports.
+# You can install it with command "pip install opentelemetry-sdk".
 from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import BatchSpanProcessor
-from opentelemetry.sdk.trace.export import ConsoleSpanExporter
-from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter
 from azure.ai.inference import ChatCompletionsClient
 from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason
 from azure.core.credentials import AzureKeyCredential
-from azure.core.tracing.ai.inference import AIInferenceApiInstrumentor
-from azure.core.settings import settings
 
+# [START trace_setting]
+from azure.core.settings import settings
+settings.tracing_implementation = "opentelemetry"
+# [END trace_setting]
 
 # Setup tracing to console
 exporter = ConsoleSpanExporter()
@@ -21,19 +47,8 @@
 tracer = trace.get_tracer(__name__)
 trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(exporter))
 
-# Use the following code to setup tracing to Application Insights
-# from azure.monitor.opentelemetry.exporter import AzureMonitorTraceExporter
-# trace.set_tracer_provider(TracerProvider())
-# tracer = trace.get_tracer(__name__)
-# span_processor = BatchSpanProcessor(
-#     AzureMonitorTraceExporter.from_connection_string(
-#         os.environ["APPLICATIONINSIGHTS_CONNECTION_STRING"]
-#     )
-# )
-# trace.get_tracer_provider().add_span_processor(span_processor)
-
-def chat_completion_streaming(key, endpoint, model_name):
+def chat_completion_streaming(key, endpoint):
     client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key))
     response = client.complete(
         stream=True,
@@ -41,7 +56,6 @@ def chat_completion_streaming(key, endpoint, model_name):
             SystemMessage(content="You are a helpful assistant."),
             UserMessage(content="Tell me about software engineering in five sentences."),
         ],
-        model=model_name,
     )
     for update in response:
         if update.choices:
@@ -49,6 +63,9 @@ def chat_completion_streaming(key, endpoint, model_name):
             pass
     client.close()
 
+# [START trace_function]
+from opentelemetry import trace
+tracer = trace.get_tracer(__name__)
 
 # The tracer.start_as_current_span decorator will trace the function call and enable adding additional attributes
 # to the span in the function implementation. Note that this will trace the function parameters and their values. 
@@ -65,6 +82,7 @@ def get_temperature(city: str) -> str: return "80" else: return "Unavailable" + # [END trace_function] def get_weather(city: str) -> str: @@ -76,7 +94,7 @@ def get_weather(city: str) -> str: return "Unavailable" -def chat_completion_with_function_call(key, endpoint, model_name): +def chat_completion_with_function_call(key, endpoint): import json from azure.ai.inference.models import ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition @@ -120,7 +138,7 @@ def chat_completion_with_function_call(key, endpoint, model_name): UserMessage(content="What is the weather and temperature in Seattle?"), ] - response = client.complete(messages=messages, model=model_name, tools=[weather_description, temperature_in_city]) + response = client.complete(messages=messages, tools=[weather_description, temperature_in_city]) if response.choices[0].finish_reason == CompletionsFinishReason.TOOL_CALLS: # Append the previous model response to the chat history @@ -137,31 +155,36 @@ def chat_completion_with_function_call(key, endpoint, model_name): # Provide the tool response to the model, by appending it to the chat history messages.append(ToolMessage(tool_call_id=tool_call.id, content=function_response)) # With the additional tools information on hand, get another response from the model - response = client.complete(messages=messages, model=model_name, tools=[weather_description, temperature_in_city]) + response = client.complete(messages=messages, tools=[weather_description, temperature_in_city]) print(f"Model response = {response.choices[0].message.content}") def main(): - # Setup Azure Core settings to use OpenTelemetry tracing - settings.tracing_implementation = "OpenTelemetry" - + # [START instrument_inferencing] + from azure.core.tracing.ai.inference import AIInferenceInstrumentor # Instrument AI Inference API - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() + # [END instrument_inferencing] - # Read AI Inference API configuration - endpoint = os.environ.get("AZUREAI_ENDPOINT_URL") - key = os.environ.get("AZUREAI_ENDPOINT_KEY") - model_name = os.environ.get("AZUREAI_MODEL_NAME") + try: + endpoint = os.environ["AZURE_AI_CHAT_ENDPOINT"] + key = os.environ["AZURE_AI_CHAT_KEY"] + except KeyError: + print("Missing environment variable 'AZURE_AI_CHAT_ENDPOINT' or 'AZURE_AI_CHAT_KEY'") + print("Set them before running this sample.") + exit() print("===== starting chat_completion_streaming() =====") - chat_completion_streaming(key, endpoint, model_name) + chat_completion_streaming(key, endpoint) print("===== chat_completion_streaming() done =====") print("===== starting chat_completion_with_function_call() =====") - chat_completion_with_function_call(key, endpoint, model_name) + chat_completion_with_function_call(key, endpoint) print("===== chat_completion_with_function_call() done =====") - AIInferenceApiInstrumentor().uninstrument() + # [START uninstrument_inferencing] + AIInferenceInstrumentor().uninstrument() + # [END uninstrument_inferencing] if __name__ == "__main__": diff --git a/sdk/ai/azure-ai-inference/setup.py b/sdk/ai/azure-ai-inference/setup.py index 54039451d91a..98eff8671e37 100644 --- a/sdk/ai/azure-ai-inference/setup.py +++ b/sdk/ai/azure-ai-inference/setup.py @@ -69,6 +69,6 @@ ], python_requires=">=3.8", extras_require={ - 'trace': ['azure-core-tracing-opentelemetry', 'opentelemetry-sdk', 'azure-monitor-opentelemetry-exporter'] + 'trace': ['azure-core-tracing-opentelemetry'] } ) diff --git 
a/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py index aeb8266abbc7..82e4b0665a09 100644 --- a/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py +++ b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py @@ -1,7 +1,7 @@ # ------------------------------------ # Copyright (c) Microsoft Corporation. # ------------------------------------ -import datetime +import numbers import json from opentelemetry.sdk.trace import Span @@ -28,7 +28,12 @@ def check_span_attributes(self, span, attributes): return False else: # Check if the attribute value matches the provided value - if attribute_value != "" and span.attributes[attribute_name] != attribute_value: + if attribute_value == "+": + if not isinstance(span.attributes[attribute_name], numbers.Number): + return False + if span.attributes[attribute_name] < 0: + return False + elif attribute_value != "" and span.attributes[attribute_name] != attribute_value: return False # Check if the attribute value in the span is not empty when the provided value is "" elif attribute_value == "" and not span.attributes[attribute_name]: diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py index fd630ffa706b..1fb76e363738 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py @@ -18,7 +18,7 @@ from devtools_testutils.aio import recorded_by_proxy_async from azure.core.exceptions import AzureError, ServiceRequestError from azure.core.credentials import AzureKeyCredential -from azure.core.tracing.ai.inference import AIInferenceApiInstrumentor +from azure.core.tracing.ai.inference import AIInferenceInstrumentor from memory_trace_exporter import MemoryTraceExporter from gen_ai_trace_verifier import GenAiTraceVerifier from opentelemetry import trace @@ -718,7 +718,7 @@ async def test_chat_completion_async_tracing_content_recording_disabled(self, ** self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_async_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() response = await client.complete( messages=[ sdk.models.SystemMessage(content="You are a helpful assistant."), @@ -754,4 +754,4 @@ async def test_chat_completion_async_tracing_content_recording_disabled(self, ** ] events_match = GenAiTraceVerifier().check_span_events(span, expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() \ No newline at end of file + AIInferenceInstrumentor().uninstrument() \ No newline at end of file diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index 16568e77f486..e0c27d66dbf9 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -18,7 +18,7 @@ from devtools_testutils import recorded_by_proxy from azure.core.exceptions import AzureError, ServiceRequestError from azure.core.credentials import AzureKeyCredential -from azure.core.tracing.ai.inference import AIInferenceApiInstrumentor +from azure.core.tracing.ai.inference import AIInferenceInstrumentor from memory_trace_exporter import MemoryTraceExporter from gen_ai_trace_verifier import GenAiTraceVerifier from 
opentelemetry import trace @@ -838,11 +838,11 @@ def test_instrumentation(self, **kwargs): client = self._create_chat_client(**kwargs) exception_caught = False try: - assert AIInferenceApiInstrumentor().is_instrumented() == False - AIInferenceApiInstrumentor().instrument() - assert AIInferenceApiInstrumentor().is_instrumented() == True - AIInferenceApiInstrumentor().uninstrument() - assert AIInferenceApiInstrumentor().is_instrumented() == False + assert AIInferenceInstrumentor().is_instrumented() == False + AIInferenceInstrumentor().instrument() + assert AIInferenceInstrumentor().is_instrumented() == True + AIInferenceInstrumentor().uninstrument() + assert AIInferenceInstrumentor().is_instrumented() == False except RuntimeError as e: exception_caught = True print(e) @@ -855,9 +855,9 @@ def test_instrumenting_twice_causes_exception(self, **kwargs): exception_caught = False instrumented_once = False try: - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() instrumented_once = True - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() except RuntimeError as e: exception_caught = True print(e) @@ -870,7 +870,7 @@ def test_uninstrumenting_uninstrumented_causes_exception(self, **kwargs): client = self._create_chat_client(**kwargs) exception_caught = False try: - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() except RuntimeError as e: exception_caught = True print(e) @@ -883,10 +883,10 @@ def test_uninstrumenting_twise_causes_exception(self, **kwargs): exception_caught = False uninstrumented_once = False try: - AIInferenceApiInstrumentor().instrument() - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().instrument() + AIInferenceInstrumentor().uninstrument() uninstrumented_once = True - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() except RuntimeError as e: exception_caught = True print(e) @@ -899,7 +899,7 @@ def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() response = client.complete( messages=[ sdk.models.SystemMessage(content="You are a helpful assistant."), @@ -914,12 +914,12 @@ def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): span = spans[0] expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) assert attributes_match == True @@ -935,14 +935,14 @@ def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): ] events_match = GenAiTraceVerifier().check_span_events(span, expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() 
@ServicePreparerChatCompletions() def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() response = client.complete( messages=[ sdk.models.SystemMessage(content="You are a helpful assistant."), @@ -957,12 +957,12 @@ def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): span = spans[0] expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) assert attributes_match == True @@ -992,14 +992,14 @@ def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): ] events_match = GenAiTraceVerifier().check_span_events(span, expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kwargs): self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() response = client.complete( messages=[ sdk.models.SystemMessage(content="You are a helpful assistant."), @@ -1021,12 +1021,12 @@ def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kw span = spans[0] expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) assert attributes_match == True @@ -1042,14 +1042,14 @@ def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kw ] events_match = GenAiTraceVerifier().check_span_events(span, expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwargs): self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() response = 
client.complete( messages=[ sdk.models.SystemMessage(content="You are a helpful assistant."), @@ -1071,12 +1071,12 @@ def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwa span = spans[0] expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) assert attributes_match == True @@ -1106,7 +1106,7 @@ def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwa ] events_match = GenAiTraceVerifier().check_span_events(span, expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() def test_chat_completion_with_function_call_tracing_content_recording_enabled(self, **kwargs): @@ -1117,7 +1117,7 @@ def test_chat_completion_with_function_call_tracing_content_recording_enabled(se self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() def get_weather(city: str) -> str: if city == "Seattle": @@ -1173,23 +1173,23 @@ def get_weather(city: str) -> str: assert len(spans) == 2 expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) assert attributes_match == True expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) assert attributes_match == True @@ -1268,7 +1268,7 @@ def get_weather(city: str) -> str: events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() def 
test_chat_completion_with_function_call_tracing_content_recording_disabled(self, **kwargs): @@ -1279,7 +1279,7 @@ def test_chat_completion_with_function_call_tracing_content_recording_disabled(s self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() def get_weather(city: str) -> str: if city == "Seattle": @@ -1335,23 +1335,23 @@ def get_weather(city: str) -> str: assert len(spans) == 2 expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) assert attributes_match == True expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) assert attributes_match == True @@ -1382,169 +1382,7 @@ def get_weather(city: str) -> str: events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() - - @ServicePreparerChatCompletions() - def test_chat_completion_with_function_call_streaming_tracing_content_recording_enabled(self, **kwargs): - import json - from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition - from azure.ai.inference import ChatCompletionsClient - - self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") - client = self._create_chat_client(**kwargs) - processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() - - def get_weather(city: str) -> str: - if city == "Seattle": - return "Nice weather" - elif city == "New York City": - return "Good weather" - else: - return "Unavailable" - - weather_description = ChatCompletionsToolDefinition( - function=FunctionDefinition( - name="get_weather", - description="Returns description of the weather in the specified city", - parameters={ - "type": "object", - "properties": { - "city": { - "type": "string", - "description": "The name of the city for which weather info is requested", - }, - }, - "required": ["city"], - }, - ) - ) - messages=[ - sdk.models.SystemMessage(content="You are a helpful assistant."), - sdk.models.UserMessage(content="What is the weather in 
Seattle?"), - ] - - response = client.complete(messages=messages, tools=[weather_description]) - - if response.choices[0].finish_reason == CompletionsFinishReason.TOOL_CALLS: - # Append the previous model response to the chat history - messages.append(AssistantMessage(tool_calls=response.choices[0].message.tool_calls)) - # The tool should be of type function call. - if response.choices[0].message.tool_calls is not None and len(response.choices[0].message.tool_calls) > 0: - for tool_call in response.choices[0].message.tool_calls: - if type(tool_call) is ChatCompletionsToolCall: - function_args = json.loads(tool_call.function.arguments.replace("'", '"')) - print(f"Calling function `{tool_call.function.name}` with arguments {function_args}") - callable_func = locals()[tool_call.function.name] - function_response = callable_func(**function_args) - print(f"Function response = {function_response}") - # Provide the tool response to the model, by appending it to the chat history - messages.append(ToolMessage(tool_call_id=tool_call.id, content=function_response)) - # With the additional tools information on hand, get another response from the model - response = client.complete(messages=messages, tools=[weather_description]) - processor.force_flush() - spans = exporter.get_spans_by_name_starts_with("chat ") - if len(spans) == 0: - spans = exporter.get_spans_by_name("chat") - assert len(spans) == 2 - expected_attributes = [('gen_ai.operation.name', 'chat'), - ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), - ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), - ('gen_ai.response.finish_reasons', ('tool_calls',))] - attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) - assert attributes_match == True - expected_attributes = [('gen_ai.operation.name', 'chat'), - ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), - ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), - ('gen_ai.response.finish_reasons', ('stop',))] - attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) - assert attributes_match == True - - expected_events = [ - { - "name": "gen_ai.system.message", - "timestamp": "", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}" - } - }, - { - "name": "gen_ai.user.message", - "timestamp": "", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" - } - }, - { - "name": "gen_ai.choice", - "timestamp": "", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"message\": {\"content\": \"\", \"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\":\\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}, \"finish_reason\": \"tool_calls\", \"index\": 0}" - } - } - ] - events_match = GenAiTraceVerifier().check_span_events(spans[0], expected_events) - assert events_match == True - - expected_events = [ - { - "name": "gen_ai.system.message", - "timestamp": "*", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"role\": \"system\", \"content\": \"You 
are a helpful assistant.\"}" - } - }, - { - "name": "gen_ai.user.message", - "timestamp": "*", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"role\": \"user\", \"content\": \"What is the weather in Seattle?\"}" - } - }, - { - "name": "gen_ai.assistant.message", - "timestamp": "*", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"role\": \"assistant\", \"tool_calls\": [{\"function\": {\"arguments\": \"{\\\"city\\\": \\\"Seattle\\\"}\", \"call_id\": null, \"name\": \"get_weather\"}, \"id\": \"*\", \"type\": \"function\"}]}" - } - }, - { - "name": "gen_ai.tool.message", - "timestamp": "*", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"role\": \"tool\", \"tool_call_id\": \"*\", \"content\": \"Nice weather\"}" - } - }, - { - "name": "gen_ai.choice", - "timestamp": "*", - "attributes": { - "gen_ai.system": "az.ai.inference", - "gen_ai.event.content": "{\"message\": {\"content\": \"*\"}, \"finish_reason\": \"stop\", \"index\": 0}" - } - } - ] - events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) - assert events_match == True - - AIInferenceApiInstrumentor().uninstrument() + AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() def test_chat_completion_with_function_call_streaming_tracing_content_recording_enabled(self, **kwargs): @@ -1555,7 +1393,7 @@ def test_chat_completion_with_function_call_streaming_tracing_content_recording_ self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() def get_weather(city: str) -> str: if city == "Seattle": @@ -1649,23 +1487,23 @@ def get_weather(city: str) -> str: assert len(spans) == 2 expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) assert attributes_match == True expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) assert attributes_match == True @@ -1744,7 +1582,7 @@ def get_weather(city: str) -> str: events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() + 
AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() def test_chat_completion_with_function_call_streaming_tracing_content_recording_disabled(self, **kwargs): @@ -1755,7 +1593,7 @@ def test_chat_completion_with_function_call_streaming_tracing_content_recording_ self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() - AIInferenceApiInstrumentor().instrument() + AIInferenceInstrumentor().instrument() def get_weather(city: str) -> str: if city == "Seattle": @@ -1849,23 +1687,23 @@ def get_weather(city: str) -> str: assert len(spans) == 2 expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[0], expected_attributes) assert attributes_match == True expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), - ('server.address', ''), + ('gen_ai.request.model', 'chat'), + ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(spans[1], expected_attributes) assert attributes_match == True @@ -1896,4 +1734,4 @@ def get_weather(city: str) -> str: events_match = GenAiTraceVerifier().check_span_events(spans[1], expected_events) assert events_match == True - AIInferenceApiInstrumentor().uninstrument() \ No newline at end of file + AIInferenceInstrumentor().uninstrument() \ No newline at end of file diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py index bd8ddc1e73b7..88064b3607a6 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py @@ -2,4 +2,4 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
# ------------------------------------ -from ._ai_inference_api_instrumentor import AIInferenceApiInstrumentor +from ._ai_inference_instrumentor import AIInferenceInstrumentor diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py similarity index 64% rename from sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py rename to sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py index 5156b77ee11b..883ecd63b2b9 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py @@ -2,10 +2,9 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- import os -from .azure_telemetry_instrumentor import AzureTelemetryInstrumentor -class AIInferenceApiInstrumentor(AzureTelemetryInstrumentor): +class AIInferenceInstrumentor: def __init__(self): super().__init__() @@ -20,16 +19,16 @@ def instrument(self): var_value = os.environ.get("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED") enable_content_tracing = self.str_to_bool(var_value) - from ._ai_inference_api_instrumentor_impl import _inject_inference_api - _inject_inference_api(enable_content_tracing) + from ._ai_inference_instrumentor_impl import _instrument_inference + _instrument_inference(enable_content_tracing) def uninstrument(self): if not self.is_instrumented(): raise RuntimeError("Not instrumented") - from ._ai_inference_api_instrumentor_impl import _restore_inference_api - _restore_inference_api() + from ._ai_inference_instrumentor_impl import _uninstrument_inference + _uninstrument_inference() def is_instrumented(self): - from ._ai_inference_api_instrumentor_impl import _is_instrumented + from ._ai_inference_instrumentor_impl import _is_instrumented return _is_instrumented() diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py similarity index 98% rename from sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py rename to sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py index c003da6e4132..17702c2de7eb 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_api_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py @@ -16,7 +16,6 @@ from azure.core.tracing import AbstractSpan from azure.core.tracing import SpanKind from azure.core.settings import settings -from azure.core.tracing.common import get_function_and_class_name from opentelemetry.trace import Status, StatusCode, Span _inference_traces_enabled: bool = False @@ -278,7 +277,7 @@ def inner(*args, **kwargs): if span_impl_type is None: return function(*args, **kwargs) - class_function_name = get_function_and_class_name(function, *args) + class_function_name = function.__qualname__ if class_function_name.startswith("ChatCompletionsClient.complete"): if 
kwargs.get('model') is None: @@ -343,7 +342,7 @@ async def inner(*args, **kwargs): if span_impl_type is None: return function(*args, **kwargs) - class_function_name = get_function_and_class_name(function, *args) + class_function_name = function.__qualname__ if class_function_name.startswith("ChatCompletionsClient.complete"): if kwargs.get('model') is None: @@ -436,7 +435,7 @@ def available_inference_apis_and_injectors(): yield from _generate_api_and_injector(_inference_api_list()) -def _inject_inference_api(enable_content_tracing: bool = False): +def _instrument_inference(enable_content_tracing: bool = False): """This function modifies the methods of the Inference API classes to inject logic before calling the original methods. The original methods are stored as _original attributes of the methods. """ @@ -452,7 +451,7 @@ def _inject_inference_api(enable_content_tracing: bool = False): setattr(api, method, injector(getattr(api, method), trace_type, name)) -def _restore_inference_api(): +def _uninstrument_inference(): """This function restores the original methods of the Inference API classes by assigning them back from the _original attributes of the modified methods. """ diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/azure_telemetry_instrumentor.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/azure_telemetry_instrumentor.py deleted file mode 100644 index 7950a442363e..000000000000 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/azure_telemetry_instrumentor.py +++ /dev/null @@ -1,20 +0,0 @@ -# --------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# --------------------------------------------------------- -from abc import ABC, abstractmethod - -class AzureTelemetryInstrumentor(ABC): - def __init__(self): - pass - - @abstractmethod - def instrument(self): - pass - - @abstractmethod - def uninstrument(self): - pass - - @abstractmethod - def is_instrumented(self): - pass \ No newline at end of file From d9652f5ec12bb87de5386ba1698e560c69673d33 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Thu, 26 Sep 2024 09:45:39 -0500 Subject: [PATCH 18/35] changed to readme based on review comments --- sdk/ai/azure-ai-inference/README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sdk/ai/azure-ai-inference/README.md b/sdk/ai/azure-ai-inference/README.md index d2e593b30646..bc5cd5b7a679 100644 --- a/sdk/ai/azure-ai-inference/README.md +++ b/sdk/ai/azure-ai-inference/README.md @@ -564,11 +564,12 @@ settings.tracing_implementation = "opentelemetry" Please refer to [azure-core-tracing-documentation](https://learn.microsoft.com/python/api/overview/azure/core-tracing-opentelemetry-readme) for more information. -### Trace Exporter(s) +### Exporting Traces with OpenTelemetry -In order for the traces to be captured, you need to setup the applicable trace exporters. The chosen exporter will be based on where you want the traces to be output. You can also implement your own exporter. You can find more information [here](https://learn.microsoft.com/en-us/python/api/overview/azure/core-tracing-opentelemetry-readme?view=azure-python-preview). +Azure AI Inference is instrumented with OpenTelemetry. In order to enable tracing you need to configure OpenTelemetry to export traces to your observability backend. 
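+For example, a minimal sketch that prints the emitted spans to the console could look like this. It assumes the `opentelemetry-sdk` package is installed; any other span exporter (OTLP, Azure Monitor, etc.) can be registered the same way:
+
+```python
+from opentelemetry import trace
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter
+
+# Register a tracer provider and route finished spans to the console exporter.
+trace.set_tracer_provider(TracerProvider())
+trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
+```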
+Refer to [Azure SDK tracing in Python](https://learn.microsoft.com/python/api/overview/azure/core-tracing-opentelemetry-readme?view=azure-python-preview) for more details. -Please refer to [this](https://learn.microsoft.com/en-us/azure/azure-monitor/app/create-workspace-resource?tabs=bicep) documentation for more information about how to create Azure Monitor resource for the Azure Monitor exporter. +Refer to [Azure Monitor OpenTelemetry documentation](https://learn.microsoft.com/azure/azure-monitor/app/opentelemetry-enable?tabs=python) for the details on how to send Azure AI Inference traces to Azure Monitor and create Azure Monitor resource. ### Instrumentation From 6da2a7d143bd15355ff02544992755dbab179265 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Thu, 26 Sep 2024 12:27:21 -0500 Subject: [PATCH 19/35] removed distributed_trace and some other updates --- sdk/ai/azure-ai-inference/README.md | 11 +++++------ .../azure-ai-inference/azure/ai/inference/_patch.py | 1 - .../azure/ai/inference/aio/_patch.py | 1 - .../samples/sample_chat_completions_with_defaults.py | 2 +- .../samples/sample_chat_completions_with_tracing.py | 8 ++++---- .../azure-ai-inference/tests/gen_ai_trace_verifier.py | 1 + .../azure-ai-inference/tests/memory_trace_exporter.py | 2 +- 7 files changed, 12 insertions(+), 14 deletions(-) diff --git a/sdk/ai/azure-ai-inference/README.md b/sdk/ai/azure-ai-inference/README.md index bc5cd5b7a679..85e493b1047b 100644 --- a/sdk/ai/azure-ai-inference/README.md +++ b/sdk/ai/azure-ai-inference/README.md @@ -538,10 +538,6 @@ For more information, see [Configure logging in the Azure libraries for Python]( To report issues with the client library, or request additional features, please open a GitHub issue [here](https://github.com/Azure/azure-sdk-for-python/issues) -## Next steps - -* Have a look at the [Samples](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/ai/azure-ai-inference/samples) folder, containing fully runnable Python code for doing inference using synchronous and asynchronous clients. - ## Tracing The Azure AI Inferencing API Tracing library provides tracing for Azure AI Inference client library for Python. Refer to Installation chapter above for installation instructions. @@ -561,7 +557,6 @@ settings.tracing_implementation = "opentelemetry" - Please refer to [azure-core-tracing-documentation](https://learn.microsoft.com/python/api/overview/azure/core-tracing-opentelemetry-readme) for more information. ### Exporting Traces with OpenTelemetry @@ -586,7 +581,7 @@ AIInferenceInstrumentor().instrument() -It is also possible to uninstrument the Azure AI Inferencing API by using the uninstrument call. After this call, the LLM traces will no longer be emitted by the Azure AI Inferencing API until instrument is called again. +It is also possible to uninstrument the Azure AI Inferencing API by using the uninstrument call. After this call, the traces will no longer be emitted by the Azure AI Inferencing API until instrument is called again. @@ -624,6 +619,10 @@ def get_temperature(city: str) -> str: +## Next steps + +* Have a look at the [Samples](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/ai/azure-ai-inference/samples) folder, containing fully runnable Python code for doing inference using synchronous and asynchronous clients. + ## Contributing This project welcomes contributions and suggestions. 
Most contributions require diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py index 362fa75e2a91..9860f70a7a68 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py @@ -454,7 +454,6 @@ def complete( :raises ~azure.core.exceptions.HttpResponseError: """ - @distributed_trace def complete( self, body: Union[JSON, IO[bytes]] = _Unset, diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py index ac31fdb88108..121401ce0f65 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py @@ -437,7 +437,6 @@ async def complete( :raises ~azure.core.exceptions.HttpResponseError: """ - @distributed_trace_async async def complete( self, body: Union[JSON, IO[bytes]] = _Unset, diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_defaults.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_defaults.py index 36f43a5601a4..011735a7e61f 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_defaults.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_defaults.py @@ -6,7 +6,7 @@ DESCRIPTION: This sample demonstrates how to get a chat completions response from the service using a synchronous client. The sample also shows how to - set default chat compoletions configuration in the client constructor, + set default chat completions configuration in the client constructor, which will be applied to all `complete` calls to the service. This sample assumes the AI model is hosted on a Serverless API or diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py index fdd107664c0a..8580f1a51dff 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py @@ -4,10 +4,10 @@ # ------------------------------------ """ DESCRIPTION: - This sample demonstrates how to get a chat completions response from - the service using a synchronous client. The sample also shows how to - set default chat compoletions configuration in the client constructor, - which will be applied to all `complete` calls to the service. + This sample demonstrates how to use tracing with the Inference client library. + Azure AI Inference is instrumented with OpenTelemetry. In order to enable tracing + you need to configure OpenTelemetry to export traces to your observability backend. + This sample shows how to capture the traces to a file. This sample assumes the AI model is hosted on a Serverless API or Managed Compute endpoint. For GitHub Models or Azure OpenAI endpoints, diff --git a/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py index 82e4b0665a09..29bb2ef57f47 100644 --- a/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py +++ b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py @@ -1,5 +1,6 @@ # ------------------------------------ # Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
# ------------------------------------ import numbers import json diff --git a/sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py b/sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py index 7563e65cfc87..7b609fbf5724 100644 --- a/sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py +++ b/sdk/ai/azure-ai-inference/tests/memory_trace_exporter.py @@ -35,5 +35,5 @@ def get_spans_by_name_starts_with(self, name_prefix: str) -> List[Span]: def get_spans_by_name(self, name: str) -> List[Span]: return [span for span in self._trace_list if span.name == name] - def get_spans(self) -> List[Span]: + def get_spans(self) -> List[Span]: return [span for span in self._trace_list] \ No newline at end of file From 521f7f0c280cdf4d1a7fb8b2d5c3f33206a81894 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Thu, 26 Sep 2024 14:00:43 -0500 Subject: [PATCH 20/35] fixing pre python v3.10 issue --- .../tracing/ai/inference/_ai_inference_instrumentor_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py index 17702c2de7eb..b0543da2f361 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py @@ -10,7 +10,7 @@ import logging from urllib.parse import urlparse from enum import Enum -from typing import Any, Iterator, Callable, Optional, List, Tuple, Dict +from typing import Any, Iterator, Callable, Optional, List, Tuple, Dict, Union from azure.ai.inference.aio import ChatCompletionsClient from azure.ai.inference import models as _models from azure.core.tracing import AbstractSpan @@ -141,7 +141,7 @@ def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCom span.span_instance.add_event(name="gen_ai.choice", attributes=attributes) -def _add_response_chat_attributes(span: AbstractSpan, result: _models.ChatCompletions | _models.StreamingChatCompletionsUpdate) -> None: +def _add_response_chat_attributes(span: AbstractSpan, result: Union[_models.ChatCompletions, _models.StreamingChatCompletionsUpdate]) -> None: _set_attributes( span, From 8c800992d4679db9458c50707390114e32191ca9 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Thu, 26 Sep 2024 16:16:18 -0500 Subject: [PATCH 21/35] test fixes --- .../test_model_inference_async_client.py | 8 +-- .../tests/test_model_inference_client.py | 49 ++++++++++--------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py index 1fb76e363738..70648401659a 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py @@ -733,12 +733,12 @@ async def test_chat_completion_async_tracing_content_recording_disabled(self, ** span = spans[0] expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), - ('gen_ai.request.model', ''), + ('gen_ai.request.model', 'chat'), ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', ''), - ('gen_ai.usage.input_tokens', ''), - ('gen_ai.usage.output_tokens', ''), + ('gen_ai.response.model', 
'mistral-large'), + ('gen_ai.usage.input_tokens', '+'), + ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) assert attributes_match == True diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index e0c27d66dbf9..cabd63362ee1 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -861,6 +861,7 @@ def test_instrumenting_twice_causes_exception(self, **kwargs): except RuntimeError as e: exception_caught = True print(e) + AIInferenceInstrumentor().uninstrument() client.close() assert instrumented_once == True assert exception_caught == True @@ -915,9 +916,9 @@ def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] @@ -958,9 +959,9 @@ def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] @@ -1022,9 +1023,9 @@ def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kw expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] @@ -1072,9 +1073,9 @@ def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwa expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] @@ -1174,9 +1175,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + 
('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] @@ -1185,9 +1186,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] @@ -1336,9 +1337,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] @@ -1347,9 +1348,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] @@ -1488,9 +1489,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] @@ -1499,9 +1500,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] @@ -1688,9 +1689,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), ('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('tool_calls',))] @@ -1699,9 +1700,9 @@ def get_weather(city: str) -> str: expected_attributes = [('gen_ai.operation.name', 'chat'), 
('gen_ai.system', 'az.ai.inference'), ('gen_ai.request.model', 'chat'), - ('server.address', 'mistral-small-ofgon-serverless.eastus2.inference.ai.azure.com'), + ('server.address', ''), ('gen_ai.response.id', ''), - ('gen_ai.response.model', 'mistral-small'), + ('gen_ai.response.model', 'mistral-large'), ('gen_ai.usage.input_tokens', '+'), ('gen_ai.usage.output_tokens', '+'), ('gen_ai.response.finish_reasons', ('stop',))] From 514dea4fd6d6f353198c8b2ae61c67d8b2f6b7f4 Mon Sep 17 00:00:00 2001 From: Darren Cohen <39422044+dargilco@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:59:04 -0700 Subject: [PATCH 22/35] Fix some of the non-trace tests --- sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py | 4 ++-- sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py | 2 +- .../tests/test_model_inference_async_client.py | 4 ++-- .../azure-ai-inference/tests/test_model_inference_client.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py index 9860f70a7a68..ce60a5d0c9b3 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py @@ -102,8 +102,8 @@ def load_client( "The AI model information is missing a value for `model type`. Cannot create an appropriate client." ) - # TODO: Remove "completions" and "embedding" once Mistral Large and Cohere fixes their model type - if model_info.model_type in (_models.ModelType.CHAT, "completion"): + # TODO: Remove "completions", "chat-completions" and "embedding" once Mistral Large and Cohere fix their model type + if model_info.model_type in (_models.ModelType.CHAT, "completion", "chat-completion", "chat-completions"): chat_completion_client = ChatCompletionsClient(endpoint, credential, **kwargs) chat_completion_client._model_info = ( # pylint: disable=protected-access,attribute-defined-outside-init model_info diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py index 121401ce0f65..e0a25e7ef1a5 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py @@ -87,7 +87,7 @@ async def load_client( ) # TODO: Remove "completions" and "embedding" once Mistral Large and Cohere fixes their model type - if model_info.model_type in (_models.ModelType.CHAT, "completion"): + if model_info.model_type in (_models.ModelType.CHAT, "completion", "chat-completion", "chat-completions"): chat_completion_client = ChatCompletionsClient(endpoint, credential, **kwargs) chat_completion_client._model_info = ( # pylint: disable=protected-access,attribute-defined-outside-init model_info diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py index 1fb76e363738..2c5515fb243a 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py @@ -492,7 +492,7 @@ async def test_async_load_chat_completions_client(self, **kwargs): response1 = await client.get_model_info() self._print_model_info_result(response1) self._validate_model_info_result( - response1, "completion" + response1, "chat-completion" # TODO: This should be chat_completions based on REST API spec...
) # TODO: This should be ModelType.CHAT once the model is fixed await client.close() @@ -506,7 +506,7 @@ async def test_async_get_model_info_on_chat_client(self, **kwargs): assert client._model_info # pylint: disable=protected-access self._print_model_info_result(response1) self._validate_model_info_result( - response1, "completion" + response1, "chat-completion" ) # TODO: This should be ModelType.CHAT once the model is fixed # Get the model info again. No network calls should be made here, diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index e0c27d66dbf9..534d1c23224d 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -544,7 +544,7 @@ def test_load_chat_completions_client(self, **kwargs): response1 = client.get_model_info() self._print_model_info_result(response1) self._validate_model_info_result( - response1, "completion" + response1, "chat-completion" ) # TODO: This should be ModelType.CHAT once the model is fixed client.close() @@ -560,7 +560,7 @@ def test_get_model_info_on_chat_client(self, **kwargs): self._print_model_info_result(response1) self._validate_model_info_result( - response1, "completion" + response1, "chat-completion" # TODO: This should be chat_completions according to REST API spec... ) # TODO: This should be ModelType.CHAT once the model is fixed # Get the model info again. No network calls should be made here, From 83f85d64e7e5c5dc3db2a05c3656d01a5c08eb2b Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Fri, 27 Sep 2024 10:18:52 -0500 Subject: [PATCH 23/35] fixing issues reported by tools --- .vscode/cspell.json | 3 +++ .../azure-ai-inference/azure/ai/inference/_patch.py | 1 + .../azure/ai/inference/aio/_patch.py | 1 + .../samples/sample_chat_completions_with_tracing.py | 13 +++++++------ .../tests/test_model_inference_client.py | 2 +- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/.vscode/cspell.json b/.vscode/cspell.json index 628b0a8ee30a..888f65b392ac 100644 --- a/.vscode/cspell.json +++ b/.vscode/cspell.json @@ -401,6 +401,9 @@ "uamqp", "uksouth", "ukwest", + "uninstrument", + "uninstrumented", + "uninstrumenting", "unpad", "unpadder", "unpartial", diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py index 9860f70a7a68..362fa75e2a91 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py @@ -454,7 +454,6 @@ def complete( :raises ~azure.core.exceptions.HttpResponseError: """ + @distributed_trace def complete( self, body: Union[JSON, IO[bytes]] = _Unset, diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py index 121401ce0f65..ac31fdb88108 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py @@ -437,7 +437,6 @@ async def complete( :raises ~azure.core.exceptions.HttpResponseError: """ + @distributed_trace_async async def complete( self, body: Union[JSON, IO[bytes]] = _Unset, diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py index 8580f1a51dff..392d779d6377 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py +++
b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py @@ -30,8 +30,8 @@ from opentelemetry import trace # opentelemetry-sdk is required for the opentelemetry.sdk imports. # You can install it with command "pip install opentelemetry.sdk". -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter +#from opentelemetry.sdk.trace import TracerProvider +#from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter from azure.ai.inference import ChatCompletionsClient from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason from azure.core.credentials import AzureKeyCredential @@ -42,10 +42,11 @@ # [END trace_setting] # Setup tracing to console -exporter = ConsoleSpanExporter() -trace.set_tracer_provider(TracerProvider()) -tracer = trace.get_tracer(__name__) -trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(exporter)) +# Requires opentelemetry-sdk +#exporter = ConsoleSpanExporter() +#trace.set_tracer_provider(TracerProvider()) +#tracer = trace.get_tracer(__name__) +#trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(exporter)) def chat_completion_streaming(key, endpoint): diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index cabd63362ee1..1c4b4fe26f39 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -879,7 +879,7 @@ def test_uninstrumenting_uninstrumented_causes_exception(self, **kwargs): assert exception_caught == True @ServicePreparerChatCompletions() - def test_uninstrumenting_twise_causes_exception(self, **kwargs): + def test_uninstrumenting_twice_causes_exception(self, **kwargs): client = self._create_chat_client(**kwargs) exception_caught = False uninstrumented_once = False From e8dd67d50e5db14ca79ec2fd38b656376b33e886 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Fri, 27 Sep 2024 11:53:19 -0500 Subject: [PATCH 24/35] adding uninstrumentation to the beginning of tracing tests --- .../test_model_inference_async_client.py | 5 ++ .../tests/test_model_inference_client.py | 60 +++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py index 8619325504b0..db955f0cc010 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py @@ -715,6 +715,11 @@ def modify_env_var(self, name, new_value): @ServicePreparerChatCompletions() async def test_chat_completion_async_tracing_content_recording_disabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_async_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index 7f6d83c7acf2..3acbe369ce57 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -835,6 +835,11 @@ def 
modify_env_var(self, name, new_value): @ServicePreparerChatCompletions() def test_instrumentation(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass client = self._create_chat_client(**kwargs) exception_caught = False try: @@ -851,6 +856,11 @@ def test_instrumentation(self, **kwargs): @ServicePreparerChatCompletions() def test_instrumenting_twice_causes_exception(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass client = self._create_chat_client(**kwargs) exception_caught = False instrumented_once = False @@ -868,6 +878,11 @@ def test_instrumenting_twice_causes_exception(self, **kwargs): @ServicePreparerChatCompletions() def test_uninstrumenting_uninstrumented_causes_exception(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass client = self._create_chat_client(**kwargs) exception_caught = False try: @@ -880,6 +895,11 @@ def test_uninstrumenting_uninstrumented_causes_exception(self, **kwargs): @ServicePreparerChatCompletions() def test_uninstrumenting_twice_causes_exception(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass client = self._create_chat_client(**kwargs) exception_caught = False uninstrumented_once = False @@ -897,6 +917,11 @@ def test_uninstrumenting_twice_causes_exception(self, **kwargs): @ServicePreparerChatCompletions() def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() @@ -940,6 +965,11 @@ def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): @ServicePreparerChatCompletions() def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() @@ -997,6 +1027,11 @@ def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): @ServicePreparerChatCompletions() def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "False") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() @@ -1047,6 +1082,11 @@ def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kw @ServicePreparerChatCompletions() def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + 
except RuntimeError as e: + pass self.modify_env_var(CONTENT_TRACING_ENV_VARIABLE, "True") client = self._create_chat_client(**kwargs) processor, exporter = self.setup_memory_trace_exporter() @@ -1111,6 +1151,11 @@ def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwa @ServicePreparerChatCompletions() def test_chat_completion_with_function_call_tracing_content_recording_enabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass import json from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition from azure.ai.inference import ChatCompletionsClient @@ -1273,6 +1318,11 @@ def get_weather(city: str) -> str: @ServicePreparerChatCompletions() def test_chat_completion_with_function_call_tracing_content_recording_disabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass import json from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition from azure.ai.inference import ChatCompletionsClient @@ -1387,6 +1437,11 @@ def get_weather(city: str) -> str: @ServicePreparerChatCompletions() def test_chat_completion_with_function_call_streaming_tracing_content_recording_enabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass import json from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, FunctionCall, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition from azure.ai.inference import ChatCompletionsClient @@ -1587,6 +1642,11 @@ def get_weather(city: str) -> str: @ServicePreparerChatCompletions() def test_chat_completion_with_function_call_streaming_tracing_content_recording_disabled(self, **kwargs): + # Make sure code is not instrumented due to a previous test exception + try: + AIInferenceInstrumentor().uninstrument() + except RuntimeError as e: + pass import json from azure.ai.inference.models import SystemMessage, UserMessage, CompletionsFinishReason, FunctionCall, ToolMessage, AssistantMessage, ChatCompletionsToolCall, ChatCompletionsToolDefinition, FunctionDefinition from azure.ai.inference import ChatCompletionsClient From 0c286c3efca8b7d5fb12452550928a9e5f9eb14d Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Fri, 27 Sep 2024 15:24:20 -0500 Subject: [PATCH 25/35] updating readme and sample --- sdk/ai/azure-ai-inference/samples/README.md | 9 +-------- .../samples/sample_chat_completions_with_tracing.py | 3 ++- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/sdk/ai/azure-ai-inference/samples/README.md b/sdk/ai/azure-ai-inference/samples/README.md index ebc8990ceb7a..34fc9920f2fa 100644 --- a/sdk/ai/azure-ai-inference/samples/README.md +++ b/sdk/ai/azure-ai-inference/samples/README.md @@ -24,14 +24,7 @@ See [Prerequisites](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ * Clone or download this sample repository * Open a command prompt / terminal window in this samples folder -* Install the client library for Python 
with pip: - ```bash - pip install azure-ai-inference - ``` - or update an existing installation: - ```bash - pip install --upgrade azure-ai-inference - ``` +* Install the client library for Python with pip. See [Install the package](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/README.md#install-the-package) * If you plan to run the asynchronous client samples, install the additional package [aiohttp](https://pypi.org/project/aiohttp/): ```bash pip install aiohttp ``` diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py index 392d779d6377..ae97a00de6c2 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py @@ -70,7 +70,8 @@ def chat_completion_streaming(key, endpoint): # The tracer.start_as_current_span decorator will trace the function call and enable adding additional attributes # to the span in the function implementation. Note that this will trace the function parameters and their values. -@tracer.start_as_current_span("get_temperature") +# Uncomment the following line to add instrumentation for the function call. +#@tracer.start_as_current_span("get_temperature") def get_temperature(city: str) -> str: # Adding attributes to the current span From 1aaf87c6f8479e95fee57d2a499a9f00bdd4d160 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Fri, 27 Sep 2024 16:12:22 -0500 Subject: [PATCH 26/35] adding ignore related to tool issue --- .../samples/sample_chat_completions_with_tracing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py index ae97a00de6c2..cf712218092b 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py @@ -70,8 +70,7 @@ def chat_completion_streaming(key, endpoint): # The tracer.start_as_current_span decorator will trace the function call and enable adding additional attributes # to the span in the function implementation. Note that this will trace the function parameters and their values. -# Uncomment the following line to add instrumentation for the function call. -#@tracer.start_as_current_span("get_temperature") +@tracer.start_as_current_span("get_temperature") # type: ignore def get_temperature(city: str) -> str: # Adding attributes to the current span From 510a6cab4b39fc5921c0ec64e0c5c7a6c84e2511 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Mon, 30 Sep 2024 09:45:49 -0500 Subject: [PATCH 27/35] updating code snippet in readme --- sdk/ai/azure-ai-inference/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/ai/azure-ai-inference/README.md b/sdk/ai/azure-ai-inference/README.md index 85e493b1047b..da6b1a87226c 100644 --- a/sdk/ai/azure-ai-inference/README.md +++ b/sdk/ai/azure-ai-inference/README.md @@ -602,7 +602,7 @@ tracer = get_tracer(__name__) # The tracer.start_as_current_span decorator will trace the function call and enable adding additional attributes # to the span in the function implementation. Note that this will trace the function parameters and their values.
-@tracer.start_as_current_span("get_temperature") +@tracer.start_as_current_span("get_temperature") # type: ignore def get_temperature(city: str) -> str: # Adding attributes to the current span From fa8e8b0bb2d70c5833b59a294f05c9b5080519e3 Mon Sep 17 00:00:00 2001 From: Darren Cohen <39422044+dargilco@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:29:19 -0700 Subject: [PATCH 28/35] Add missing `@recorded_by_proxy` decorators to new tracing tests --- .../tests/test_model_inference_async_client.py | 1 + .../tests/test_model_inference_client.py | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py index db955f0cc010..5678c62fc7e6 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_async_client.py @@ -714,6 +714,7 @@ def modify_env_var(self, name, new_value): return current_value @ServicePreparerChatCompletions() + @recorded_by_proxy_async async def test_chat_completion_async_tracing_content_recording_disabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: diff --git a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py index 3acbe369ce57..60ba93a83092 100644 --- a/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_model_inference_client.py @@ -834,6 +834,7 @@ def modify_env_var(self, name, new_value): return current_value @ServicePreparerChatCompletions() + @recorded_by_proxy def test_instrumentation(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -855,6 +856,7 @@ def test_instrumentation(self, **kwargs): assert exception_caught == False @ServicePreparerChatCompletions() + @recorded_by_proxy def test_instrumenting_twice_causes_exception(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -877,6 +879,7 @@ def test_instrumenting_twice_causes_exception(self, **kwargs): assert exception_caught == True @ServicePreparerChatCompletions() + @recorded_by_proxy def test_uninstrumenting_uninstrumented_causes_exception(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -894,6 +897,7 @@ def test_uninstrumenting_uninstrumented_causes_exception(self, **kwargs): assert exception_caught == True @ServicePreparerChatCompletions() + @recorded_by_proxy def test_uninstrumenting_twice_causes_exception(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -916,6 +920,7 @@ def test_uninstrumenting_twice_causes_exception(self, **kwargs): assert exception_caught == True @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -964,6 +969,7 @@ def test_chat_completion_tracing_content_recording_disabled(self, **kwargs): AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -1026,6 +1032,7 @@ def test_chat_completion_tracing_content_recording_enabled(self, **kwargs): 
AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -1081,6 +1088,7 @@ def test_chat_completion_streaming_tracing_content_recording_disabled(self, **kw AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -1150,6 +1158,7 @@ def test_chat_completion_streaming_tracing_content_recording_enabled(self, **kwa AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_with_function_call_tracing_content_recording_enabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -1317,6 +1326,7 @@ def get_weather(city: str) -> str: AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_with_function_call_tracing_content_recording_disabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -1436,6 +1446,7 @@ def get_weather(city: str) -> str: AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_with_function_call_streaming_tracing_content_recording_enabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: @@ -1641,6 +1652,7 @@ def get_weather(city: str) -> str: AIInferenceInstrumentor().uninstrument() @ServicePreparerChatCompletions() + @recorded_by_proxy def test_chat_completion_with_function_call_streaming_tracing_content_recording_disabled(self, **kwargs): # Make sure code is not instrumented due to a previous test exception try: From e410c311058989c8c37badb0667a56f2b4ccbeb1 Mon Sep 17 00:00:00 2001 From: Darren Cohen <39422044+dargilco@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:30:12 -0700 Subject: [PATCH 29/35] Push new recordings --- sdk/ai/azure-ai-inference/assets.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/ai/azure-ai-inference/assets.json b/sdk/ai/azure-ai-inference/assets.json index 08e25bc30c7d..fdb9e05b8246 100644 --- a/sdk/ai/azure-ai-inference/assets.json +++ b/sdk/ai/azure-ai-inference/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "python", "TagPrefix": "python/ai/azure-ai-inference", - "Tag": "python/ai/azure-ai-inference_498e85cbfd" + "Tag": "python/ai/azure-ai-inference_19a0adafc6" } From 18b3d92999eb2d6ae2f2930800c4ba28d837c58e Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Wed, 2 Oct 2024 14:28:57 -0500 Subject: [PATCH 30/35] fixing issues reported by tools --- .../core/tracing/ai/inference/__init__.py | 4 + .../inference/_ai_inference_instrumentor.py | 14 +- .../_ai_inference_instrumentor_impl.py | 279 +++++++++++------- .../dev_requirements.txt | 1 + .../azure-core-tracing-opentelemetry/setup.py | 3 +- 5 files changed, 182 insertions(+), 119 deletions(-) diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py index 88064b3607a6..9797b8c02824 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py 
+++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/__init__.py @@ -3,3 +3,7 @@ # Licensed under the MIT License. # ------------------------------------ from ._ai_inference_instrumentor import AIInferenceInstrumentor + +__all__ = [ + "AIInferenceInstrumentor", +] diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py index 883ecd63b2b9..11113e7f6d48 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py @@ -5,21 +5,19 @@ class AIInferenceInstrumentor: - def __init__(self): - super().__init__() - def str_to_bool(self, s): - if s is None: - return False - return str(s).lower() == 'true' + if s is None: + return False + return str(s).lower() == "true" def instrument(self): if self.is_instrumented(): raise RuntimeError("Already instrumented") - + var_value = os.environ.get("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED") enable_content_tracing = self.str_to_bool(var_value) from ._ai_inference_instrumentor_impl import _instrument_inference + _instrument_inference(enable_content_tracing) def uninstrument(self): @@ -27,8 +25,10 @@ def uninstrument(self): raise RuntimeError("Not instrumented") from ._ai_inference_instrumentor_impl import _uninstrument_inference + _uninstrument_inference() def is_instrumented(self): from ._ai_inference_instrumentor_impl import _is_instrumented + return _is_instrumented() diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py index b0543da2f361..a72942c9c451 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py @@ -2,7 +2,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. 
# --------------------------------------------------------- -import asyncio import copy import functools import importlib @@ -11,19 +10,22 @@ from urllib.parse import urlparse from enum import Enum from typing import Any, Iterator, Callable, Optional, List, Tuple, Dict, Union -from azure.ai.inference.aio import ChatCompletionsClient +from opentelemetry.trace import StatusCode, Span + +# pylint: disable = no-name-in-module +from azure.core import CaseInsensitiveEnumMeta # type: ignore from azure.ai.inference import models as _models -from azure.core.tracing import AbstractSpan -from azure.core.tracing import SpanKind + +# pylint: disable = no-name-in-module +from azure.core.tracing import AbstractSpan, SpanKind # type: ignore from azure.core.settings import settings -from opentelemetry.trace import Status, StatusCode, Span _inference_traces_enabled: bool = False _trace_inference_content: bool = False INFERENCE_GEN_AI_SYSTEM_NAME = "az.ai.inference" -class TraceType(str, Enum): +class TraceType(str, Enum, metaclass=CaseInsensitiveEnumMeta): # pylint: disable=C4747 """An enumeration class to represent different types of traces.""" INFERENCE = "Inference" @@ -47,27 +49,27 @@ def _add_request_chat_message_event(span: AbstractSpan, **kwargs: Any) -> None: name = f"gen_ai.{message.get('role')}.message" span.span_instance.add_event( name=name, - attributes={ - "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(message) - } + attributes={"gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, "gen_ai.event.content": json.dumps(message)}, ) -def parse_url(url): - parsed = urlparse(url) - server_address = parsed.hostname - port = parsed.port - return server_address, port +def parse_url(url): + parsed = urlparse(url) + server_address = parsed.hostname + port = parsed.port + return server_address, port def _add_request_chat_attributes(span: AbstractSpan, *args: Any, **kwargs: Any) -> None: client = args[0] - endpoint = client._config.endpoint + endpoint = client._config.endpoint # pylint: disable=protected-access server_address, port = parse_url(endpoint) - model = 'chat' - if kwargs.get('model') is not None: - model = kwargs.get('model') + model = "chat" + if kwargs.get("model") is not None: + model_value = kwargs.get("model") + if model_value is not None: + model = model_value + _set_attributes( span, ("gen_ai.operation.name", "chat"), @@ -85,40 +87,48 @@ def _add_request_chat_attributes(span: AbstractSpan, *args: Any, **kwargs: Any) def remove_function_call_names_and_arguments(tool_calls: list) -> list: tool_calls_copy = copy.deepcopy(tool_calls) for tool_call in tool_calls_copy: - if 'function' in tool_call: - if 'name' in tool_call['function']: - del tool_call['function']['name'] - if 'arguments' in tool_call['function']: - del tool_call['function']['arguments'] - if not tool_call['function']: - del tool_call['function'] + if "function" in tool_call: + if "name" in tool_call["function"]: + del tool_call["function"]["name"] + if "arguments" in tool_call["function"]: + del tool_call["function"]["arguments"] + if not tool_call["function"]: + del tool_call["function"] return tool_calls_copy def get_finish_reasons(result): if hasattr(result, "choices") and result.choices: - return [getattr(choice, "finish_reason", None).value if getattr(choice, "finish_reason", None) is not None else "none" for choice in result.choices] - else: - return None + return [ + ( + getattr(choice, "finish_reason", None).value + if getattr(choice, "finish_reason", None) is not None + else "none" + ) + 
for choice in result.choices + ] + return None def get_finish_reason_for_choice(choice): - return getattr(choice, "finish_reason", None).value if getattr(choice, "finish_reason", None) is not None else "none" + return ( + getattr(choice, "finish_reason", None).value if getattr(choice, "finish_reason", None) is not None else "none" + ) def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCompletions) -> None: for choice in result.choices: if _trace_inference_content: - response: Dict[str, Any] = { + full_response: Dict[str, Any] = { "message": {"content": choice.message.content}, "finish_reason": get_finish_reason_for_choice(choice), "index": choice.index, } if choice.message.tool_calls: - response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls] - attributes={ + full_response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls] + attributes = { "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(response) + "gen_ai.event.content": json.dumps(full_response), } else: response: Dict[str, Any] = { @@ -127,35 +137,46 @@ def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCom } if choice.message.tool_calls: response["message"] = {} - tool_calls_function_names_and_arguments_removed = remove_function_call_names_and_arguments(choice.message.tool_calls) - response["message"]["tool_calls"] = [tool.as_dict() for tool in tool_calls_function_names_and_arguments_removed] - attributes={ + tool_calls_function_names_and_arguments_removed = remove_function_call_names_and_arguments( + choice.message.tool_calls + ) + response["message"]["tool_calls"] = [ + tool.as_dict() for tool in tool_calls_function_names_and_arguments_removed + ] + attributes = { "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(response) + "gen_ai.event.content": json.dumps(response), } else: - attributes={ + attributes = { "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(response) + "gen_ai.event.content": json.dumps(response), } span.span_instance.add_event(name="gen_ai.choice", attributes=attributes) -def _add_response_chat_attributes(span: AbstractSpan, result: Union[_models.ChatCompletions, _models.StreamingChatCompletionsUpdate]) -> None: +def _add_response_chat_attributes( + span: AbstractSpan, result: Union[_models.ChatCompletions, _models.StreamingChatCompletionsUpdate] +) -> None: _set_attributes( span, ("gen_ai.response.id", result.id), ("gen_ai.response.model", result.model), - ("gen_ai.usage.input_tokens", result.usage.prompt_tokens if hasattr(result, "usage") and result.usage else None), - ("gen_ai.usage.output_tokens", result.usage.completion_tokens if hasattr(result, "usage") and result.usage else None), + ( + "gen_ai.usage.input_tokens", + result.usage.prompt_tokens if hasattr(result, "usage") and result.usage else None, + ), + ( + "gen_ai.usage.output_tokens", + result.usage.completion_tokens if hasattr(result, "usage") and result.usage else None, + ), ) finish_reasons = get_finish_reasons(result) span.add_attribute("gen_ai.response.finish_reasons", finish_reasons) -def _add_request_span_attributes(span: AbstractSpan, span_name: str, args: Any, kwargs: Any) -> None: - global _trace_inference_content +def _add_request_span_attributes(span: AbstractSpan, _span_name: str, args: Any, kwargs: Any) -> None: _add_request_chat_attributes(span, *args, **kwargs) if _trace_inference_content: 
_add_request_chat_message_event(span, **kwargs) @@ -183,7 +204,9 @@ def _accumulate_response(item, accumulate: Dict[str, Any]) -> None: if item.delta.tool_calls is not None: for tool_call in item.delta.tool_calls: if tool_call.id: - accumulate["message"]["tool_calls"].append({"id": tool_call.id, "type": "", "function": {"name": "", "arguments": ""}}) + accumulate["message"]["tool_calls"].append( + {"id": tool_call.id, "type": "", "function": {"name": "", "arguments": ""}} + ) if tool_call.function: accumulate["message"]["tool_calls"][-1]["type"] = "function" if tool_call.function and tool_call.function.name: @@ -192,21 +215,24 @@ def _accumulate_response(item, accumulate: Dict[str, Any]) -> None: accumulate["message"]["tool_calls"][-1]["function"]["arguments"] += tool_call.function.arguments -def _wrapped_stream(stream_obj: _models.StreamingChatCompletions, span: AbstractSpan) -> _models.StreamingChatCompletions: +def _wrapped_stream( + stream_obj: _models.StreamingChatCompletions, span: AbstractSpan +) -> _models.StreamingChatCompletions: class StreamWrapper(_models.StreamingChatCompletions): def __init__(self, stream_obj): super().__init__(stream_obj._response) def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]: - global _trace_inference_content try: accumulate: Dict[str, Any] = {} + chunk = None for chunk in stream_obj: for item in chunk.choices: _accumulate_response(item, accumulate) yield chunk - _add_response_chat_attributes(span, chunk) + if chunk is not None: + _add_response_chat_attributes(span, chunk) except Exception as exc: # Set the span status to error @@ -226,22 +252,26 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]: accumulate["index"] = 0 # Delete message if content tracing is not enabled if not _trace_inference_content: - if 'message' in accumulate: - if 'content' in accumulate['message']: - del accumulate['message']['content'] - if not accumulate['message']: - del accumulate['message'] - if 'message' in accumulate: - if 'tool_calls' in accumulate['message']: - tool_calls_function_names_and_arguments_removed = remove_function_call_names_and_arguments(accumulate['message']['tool_calls']) - accumulate['message']['tool_calls'] = [tool for tool in tool_calls_function_names_and_arguments_removed] + if "message" in accumulate: + if "content" in accumulate["message"]: + del accumulate["message"]["content"] + if not accumulate["message"]: + del accumulate["message"] + if "message" in accumulate: + if "tool_calls" in accumulate["message"]: + tool_calls_function_names_and_arguments_removed = ( + remove_function_call_names_and_arguments(accumulate["message"]["tool_calls"]) + ) + accumulate["message"]["tool_calls"] = list( + tool_calls_function_names_and_arguments_removed + ) span.span_instance.add_event( name="gen_ai.choice", attributes={ "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(accumulate) - } + "gen_ai.event.content": json.dumps(accumulate), + }, ) span.finish() @@ -249,25 +279,26 @@ def __iter__(self) -> Iterator[_models.StreamingChatCompletionsUpdate]: def _trace_sync_function( - function: Callable = None, + function: Callable, *, - args_to_ignore: Optional[List[str]] = None, - trace_type=TraceType.INFERENCE, - name: Optional[str] = None, + _args_to_ignore: Optional[List[str]] = None, + _trace_type=TraceType.INFERENCE, + _name: Optional[str] = None, ) -> Callable: """ Decorator that adds tracing to a synchronous function. - Args: - function (Callable): The function to be traced. 
- args_to_ignore (Optional[List[str]], optional): A list of argument names to be ignored in the trace. - Defaults to None. - trace_type (TraceType, optional): The type of the trace. Defaults to TraceType.INFERENCE. - name (str, optional): The name of the trace, will set to func name if not provided. - - - Returns: - Callable: The traced function. + :param function: The function to be traced. + :type function: Callable + :param args_to_ignore: A list of argument names to be ignored in the trace. + Defaults to None. + :type args_to_ignore: List[str], optional + :param trace_type: The type of the trace. Defaults to TraceType.INFERENCE. + :type trace_type: TraceType, optional + :param name: The name of the trace; will be set to the function name if not provided. + :type name: str, optional + :return: The traced function. + :rtype: Callable """ @functools.wraps(function) def inner(*args, **kwargs): @@ -280,10 +311,10 @@ def inner(*args, **kwargs): class_function_name = function.__qualname__ if class_function_name.startswith("ChatCompletionsClient.complete"): - if kwargs.get('model') is None: - span_name = f"chat" + if kwargs.get("model") is None: + span_name = "chat" else: - model = kwargs.get('model') + model = kwargs.get("model") span_name = f"chat {model}" span = span_impl_type(name=span_name, kind=SpanKind.CLIENT) @@ -310,29 +341,33 @@ def inner(*args, **kwargs): span.finish() return result + # Handle the default case (if the function name does not match) + return None # Ensure all paths return + return inner def _trace_async_function( - function: Callable = None, + function: Callable, *, - args_to_ignore: Optional[List[str]] = None, - trace_type=TraceType.INFERENCE, - name: Optional[str] = None, + _args_to_ignore: Optional[List[str]] = None, + _trace_type=TraceType.INFERENCE, + _name: Optional[str] = None, ) -> Callable: """ Decorator that adds tracing to an asynchronous function. - Args: - function (Callable): The function to be traced. - args_to_ignore (Optional[List[str]], optional): A list of argument names to be ignored in the trace. - Defaults to None. - trace_type (TraceType, optional): The type of the trace. Defaults to TraceType.INFERENCE. - name (str, optional): The name of the trace, will set to func name if not provided. - - - Returns: - Callable: The traced function. + :param function: The function to be traced. + :type function: Callable + :param args_to_ignore: A list of argument names to be ignored in the trace. + Defaults to None. + :type args_to_ignore: List[str], optional + :param trace_type: The type of the trace. Defaults to TraceType.INFERENCE. + :type trace_type: TraceType, optional + :param name: The name of the trace; will be set to the function name if not provided. + :type name: str, optional + :return: The traced function.
+ :rtype: Callable """ @functools.wraps(function) @@ -345,10 +380,10 @@ async def inner(*args, **kwargs): class_function_name = function.__qualname__ if class_function_name.startswith("ChatCompletionsClient.complete"): - if kwargs.get('model') is None: - span_name = f"chat" + if kwargs.get("model") is None: + span_name = "chat" else: - model = kwargs.get('model') + model = kwargs.get("model") span_name = f"chat {model}" span = span_impl_type(name=span_name, kind=SpanKind.CLIENT) @@ -378,24 +413,36 @@ async def inner(*args, **kwargs): return inner -def inject_async(f, trace_type, name): +def inject_async(f, _trace_type, _name): wrapper_fun = _trace_async_function(f) - wrapper_fun._original = f + wrapper_fun._original = f # pylint: disable=protected-access return wrapper_fun -def inject_sync(f, trace_type, name): +def inject_sync(f, _trace_type, _name): wrapper_fun = _trace_sync_function(f) - wrapper_fun._original = f + wrapper_fun._original = f # pylint: disable=protected-access return wrapper_fun def _inference_apis(): sync_apis = ( - ("azure.ai.inference", "ChatCompletionsClient", "complete", TraceType.INFERENCE, "inference_chat_completions_complete"), + ( + "azure.ai.inference", + "ChatCompletionsClient", + "complete", + TraceType.INFERENCE, + "inference_chat_completions_complete", + ), ) async_apis = ( - ("azure.ai.inference.aio", "ChatCompletionsClient", "complete", TraceType.INFERENCE, "inference_chat_completions_complete"), + ( + "azure.ai.inference.aio", + "ChatCompletionsClient", + "complete", + TraceType.INFERENCE, + "inference_chat_completions_complete", + ), ) return sync_apis, async_apis @@ -407,8 +454,8 @@ def _inference_api_list(): def _generate_api_and_injector(apis): - for apis, injector in apis: - for module_name, class_name, method_name, trace_type, name in apis: + for api, injector in apis: + for module_name, class_name, method_name, trace_type, name in api: try: module = importlib.import_module(module_name) api = getattr(module, class_name) @@ -417,11 +464,11 @@ def _generate_api_and_injector(apis): except AttributeError as e: # Log the attribute exception with the missing class information logging.warning( - f"AttributeError: The module '{module_name}' does not have the class '{class_name}'. {str(e)}" + "AttributeError: The module '%s' does not have the class '%s'. %s", module_name, class_name, str(e) ) - except Exception as e: + except Exception as e: # pylint: disable=broad-except # Log other exceptions as a warning, as we're not sure what they might be - logging.warning(f"An unexpected error occurred: {str(e)}") + logging.warning("An unexpected error occurred: '%s'", str(e)) def available_inference_apis_and_injectors(): @@ -429,16 +476,23 @@ def available_inference_apis_and_injectors(): Generates a sequence of tuples containing Inference API classes, method names, and corresponding injector functions. - Yields: - Tuples of (api_class, method_name, injector_function) + :return: A generator yielding tuples. + :rtype: tuple """ yield from _generate_api_and_injector(_inference_api_list()) def _instrument_inference(enable_content_tracing: bool = False): - """This function modifies the methods of the Inference API classes to inject logic before calling the original methods. + """This function modifies the methods of the Inference API classes to + inject logic before calling the original methods. The original methods are stored as _original attributes of the methods. + + :param enable_content_tracing: Indicates whether tracing of message content should be enabled. 
+ This also controls whether function call tool function names, + parameter names and parameter values are traced. + :type enable_content_tracing: bool """ + # pylint: disable=W0603 global _inference_traces_enabled global _trace_inference_content if _inference_traces_enabled: @@ -455,6 +509,7 @@ def _uninstrument_inference(): """This function restores the original methods of the Inference API classes by assigning them back from the _original attributes of the modified methods. """ + # pylint: disable=W0603 global _inference_traces_enabled global _trace_inference_content _trace_inference_content = False @@ -465,8 +520,10 @@ def _uninstrument_inference(): def _is_instrumented(): - """This function returns True if Inference API has already been instrumented - for tracing and False if the API has not been instrumented. + """This function returns True if the Inference library has already been instrumented + for tracing and False if it has not been instrumented. + + :return: A value indicating whether the Inference library is currently instrumented or not. + :rtype: bool """ - global _inference_traces_enabled return _inference_traces_enabled diff --git a/sdk/core/azure-core-tracing-opentelemetry/dev_requirements.txt b/sdk/core/azure-core-tracing-opentelemetry/dev_requirements.txt index 4397c64cc730..8c560bae2c94 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/dev_requirements.txt +++ b/sdk/core/azure-core-tracing-opentelemetry/dev_requirements.txt @@ -6,3 +6,4 @@ requests azure-storage-blob ../../servicebus/azure-servicebus ../../eventhub/azure-eventhub +../../ai/azure-ai-inference \ No newline at end of file diff --git a/sdk/core/azure-core-tracing-opentelemetry/setup.py b/sdk/core/azure-core-tracing-opentelemetry/setup.py index 929e1cb3fee6..aa49df834875 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/setup.py +++ b/sdk/core/azure-core-tracing-opentelemetry/setup.py @@ -64,6 +64,7 @@ python_requires=">=3.8", install_requires=[ "opentelemetry-api>=1.12.0", - "azure-core>=1.24.0", + "azure-core>=1.30.0", + "azure-ai-inference>=1.0.0b4" ], ) From 4a563540d1870a0305158615bc20a95d500e8b1a Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Wed, 2 Oct 2024 15:06:11 -0500 Subject: [PATCH 31/35] adding inference to shared requirements --- shared_requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/shared_requirements.txt b/shared_requirements.txt index e9fc401f0cd5..4cc574fd9157 100644 --- a/shared_requirements.txt +++ b/shared_requirements.txt @@ -57,6 +57,7 @@ opentelemetry-instrumentation-urllib3 opentelemetry-resource-detector-azure azure-nspkg azure-ai-nspkg +azure-ai-inference azure-cognitiveservices-nspkg azure-mgmt-nspkg azure-mixedreality-authentication From 58a754f410a96fab1a92de3e56c4e2135dd53741 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Wed, 2 Oct 2024 17:49:57 -0500 Subject: [PATCH 32/35] remove inference from setup --- sdk/core/azure-core-tracing-opentelemetry/setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdk/core/azure-core-tracing-opentelemetry/setup.py b/sdk/core/azure-core-tracing-opentelemetry/setup.py index aa49df834875..0513a8f12f8b 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/setup.py +++ b/sdk/core/azure-core-tracing-opentelemetry/setup.py @@ -64,7 +64,6 @@ python_requires=">=3.8", install_requires=[ "opentelemetry-api>=1.12.0", - "azure-core>=1.30.0", - "azure-ai-inference>=1.0.0b4" + "azure-core>=1.30.0" ], ) From 4ed67dc466e7fa8a41d79cdc69ef91781360137f Mon Sep 17 00:00:00 2001 From: Marko Hietala Date:
Thu, 3 Oct 2024 10:07:52 -0500 Subject: [PATCH 33/35] adding comma to setup --- sdk/core/azure-core-tracing-opentelemetry/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/core/azure-core-tracing-opentelemetry/setup.py b/sdk/core/azure-core-tracing-opentelemetry/setup.py index 0513a8f12f8b..929e1cb3fee6 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/setup.py +++ b/sdk/core/azure-core-tracing-opentelemetry/setup.py @@ -64,6 +64,6 @@ python_requires=">=3.8", install_requires=[ "opentelemetry-api>=1.12.0", - "azure-core>=1.30.0" + "azure-core>=1.24.0", ], ) From 5a0aa713889a15c9269267aa9f8a94175bc02af9 Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Thu, 3 Oct 2024 10:53:06 -0500 Subject: [PATCH 34/35] updating version requirement for core --- sdk/core/azure-core-tracing-opentelemetry/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/core/azure-core-tracing-opentelemetry/setup.py b/sdk/core/azure-core-tracing-opentelemetry/setup.py index 929e1cb3fee6..600f3d09d35e 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/setup.py +++ b/sdk/core/azure-core-tracing-opentelemetry/setup.py @@ -64,6 +64,6 @@ python_requires=">=3.8", install_requires=[ "opentelemetry-api>=1.12.0", - "azure-core>=1.24.0", + "azure-core>=1.30.0", ], ) From 121497809fab0abf06979599d1b0dda2ac70c52d Mon Sep 17 00:00:00 2001 From: Marko Hietala Date: Mon, 7 Oct 2024 15:28:30 -0500 Subject: [PATCH 35/35] changes based on review comments --- .../azure/ai/inference/_patch.py | 4 +- .../azure/ai/inference/aio/_patch.py | 2 +- .../sample_chat_completions_with_tracing.py | 2 +- .../inference/_ai_inference_instrumentor.py | 36 ++++++++++++- .../_ai_inference_instrumentor_impl.py | 50 ++++++++++++------- 5 files changed, 69 insertions(+), 25 deletions(-) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py index ec641d8c6444..4ff53fa7360f 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py @@ -102,7 +102,7 @@ def load_client( "The AI model information is missing a value for `model type`. Cannot create an appropriate client." 
) - # TODO: Remove "completions", "chat-comletions" and "embedding" once Mistral Large and Cohere fixes their model type + # TODO: Remove "completions", "chat-completions" and "embedding" once Mistral Large and Cohere fix their model type if model_info.model_type in (_models.ModelType.CHAT, "completion", "chat-completion", "chat-completions"): chat_completion_client = ChatCompletionsClient(endpoint, credential, **kwargs) chat_completion_client._model_info = ( # pylint: disable=protected-access,attribute-defined-outside-init @@ -454,7 +454,7 @@ def complete( :raises ~azure.core.exceptions.HttpResponseError: """ - @distributed_trace + # pylint:disable=client-method-missing-tracing-decorator def complete( self, body: Union[JSON, IO[bytes]] = _Unset, diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py index 8b49d289bfd5..f8cdd4f892aa 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py @@ -630,7 +630,7 @@ async def complete( return _deserialize(_models._patch.ChatCompletions, response.json()) # pylint: disable=protected-access - @distributed_trace_async + # pylint:disable=client-method-missing-tracing-decorator-async async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: # pylint: disable=line-too-long """Returns information about the AI model. diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py index cf712218092b..8fb1c1c67123 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py @@ -29,7 +29,7 @@ import os from opentelemetry import trace # opentelemetry-sdk is required for the opentelemetry.sdk imports. -# You can install it with command "pip install opentelemetry.sdk". +# You can install it with command "pip install opentelemetry-sdk". #from opentelemetry.sdk.trace import TracerProvider #from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter from azure.ai.inference import ChatCompletionsClient diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py index 11113e7f6d48..150134ed610c 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor.py @@ -5,22 +5,48 @@ class AIInferenceInstrumentor: - def str_to_bool(self, s): + """ + A class for managing the trace instrumentation of AI Inference. + + This class allows enabling or disabling tracing for AI Inference + and provides functionality to check whether instrumentation is active. + """ + + def _str_to_bool(self, s): if s is None: return False return str(s).lower() == "true" def instrument(self): + """ + Enable instrumentation for AI Inference. + + Raises: + RuntimeError: If instrumentation is already enabled. + + This method checks the environment variable + 'AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED' to determine + whether to enable content tracing.
+ """ if self.is_instrumented(): raise RuntimeError("Already instrumented") var_value = os.environ.get("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED") - enable_content_tracing = self.str_to_bool(var_value) + enable_content_tracing = self._str_to_bool(var_value) from ._ai_inference_instrumentor_impl import _instrument_inference _instrument_inference(enable_content_tracing) def uninstrument(self): + """ + Disable instrumentation for AI Inference. + + Raises: + RuntimeError: If instrumentation is not currently enabled. + + This method removes any active instrumentation, stopping the tracing + of AI Inference. + """ if not self.is_instrumented(): raise RuntimeError("Not instrumented") @@ -29,6 +55,12 @@ def uninstrument(self): _uninstrument_inference() def is_instrumented(self): + """ + Check if instrumentation for AI Inference is currently enabled. + + :return: True if instrumentation is active, False otherwise. + :rtype: bool + """ from ._ai_inference_instrumentor_impl import _is_instrumented return _is_instrumented() diff --git a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py index a72942c9c451..54a219c5aa31 100644 --- a/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py +++ b/sdk/core/azure-core-tracing-opentelemetry/azure/core/tracing/ai/inference/_ai_inference_instrumentor_impl.py @@ -84,6 +84,7 @@ def _add_request_chat_attributes(span: AbstractSpan, *args: Any, **kwargs: Any) span.add_attribute("server.port", port) +# When content tracing is not enabled, function calls, function parameter names and values are not traced. 
def remove_function_call_names_and_arguments(tool_calls: list) -> list: tool_calls_copy = copy.deepcopy(tool_calls) for tool_call in tool_calls_copy: @@ -99,14 +100,24 @@ def remove_function_call_names_and_arguments(tool_calls: list) -> list: def get_finish_reasons(result): if hasattr(result, "choices") and result.choices: - return [ - ( - getattr(choice, "finish_reason", None).value - if getattr(choice, "finish_reason", None) is not None - else "none" - ) - for choice in result.choices - ] + finish_reasons = [] + for choice in result.choices: + finish_reason = getattr(choice, "finish_reason", None) + + if finish_reason is None: + # If finish_reason is None, default to "none" + finish_reasons.append("none") + elif hasattr(finish_reason, "value"): + # If finish_reason has a 'value' attribute (i.e., it's an enum), use it + finish_reasons.append(finish_reason.value) + elif isinstance(finish_reason, str): + # If finish_reason is a string, use it directly + finish_reasons.append(finish_reason) + else: + # For any other type, you might want to handle it or default to "none" + finish_reasons.append("none") + + return finish_reasons return None @@ -143,15 +154,11 @@ def _add_response_chat_message_event(span: AbstractSpan, result: _models.ChatCom response["message"]["tool_calls"] = [ tool.as_dict() for tool in tool_calls_function_names_and_arguments_removed ] - attributes = { - "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(response), - } - else: - attributes = { - "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(response), - } + + attributes = { + "gen_ai.system": INFERENCE_GEN_AI_SYSTEM_NAME, + "gen_ai.event.content": json.dumps(response), + } span.span_instance.add_event(name="gen_ai.choice", attributes=attributes) @@ -332,7 +339,8 @@ def inner(*args, **kwargs): # Set the span status to error if isinstance(span.span_instance, Span): span.span_instance.set_status(StatusCode.ERROR, description=str(exc)) - module = exc.__module__ if exc.__module__ != "builtins" else "" + module = getattr(exc, "__module__", "") + module = module if module != "builtins" else "" error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__ _set_attributes(span, ("error.type", error_type)) span.finish() @@ -401,7 +409,8 @@ async def inner(*args, **kwargs): # Set the span status to error if isinstance(span.span_instance, Span): span.span_instance.set_status(StatusCode.ERROR, description=str(exc)) - module = exc.__module__ if exc.__module__ != "builtins" else "" + module = getattr(exc, "__module__", "") + module = module if module != "builtins" else "" error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__ _set_attributes(span, ("error.type", error_type)) span.finish() @@ -410,6 +419,9 @@ async def inner(*args, **kwargs): span.finish() return result + # Handle the default case (if the function name does not match) + return None # Ensure all paths return + return inner
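The changes above all hang off one wrap-and-stash pattern: inject_sync and inject_async replace ChatCompletionsClient.complete with a traced wrapper and keep the original callable on the wrapper's _original attribute, which is exactly what _uninstrument_inference swaps back later. A stripped-down sketch of that mechanism follows, using generic names rather than the SDK's actual helpers:

import functools

class Client:
    def complete(self, prompt: str) -> str:
        return f"echo: {prompt}"

def _wrap(cls, method_name: str) -> None:
    original = getattr(cls, method_name)

    @functools.wraps(original)
    def wrapper(*args, **kwargs):
        # A real injector would open a span here and record request/response
        # attributes around this call.
        return original(*args, **kwargs)

    wrapper._original = original  # stash the original for later restoration
    setattr(cls, method_name, wrapper)

def _unwrap(cls, method_name: str) -> None:
    wrapper = getattr(cls, method_name)
    setattr(cls, method_name, wrapper._original)

_wrap(Client, "complete")
assert Client().complete("hi") == "echo: hi"  # behavior unchanged, now traceable
_unwrap(Client, "complete")
assert not hasattr(Client.complete, "_original")  # original fully restored

Stashing the original on the wrapper itself, rather than in a separate registry, keeps uninstrumenting a simple attribute swap and is what makes double-instrumenting detectable.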
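For reference, a minimal end-to-end usage sketch of the instrumentor added in this series. It is not part of the patch: the console exporter and the AZURE_AI_CHAT_ENDPOINT / AZURE_AI_CHAT_KEY variable names are illustrative assumptions, and any configured OpenTelemetry exporter or credential source would work as well:

import os

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import UserMessage
from azure.core.credentials import AzureKeyCredential
from azure.core.settings import settings
from azure.core.tracing.ai.inference import AIInferenceInstrumentor

# Tell azure-core to emit OpenTelemetry spans and route them to stdout
# (an illustrative exporter choice, not a requirement of the patch).
settings.tracing_implementation = "opentelemetry"
trace.set_tracer_provider(TracerProvider())
trace.get_tracer_provider().add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))

# instrument() reads this variable once, at instrumentation time; message
# content and tool-call details are only recorded when it is "true".
os.environ["AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED"] = "true"

instrumentor = AIInferenceInstrumentor()
instrumentor.instrument()  # raises RuntimeError if already instrumented
assert instrumentor.is_instrumented()

client = ChatCompletionsClient(
    endpoint=os.environ["AZURE_AI_CHAT_ENDPOINT"],  # assumed variable name
    credential=AzureKeyCredential(os.environ["AZURE_AI_CHAT_KEY"]),  # assumed variable name
)
# complete() is now wrapped: each call produces a client span named
# "chat" or "chat {model}" carrying gen_ai.* attributes and events.
response = client.complete(messages=[UserMessage(content="How many feet are in a mile?")])
print(response.choices[0].message.content)

instrumentor.uninstrument()  # restores the original complete(); raises if not instrumented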