diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py index 39317ea752..02e207ca7f 100644 --- a/newrelic/hooks/external_botocore.py +++ b/newrelic/hooks/external_botocore.py @@ -192,6 +192,7 @@ def create_chat_completion_message_event( request_model, request_id, llm_metadata_dict, + all_token_counts, response_id=None, ): if not transaction: @@ -224,6 +225,8 @@ def create_chat_completion_message_event( "vendor": "bedrock", "ingest_source": "Python", } + if all_token_counts: + chat_completion_message_dict["token_count"] = 0 if settings.ai_monitoring.record_content.enabled: chat_completion_message_dict["content"] = content @@ -263,6 +266,8 @@ def create_chat_completion_message_event( "ingest_source": "Python", "is_response": True, } + if all_token_counts: + chat_completion_message_dict["token_count"] = 0 if settings.ai_monitoring.record_content.enabled: chat_completion_message_dict["content"] = content @@ -272,24 +277,21 @@ def create_chat_completion_message_event( transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_message_dict) -def extract_bedrock_titan_text_model_request(request_body, bedrock_attrs): +def extract_bedrock_titan_embedding_model_request(request_body, bedrock_attrs): request_body = json.loads(request_body) - request_config = request_body.get("textGenerationConfig", {}) - input_message_list = [{"role": "user", "content": request_body.get("inputText")}] - - bedrock_attrs["input_message_list"] = input_message_list - bedrock_attrs["request.max_tokens"] = request_config.get("maxTokenCount") - bedrock_attrs["request.temperature"] = request_config.get("temperature") + bedrock_attrs["input"] = request_body.get("inputText") return bedrock_attrs -def extract_bedrock_mistral_text_model_request(request_body, bedrock_attrs): - request_body = json.loads(request_body) - bedrock_attrs["input_message_list"] = [{"role": "user", "content": request_body.get("prompt")}] - bedrock_attrs["request.max_tokens"] = request_body.get("max_tokens") - bedrock_attrs["request.temperature"] = request_body.get("temperature") +def extract_bedrock_titan_embedding_model_response(response_body, bedrock_attrs): + if response_body: + response_body = json.loads(response_body) + + input_tokens = response_body.get("inputTextTokenCount", 0) + bedrock_attrs["response.usage.total_tokens"] = input_tokens + return bedrock_attrs @@ -297,16 +299,31 @@ def extract_bedrock_titan_text_model_response(response_body, bedrock_attrs): if response_body: response_body = json.loads(response_body) + input_tokens = response_body.get("inputTextTokenCount", 0) + completion_tokens = sum(result.get("tokenCount", 0) for result in response_body.get("results", [])) + total_tokens = input_tokens + completion_tokens + output_message_list = [ - {"role": "assistant", "content": result["outputText"]} for result in response_body.get("results", []) + {"role": "assistant", "content": result.get("outputText")} for result in response_body.get("results", []) ] bedrock_attrs["response.choices.finish_reason"] = response_body["results"][0]["completionReason"] + bedrock_attrs["response.usage.completion_tokens"] = completion_tokens + bedrock_attrs["response.usage.prompt_tokens"] = input_tokens + bedrock_attrs["response.usage.total_tokens"] = total_tokens bedrock_attrs["output_message_list"] = output_message_list return bedrock_attrs +def extract_bedrock_mistral_text_model_request(request_body, bedrock_attrs): + request_body = json.loads(request_body) + bedrock_attrs["input_message_list"] = [{"role": 
"user", "content": request_body.get("prompt")}] + bedrock_attrs["request.max_tokens"] = request_body.get("max_tokens") + bedrock_attrs["request.temperature"] = request_body.get("temperature") + return bedrock_attrs + + def extract_bedrock_mistral_text_model_response(response_body, bedrock_attrs): if response_body: response_body = json.loads(response_body) @@ -319,17 +336,6 @@ def extract_bedrock_mistral_text_model_response(response_body, bedrock_attrs): return bedrock_attrs -def extract_bedrock_titan_text_model_streaming_response(response_body, bedrock_attrs): - if response_body: - if "outputText" in response_body: - bedrock_attrs["output_message_list"] = messages = bedrock_attrs.get("output_message_list", []) - messages.append({"role": "assistant", "content": response_body["outputText"]}) - - bedrock_attrs["response.choices.finish_reason"] = response_body.get("completionReason", None) - - return bedrock_attrs - - def extract_bedrock_mistral_text_model_streaming_response(response_body, bedrock_attrs): if response_body: outputs = response_body.get("outputs") @@ -338,14 +344,46 @@ def extract_bedrock_mistral_text_model_streaming_response(response_body, bedrock "output_message_list", [{"role": "assistant", "content": ""}] ) bedrock_attrs["output_message_list"][0]["content"] += outputs[0].get("text", "") - bedrock_attrs["response.choices.finish_reason"] = outputs[0].get("stop_reason", None) + bedrock_attrs["response.choices.finish_reason"] = outputs[0].get("stop_reason") return bedrock_attrs -def extract_bedrock_titan_embedding_model_request(request_body, bedrock_attrs): +def extract_bedrock_titan_text_model_request(request_body, bedrock_attrs): request_body = json.loads(request_body) + request_config = request_body.get("textGenerationConfig", {}) - bedrock_attrs["input"] = request_body.get("inputText") + input_message_list = [{"role": "user", "content": request_body.get("inputText")}] + + bedrock_attrs["input_message_list"] = input_message_list + bedrock_attrs["request.max_tokens"] = request_config.get("maxTokenCount") + bedrock_attrs["request.temperature"] = request_config.get("temperature") + + return bedrock_attrs + + +def extract_bedrock_titan_text_model_streaming_response(response_body, bedrock_attrs): + if response_body: + if "outputText" in response_body: + bedrock_attrs["output_message_list"] = messages = bedrock_attrs.get("output_message_list", []) + messages.append({"role": "assistant", "content": response_body["outputText"]}) + + bedrock_attrs["response.choices.finish_reason"] = response_body.get("completionReason") + + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) return bedrock_attrs @@ -415,6 +453,17 @@ def extract_bedrock_claude_model_response(response_body, bedrock_attrs): bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") bedrock_attrs["output_message_list"] = output_message_list + bedrock_attrs[""] = 
str(response_body.get("id")) + + # Extract token information + token_usage = response_body.get("usage", {}) + if token_usage: + prompt_tokens = token_usage.get("input_tokens", 0) + completion_tokens = token_usage.get("output_tokens", 0) + total_tokens = prompt_tokens + completion_tokens + bedrock_attrs["response.usage.prompt_tokens"] = prompt_tokens + bedrock_attrs["response.usage.completion_tokens"] = completion_tokens + bedrock_attrs["response.usage.total_tokens"] = total_tokens return bedrock_attrs @@ -427,6 +476,22 @@ def extract_bedrock_claude_model_streaming_response(response_body, bedrock_attrs bedrock_attrs["output_message_list"][0]["content"] += content bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) + return bedrock_attrs @@ -447,6 +512,13 @@ def extract_bedrock_llama_model_response(response_body, bedrock_attrs): response_body = json.loads(response_body) output_message_list = [{"role": "assistant", "content": response_body.get("generation")}] + prompt_tokens = response_body.get("prompt_token_count", 0) + completion_tokens = response_body.get("generation_token_count", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = completion_tokens + bedrock_attrs["response.usage.prompt_tokens"] = prompt_tokens + bedrock_attrs["response.usage.total_tokens"] = total_tokens bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") bedrock_attrs["output_message_list"] = output_message_list @@ -460,6 +532,22 @@ def extract_bedrock_llama_model_streaming_response(response_body, bedrock_attrs) bedrock_attrs["output_message_list"] = [{"role": "assistant", "content": ""}] bedrock_attrs["output_message_list"][0]["content"] += content bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") + + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) return bedrock_attrs @@ -500,12 +588,33 @@ def extract_bedrock_cohere_model_streaming_response(response_body, bedrock_attrs bedrock_attrs["response.choices.finish_reason"] = response_body["generations"][0]["finish_reason"] bedrock_attrs["response_id"] = str(response_body.get("id")) + # Extract token information + 
invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) + return bedrock_attrs NULL_EXTRACTOR = lambda *args: {} # noqa: E731 # Empty extractor that returns nothing MODEL_EXTRACTORS = [ # Order is important here, avoiding dictionaries - ("amazon.titan-embed", extract_bedrock_titan_embedding_model_request, NULL_EXTRACTOR, NULL_EXTRACTOR), + ( + "amazon.titan-embed", + extract_bedrock_titan_embedding_model_request, + extract_bedrock_titan_embedding_model_response, + NULL_EXTRACTOR, + ), ("cohere.embed", extract_bedrock_cohere_embedding_model_request, NULL_EXTRACTOR, NULL_EXTRACTOR), ( "amazon.titan", @@ -557,8 +666,8 @@ def handle_bedrock_exception( input_message_list = [] bedrock_attrs["input_message_list"] = input_message_list - bedrock_attrs["request.max_tokens"] = kwargs.get("inferenceConfig", {}).get("maxTokens", None) - bedrock_attrs["request.temperature"] = kwargs.get("inferenceConfig", {}).get("temperature", None) + bedrock_attrs["request.max_tokens"] = kwargs.get("inferenceConfig", {}).get("maxTokens") + bedrock_attrs["request.temperature"] = kwargs.get("inferenceConfig", {}).get("temperature") try: request_extractor(request_body, bedrock_attrs) @@ -808,6 +917,7 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs): try: # For aioboto3 clients, this will call make_api_call instrumentation in external_aiobotocore response = wrapped(*args, **kwargs) + except Exception as exc: handle_bedrock_exception( exc, False, model, span_id, trace_id, request_extractor, {}, ft, transaction, kwargs, is_converse=True @@ -855,6 +965,10 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, sp for result in response.get("output").get("message").get("content", []) ] + response_prompt_tokens = response.get("usage", {}).get("inputTokens") if response else None + response_completion_tokens = response.get("usage", {}).get("outputTokens") if response else None + response_total_tokens = response.get("usage", {}).get("totalTokens") if response else None + bedrock_attrs = { "request_id": response_headers.get("x-amzn-requestid"), "model": model, @@ -862,9 +976,12 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, sp "trace_id": trace_id, "response.choices.finish_reason": response.get("stopReason"), "output_message_list": output_message_list, - "request.max_tokens": kwargs.get("inferenceConfig", {}).get("maxTokens", None), - "request.temperature": kwargs.get("inferenceConfig", {}).get("temperature", None), + "request.max_tokens": kwargs.get("inferenceConfig", {}).get("maxTokens"), + "request.temperature": kwargs.get("inferenceConfig", {}).get("temperature"), "input_message_list": input_message_list, + "response.usage.prompt_tokens": response_prompt_tokens, + "response.usage.completion_tokens": response_completion_tokens, + "response.usage.total_tokens": response_total_tokens, } return bedrock_attrs @@ -1015,29 +1132,34 @@ def 
handle_embedding_event(transaction, bedrock_attrs): custom_attrs_dict = transaction._custom_params llm_metadata_dict = {key: value for key, value in custom_attrs_dict.items() if key.startswith("llm.")} - span_id = bedrock_attrs.get("span_id", None) - trace_id = bedrock_attrs.get("trace_id", None) - request_id = bedrock_attrs.get("request_id", None) - model = bedrock_attrs.get("model", None) + span_id = bedrock_attrs.get("span_id") + trace_id = bedrock_attrs.get("trace_id") + request_id = bedrock_attrs.get("request_id") + model = bedrock_attrs.get("model") input_ = bedrock_attrs.get("input") + response_total_tokens = bedrock_attrs.get("response.usage.total_tokens") + + total_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, input_) + if settings.ai_monitoring.llm_token_count_callback and input_ + else response_total_tokens + ) + embedding_dict = { "vendor": "bedrock", "ingest_source": "Python", "id": embedding_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(model, input_) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "request_id": request_id, - "duration": bedrock_attrs.get("duration", None), + "duration": bedrock_attrs.get("duration"), "request.model": model, "response.model": model, - "error": bedrock_attrs.get("error", None), + "response.usage.total_tokens": total_tokens, + "error": bedrock_attrs.get("error"), } + embedding_dict.update(llm_metadata_dict) if settings.ai_monitoring.record_content.enabled: @@ -1048,6 +1170,7 @@ def handle_embedding_event(transaction, bedrock_attrs): def handle_chat_completion_event(transaction, bedrock_attrs): + settings = transaction.settings or global_settings() chat_completion_id = str(uuid.uuid4()) # Grab LLM-related custom attributes off of the transaction to store as metadata on LLM events custom_attrs_dict = transaction._custom_params @@ -1056,11 +1179,15 @@ def handle_chat_completion_event(transaction, bedrock_attrs): llm_context_attrs = getattr(transaction, "_llm_context_attrs", None) if llm_context_attrs: llm_metadata_dict.update(llm_context_attrs) - span_id = bedrock_attrs.get("span_id", None) - trace_id = bedrock_attrs.get("trace_id", None) - request_id = bedrock_attrs.get("request_id", None) - response_id = bedrock_attrs.get("response_id", None) - model = bedrock_attrs.get("model", None) + span_id = bedrock_attrs.get("span_id") + trace_id = bedrock_attrs.get("trace_id") + request_id = bedrock_attrs.get("request_id") + response_id = bedrock_attrs.get("response_id") + model = bedrock_attrs.get("model") + + response_prompt_tokens = bedrock_attrs.get("response.usage.prompt_tokens") + response_completion_tokens = bedrock_attrs.get("response.usage.completion_tokens") + response_total_tokens = bedrock_attrs.get("response.usage.total_tokens") input_message_list = bedrock_attrs.get("input_message_list", []) output_message_list = bedrock_attrs.get("output_message_list", []) @@ -1075,6 +1202,25 @@ def handle_chat_completion_event(transaction, bedrock_attrs): len(input_message_list) + len(output_message_list) ) or None # If 0, attribute will be set to None and removed + input_message_content = " ".join([msg.get("content") for msg in input_message_list if msg.get("content")]) + prompt_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, input_message_content) + if settings.ai_monitoring.llm_token_count_callback and input_message_content + else response_prompt_tokens + ) + + output_message_content = " ".join([msg.get("content") for msg 
in output_message_list if msg.get("content")]) + completion_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, output_message_content) + if settings.ai_monitoring.llm_token_count_callback and output_message_content + else response_completion_tokens + ) + total_tokens = ( + prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens + ) + + all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens) + chat_completion_summary_dict = { "vendor": "bedrock", "ingest_source": "Python", @@ -1083,15 +1229,21 @@ def handle_chat_completion_event(transaction, bedrock_attrs): "trace_id": trace_id, "request_id": request_id, "response_id": response_id, - "duration": bedrock_attrs.get("duration", None), - "request.max_tokens": bedrock_attrs.get("request.max_tokens", None), - "request.temperature": bedrock_attrs.get("request.temperature", None), + "duration": bedrock_attrs.get("duration"), + "request.max_tokens": bedrock_attrs.get("request.max_tokens"), + "request.temperature": bedrock_attrs.get("request.temperature"), "request.model": model, "response.model": model, # Duplicate data required by the UI "response.number_of_messages": number_of_messages, - "response.choices.finish_reason": bedrock_attrs.get("response.choices.finish_reason", None), - "error": bedrock_attrs.get("error", None), + "response.choices.finish_reason": bedrock_attrs.get("response.choices.finish_reason"), + "error": bedrock_attrs.get("error"), } + + if all_token_counts: + chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens + chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens + chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens + chat_completion_summary_dict.update(llm_metadata_dict) chat_completion_summary_dict = {k: v for k, v in chat_completion_summary_dict.items() if v is not None} transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) @@ -1106,6 +1258,7 @@ def handle_chat_completion_event(transaction, bedrock_attrs): request_model=model, request_id=request_id, llm_metadata_dict=llm_metadata_dict, + all_token_counts=all_token_counts, response_id=response_id, ) diff --git a/newrelic/hooks/mlmodel_gemini.py b/newrelic/hooks/mlmodel_gemini.py index 8aeb1355d0..6f61c11125 100644 --- a/newrelic/hooks/mlmodel_gemini.py +++ b/newrelic/hooks/mlmodel_gemini.py @@ -175,20 +175,24 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg embedding_content = str(embedding_content) request_model = kwargs.get("model") + embedding_token_count = ( + settings.ai_monitoring.llm_token_count_callback(request_model, embedding_content) + if settings.ai_monitoring.llm_token_count_callback + else None + ) + full_embedding_response_dict = { "id": embedding_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(request_model, embedding_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "request.model": request_model, "duration": ft.duration * 1000, "vendor": "gemini", "ingest_source": "Python", } + if embedding_token_count: + full_embedding_response_dict["response.usage.total_tokens"] = embedding_token_count + if settings.ai_monitoring.record_content.enabled: full_embedding_response_dict["input"] = embedding_content @@ -300,15 +304,13 @@ def _record_generation_error(transaction, linking_metadata, completion_id, kwarg "Unable to parse input message to 
Gemini LLM. Message content and role will be omitted from " "corresponding LlmChatCompletionMessage event. " ) + # Extract the input message content and role from the input message if it exists + input_message_content, input_role = _parse_input_message(input_message) if input_message else (None, None) - generation_config = kwargs.get("config") - if generation_config: - request_temperature = getattr(generation_config, "temperature", None) - request_max_tokens = getattr(generation_config, "max_output_tokens", None) - else: - request_temperature = None - request_max_tokens = None + # Extract data from generation config object + request_temperature, request_max_tokens = _extract_generation_config(kwargs) + # Prepare error attributes notice_error_attributes = { "http.statusCode": getattr(exc, "code", None), "error.message": getattr(exc, "message", None), @@ -348,15 +350,17 @@ def _record_generation_error(transaction, linking_metadata, completion_id, kwarg create_chat_completion_message_event( transaction, - input_message, + input_message_content, + input_role, completion_id, span_id, trace_id, # Passing the request model as the response model here since we do not have access to a response model request_model, - request_model, llm_metadata, output_message_list, + # We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run + all_token_counts=True, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) @@ -377,6 +381,7 @@ def _handle_generation_success(transaction, linking_metadata, completion_id, kwa def _record_generation_success(transaction, linking_metadata, completion_id, kwargs, ft, response): + settings = transaction.settings or global_settings() span_id = linking_metadata.get("span.id") trace_id = linking_metadata.get("trace.id") try: @@ -385,12 +390,14 @@ def _record_generation_success(transaction, linking_metadata, completion_id, kwa # finish_reason is an enum, so grab just the stringified value from it to report finish_reason = response.get("candidates")[0].get("finish_reason").value output_message_list = [response.get("candidates")[0].get("content")] + token_usage = response.get("usage_metadata") or {} else: # Set all values to NoneTypes since we cannot access them through kwargs or another method that doesn't # require the response object response_model = None output_message_list = [] finish_reason = None + token_usage = {} request_model = kwargs.get("model") @@ -412,13 +419,44 @@ def _record_generation_success(transaction, linking_metadata, completion_id, kwa "corresponding LlmChatCompletionMessage event. 
" ) - generation_config = kwargs.get("config") - if generation_config: - request_temperature = getattr(generation_config, "temperature", None) - request_max_tokens = getattr(generation_config, "max_output_tokens", None) + input_message_content, input_role = _parse_input_message(input_message) if input_message else (None, None) + + # Parse output message content + # This list should have a length of 1 to represent the output message + # Parse the message text out to pass to any registered token counting callback + output_message_content = output_message_list[0].get("parts")[0].get("text") if output_message_list else None + + # Extract token counts from response object + if token_usage: + response_prompt_tokens = token_usage.get("prompt_token_count") + response_completion_tokens = token_usage.get("candidates_token_count") + response_total_tokens = token_usage.get("total_token_count") + else: - request_temperature = None - request_max_tokens = None + response_prompt_tokens = None + response_completion_tokens = None + response_total_tokens = None + + # Calculate token counts by checking if a callback is registered and if we have the necessary content to pass + # to it. If not, then we use the token counts provided in the response object + prompt_tokens = ( + settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content) + if settings.ai_monitoring.llm_token_count_callback and input_message_content + else response_prompt_tokens + ) + completion_tokens = ( + settings.ai_monitoring.llm_token_count_callback(response_model, output_message_content) + if settings.ai_monitoring.llm_token_count_callback and output_message_content + else response_completion_tokens + ) + total_tokens = ( + prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens + ) + + all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens) + + # Extract generation config + request_temperature, request_max_tokens = _extract_generation_config(kwargs) full_chat_completion_summary_dict = { "id": completion_id, @@ -438,66 +476,78 @@ def _record_generation_success(transaction, linking_metadata, completion_id, kwa "response.number_of_messages": 1 + len(output_message_list), } + if all_token_counts: + full_chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens + full_chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens + full_chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens + llm_metadata = _get_llm_attributes(transaction) full_chat_completion_summary_dict.update(llm_metadata) transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict) create_chat_completion_message_event( transaction, - input_message, + input_message_content, + input_role, completion_id, span_id, trace_id, response_model, - request_model, llm_metadata, output_message_list, + all_token_counts, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) +def _parse_input_message(input_message): + # The input_message will be a string if generate_content was called directly. 
In this case, we don't have + # access to the role, so we default to user since this was an input message + if isinstance(input_message, str): + return input_message, "user" + # The input_message will be a Google Content type if send_message was called, so we parse out the message + # text and role (which should be "user") + elif isinstance(input_message, google.genai.types.Content): + return input_message.parts[0].text, input_message.role + else: + return None, None + + +def _extract_generation_config(kwargs): + generation_config = kwargs.get("config") + if generation_config: + request_temperature = getattr(generation_config, "temperature", None) + request_max_tokens = getattr(generation_config, "max_output_tokens", None) + else: + request_temperature = None + request_max_tokens = None + + return request_temperature, request_max_tokens + + def create_chat_completion_message_event( transaction, - input_message, + input_message_content, + input_role, chat_completion_id, span_id, trace_id, response_model, - request_model, llm_metadata, output_message_list, + all_token_counts, ): try: settings = transaction.settings or global_settings() - if input_message: - # The input_message will be a string if generate_content was called directly. In this case, we don't have - # access to the role, so we default to user since this was an input message - if isinstance(input_message, str): - input_message_content = input_message - input_role = "user" - # The input_message will be a Google Content type if send_message was called, so we parse out the message - # text and role (which should be "user") - elif isinstance(input_message, google.genai.types.Content): - input_message_content = input_message.parts[0].text - input_role = input_message.role - # Set input data to NoneTypes to ensure token_count callback is not called - else: - input_message_content = None - input_role = None - + if input_message_content: message_id = str(uuid.uuid4()) chat_completion_input_message_dict = { "id": message_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content) - if settings.ai_monitoring.llm_token_count_callback and input_message_content - else None - ), "role": input_role, "completion_id": chat_completion_id, # The input message will always be the first message in our request/ response sequence so this will @@ -507,6 +557,8 @@ def create_chat_completion_message_event( "vendor": "gemini", "ingest_source": "Python", } + if all_token_counts: + chat_completion_input_message_dict["token_count"] = 0 if settings.ai_monitoring.record_content.enabled: chat_completion_input_message_dict["content"] = input_message_content @@ -523,7 +575,7 @@ def create_chat_completion_message_event( # Add one to the index to account for the single input message so our sequence value is accurate for # the output message - if input_message: + if input_message_content: index += 1 message_id = str(uuid.uuid4()) @@ -532,11 +584,6 @@ def create_chat_completion_message_event( "id": message_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(response_model, message_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "role": message.get("role"), "completion_id": chat_completion_id, "sequence": index, @@ -546,6 +593,9 @@ def create_chat_completion_message_event( "is_response": True, } + if all_token_counts: + chat_completion_output_message_dict["token_count"] = 0 + if 
settings.ai_monitoring.record_content.enabled: chat_completion_output_message_dict["content"] = message_content diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index c3f7960b6e..3484762951 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -129,11 +129,11 @@ def create_chat_completion_message_event( span_id, trace_id, response_model, - request_model, response_id, request_id, llm_metadata, output_message_list, + all_token_counts, ): settings = transaction.settings if transaction.settings is not None else global_settings() @@ -153,11 +153,6 @@ def create_chat_completion_message_event( "request_id": request_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(request_model, message_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "role": message.get("role"), "completion_id": chat_completion_id, "sequence": index, @@ -166,6 +161,9 @@ def create_chat_completion_message_event( "ingest_source": "Python", } + if all_token_counts: + chat_completion_input_message_dict["token_count"] = 0 + if settings.ai_monitoring.record_content.enabled: chat_completion_input_message_dict["content"] = message_content @@ -193,11 +191,6 @@ def create_chat_completion_message_event( "request_id": request_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(response_model, message_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "role": message.get("role"), "completion_id": chat_completion_id, "sequence": index, @@ -207,6 +200,9 @@ def create_chat_completion_message_event( "is_response": True, } + if all_token_counts: + chat_completion_output_message_dict["token_count"] = 0 + if settings.ai_monitoring.record_content.enabled: chat_completion_output_message_dict["content"] = message_content @@ -280,15 +276,18 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg else getattr(attribute_response, "organization", None) ) + response_total_tokens = attribute_response.get("usage", {}).get("total_tokens") if response else None + + total_tokens = ( + settings.ai_monitoring.llm_token_count_callback(response_model, input_) + if settings.ai_monitoring.llm_token_count_callback and input_ + else response_total_tokens + ) + full_embedding_response_dict = { "id": embedding_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(response_model, input_) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "request.model": kwargs.get("model") or kwargs.get("engine"), "request_id": request_id, "duration": ft.duration * 1000, @@ -313,6 +312,7 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg "response.headers.ratelimitRemainingRequests": check_rate_limit_header( response_headers, "x-ratelimit-remaining-requests", True ), + "response.usage.total_tokens": total_tokens, "vendor": "openai", "ingest_source": "Python", } @@ -475,12 +475,15 @@ def _handle_completion_success(transaction, linking_metadata, completion_id, kwa def _record_completion_success(transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response): + settings = transaction.settings if transaction.settings is not None else global_settings() span_id = linking_metadata.get("span.id") trace_id = linking_metadata.get("trace.id") + try: if response: response_model = 
response.get("model") response_id = response.get("id") + token_usage = response.get("usage") or {} output_message_list = [] finish_reason = None choices = response.get("choices") or [] @@ -494,6 +497,7 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa else: response_model = kwargs.get("response.model") response_id = kwargs.get("id") + token_usage = {} output_message_list = [] finish_reason = kwargs.get("finish_reason") if "content" in kwargs: @@ -505,10 +509,44 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa output_message_list = [] request_model = kwargs.get("model") or kwargs.get("engine") - request_id = response_headers.get("x-request-id") - organization = response_headers.get("openai-organization") or getattr(response, "organization", None) messages = kwargs.get("messages") or [{"content": kwargs.get("prompt"), "role": "user"}] input_message_list = list(messages) + + # Extract token counts from response object + if token_usage: + response_prompt_tokens = token_usage.get("prompt_tokens") + response_completion_tokens = token_usage.get("completion_tokens") + response_total_tokens = token_usage.get("total_tokens") + + else: + response_prompt_tokens = None + response_completion_tokens = None + response_total_tokens = None + + # Calculate token counts by checking if a callback is registered and if we have the necessary content to pass + # to it. If not, then we use the token counts provided in the response object + input_message_content = " ".join([msg.get("content", "") for msg in input_message_list if msg.get("content")]) + prompt_tokens = ( + settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content) + if settings.ai_monitoring.llm_token_count_callback and input_message_content + else response_prompt_tokens + ) + output_message_content = " ".join([msg.get("content", "") for msg in output_message_list if msg.get("content")]) + completion_tokens = ( + settings.ai_monitoring.llm_token_count_callback(response_model, output_message_content) + if settings.ai_monitoring.llm_token_count_callback and output_message_content + else response_completion_tokens + ) + + total_tokens = ( + prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens + ) + + all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens) + + request_id = response_headers.get("x-request-id") + organization = response_headers.get("openai-organization") or getattr(response, "organization", None) + full_chat_completion_summary_dict = { "id": completion_id, "span_id": span_id, @@ -553,6 +591,12 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa ), "response.number_of_messages": len(input_message_list) + len(output_message_list), } + + if all_token_counts: + full_chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens + full_chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens + full_chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens + llm_metadata = _get_llm_attributes(transaction) full_chat_completion_summary_dict.update(llm_metadata) transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict) @@ -564,11 +608,11 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa span_id, trace_id, response_model, - request_model, response_id, request_id, llm_metadata, output_message_list, + all_token_counts, ) except 
Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) @@ -579,6 +623,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg trace_id = linking_metadata.get("trace.id") request_message_list = kwargs.get("messages", None) or [] notice_error_attributes = {} + try: if OPENAI_V1: response = getattr(exc, "response", None) @@ -643,6 +688,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg output_message_list = [] if "content" in kwargs: output_message_list = [{"content": kwargs.get("content"), "role": kwargs.get("role")}] + create_chat_completion_message_event( transaction, request_message_list, @@ -650,11 +696,12 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg span_id, trace_id, kwargs.get("response.model"), - request_model, response_id, request_id, llm_metadata, output_message_list, + # We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run + all_token_counts=True, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py index da9c5818e7..87dfa1f1b6 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py @@ -17,7 +17,7 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -49,6 +49,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.usage.prompt_tokens": 26, + "response.usage.completion_tokens": 100, + "response.usage.total_tokens": 126, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "max_tokens", @@ -70,6 +73,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -88,6 +92,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -106,6 +111,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -189,7 +195,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model): - @validate_custom_events(add_token_count_to_events(chat_completion_expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events)) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -476,46 +482,3 @@ def _test(): 
converse_invalid_model(loop, bedrock_converse_server) _test() - - -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_converse_server, loop, set_trace_info -): - """ - A request is made to the server with invalid credentials. botocore will reach out to the server and receive an - UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer - events. The error response can also be parsed, and will be included as attributes on the recorded exception. - """ - - @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") - def _test(): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - converse_incorrect_access_key(loop, bedrock_converse_server, monkeypatch) - - _test() diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py index e02cc5b543..fe44fd17b3 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py @@ -34,7 +34,8 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -207,7 +208,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_events, expected_metrics): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(add_token_count_streaming_events(expected_events))) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=3) @validate_transaction_metrics( @@ -456,51 +457,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token( - monkeypatch, - bedrock_server, - exercise_model, - set_trace_info, - expected_invalid_access_key_error_events, - expected_metrics, -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - 
@validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=expected_metrics, - rollup_metrics=expected_metrics, - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) - - _test() - - def invoke_model_malformed_request_body(loop, bedrock_server, response_streaming): async def _coro(): with pytest.raises(_client_error): @@ -799,58 +755,6 @@ async def _test(): loop.run_until_complete(_test()) -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_expected_streaming_error_events)) -@validate_custom_event_count(count=2) -@validate_error_trace_attributes( - _event_stream_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "error.message": "Malformed input request, please reformat your input and try again.", - "error.code": "ValidationException", - }, - }, - forgone_params={"agent": (), "intrinsic": (), "user": ("http.statusCode")}, -) -@validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - rollup_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, -) -@background_task(name="test_bedrock_chat_completion") -def test_bedrock_chat_completion_error_streaming_exception_with_token_count(loop, bedrock_server, set_trace_info): - """ - Duplicate of test_bedrock_chat_completion_error_streaming_exception, but with token callback being set. - - See the original test for a description of the error case. 
- """ - - async def _test(): - with pytest.raises(_event_stream_error): - model = "amazon.titan-text-express-v1" - body = (chat_completion_payload_templates[model] % ("Streaming Exception", 0.7, 100)).encode("utf-8") - - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - response = await bedrock_server.invoke_model_with_response_stream( - body=body, modelId=model, accept="application/json", contentType="application/json" - ) - - body = response.get("body") - async for resp in body: - assert resp - - loop.run_until_complete(_test()) - - def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server): assert bedrock_server._nr_wrapped diff --git a/tests/external_aiobotocore/test_bedrock_embeddings.py b/tests/external_aiobotocore/test_bedrock_embeddings.py index 96b930feb5..dacfbb4eed 100644 --- a/tests/external_aiobotocore/test_bedrock_embeddings.py +++ b/tests/external_aiobotocore/test_bedrock_embeddings.py @@ -27,7 +27,7 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -164,7 +164,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_embedding_with_token_count(set_trace_info, exercise_model, expected_events): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_count_to_embedding_events(expected_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_bedrock_embedding", @@ -289,45 +289,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_embedding_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_server, exercise_model, set_trace_info, expected_invalid_access_key_error_events -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_embedding", - scoped_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - rollup_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_embedding") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token") - - _test() - - @reset_core_stats_engine() @validate_custom_events(embedding_expected_malformed_request_body_events) @validate_custom_event_count(count=1) diff --git a/tests/external_botocore/_test_bedrock_chat_completion.py 
b/tests/external_botocore/_test_bedrock_chat_completion.py index fd970b0603..91443d0d52 100644 --- a/tests/external_botocore/_test_bedrock_chat_completion.py +++ b/tests/external_botocore/_test_bedrock_chat_completion.py @@ -98,6 +98,9 @@ "duration": None, # Response time varies each test run "request.model": "amazon.titan-text-express-v1", "response.model": "amazon.titan-text-express-v1", + "response.usage.completion_tokens": 32, + "response.usage.total_tokens": 44, + "response.usage.prompt_tokens": 12, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "FINISH", @@ -119,6 +122,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -137,6 +141,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -276,6 +281,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.usage.prompt_tokens": 21, + "response.usage.completion_tokens": 31, + "response.usage.total_tokens": 52, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "end_turn", @@ -297,6 +305,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -315,6 +324,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -395,6 +405,9 @@ "duration": None, # Response time varies each test run "request.model": "meta.llama2-13b-chat-v1", "response.model": "meta.llama2-13b-chat-v1", + "response.usage.prompt_tokens": 17, + "response.usage.completion_tokens": 69, + "response.usage.total_tokens": 86, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "stop", @@ -416,6 +429,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -434,6 +448,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1092,6 +1107,9 @@ "duration": None, # Response time varies each test run "request.model": "amazon.titan-text-express-v1", "response.model": "amazon.titan-text-express-v1", + "response.usage.completion_tokens": 35, + "response.usage.total_tokens": 47, + "response.usage.prompt_tokens": 12, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "FINISH", @@ -1113,6 +1131,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1131,6 +1150,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1151,6 +1171,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-instant-v1", "response.model": "anthropic.claude-instant-v1", + 
"response.usage.completion_tokens": 99, + "response.usage.prompt_tokens": 19, + "response.usage.total_tokens": 118, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "stop_sequence", @@ -1172,6 +1195,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-instant-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1190,6 +1214,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "anthropic.claude-instant-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1269,6 +1294,9 @@ "duration": None, # Response time varies each test run "request.model": "cohere.command-text-v14", "response.model": "cohere.command-text-v14", + "response.usage.completion_tokens": 91, + "response.usage.total_tokens": 100, + "response.usage.prompt_tokens": 9, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "COMPLETE", @@ -1290,6 +1318,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "cohere.command-text-v14", "vendor": "bedrock", "ingest_source": "Python", @@ -1308,6 +1337,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "cohere.command-text-v14", "vendor": "bedrock", "ingest_source": "Python", @@ -1328,6 +1358,9 @@ "duration": None, # Response time varies each test run "request.model": "meta.llama2-13b-chat-v1", "response.model": "meta.llama2-13b-chat-v1", + "response.usage.prompt_tokens": 17, + "response.usage.completion_tokens": 100, + "response.usage.total_tokens": 117, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "length", @@ -1349,6 +1382,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1367,6 +1401,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", diff --git a/tests/external_botocore/_test_bedrock_embeddings.py b/tests/external_botocore/_test_bedrock_embeddings.py index f5c227b9c3..af544af001 100644 --- a/tests/external_botocore/_test_bedrock_embeddings.py +++ b/tests/external_botocore/_test_bedrock_embeddings.py @@ -33,6 +33,7 @@ "response.model": "amazon.titan-embed-text-v1", "request.model": "amazon.titan-embed-text-v1", "request_id": "11233989-07e8-4ecb-9ba6-79601ba6d8cc", + "response.usage.total_tokens": 6, "vendor": "bedrock", "ingest_source": "Python", }, @@ -52,6 +53,7 @@ "response.model": "amazon.titan-embed-g1-text-02", "request.model": "amazon.titan-embed-g1-text-02", "request_id": "b10ac895-eae3-4f07-b926-10b2866c55ed", + "response.usage.total_tokens": 6, "vendor": "bedrock", "ingest_source": "Python", }, diff --git a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py index 4422685b9f..90fb211214 100644 --- a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ import json import os from io import BytesIO @@ -35,7 +36,8 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -129,6 +131,14 @@ def expected_events(model_id, response_streaming): return chat_completion_expected_events[model_id] +@pytest.fixture(scope="module") +def expected_events(model_id, response_streaming): + if response_streaming: + return chat_completion_streaming_expected_events[model_id] + else: + return chat_completion_expected_events[model_id] + + @pytest.fixture(scope="module") def expected_metrics(response_streaming): if response_streaming: @@ -200,7 +210,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_events, expected_metrics): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(add_token_count_streaming_events(expected_events))) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=3) @validate_transaction_metrics( @@ -438,49 +448,50 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token( - monkeypatch, - bedrock_server, - exercise_model, - set_trace_info, - expected_invalid_access_key_error_events, - expected_metrics, -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=expected_metrics, - rollup_metrics=expected_metrics, - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) - - _test() +# +# @reset_core_stats_engine() +# @override_llm_token_callback_settings(llm_token_count_callback) +# def test_bedrock_chat_completion_error_incorrect_access_key_with_token( +# monkeypatch, +# bedrock_server, +# exercise_model, +# set_trace_info, +# expected_invalid_access_key_error_events, +# expected_metrics, +# ): +# @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) +# @validate_error_trace_attributes( +# _client_error_name, +# exact_attrs={ +# "agent": {}, +# "intrinsic": {}, 
+# "user": { +# "http.statusCode": 403, +# "error.message": "The security token included in the request is invalid.", +# "error.code": "UnrecognizedClientException", +# }, +# }, +# ) +# @validate_transaction_metrics( +# name="test_bedrock_chat_completion", +# scoped_metrics=expected_metrics, +# rollup_metrics=expected_metrics, +# custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], +# background_task=True, +# ) +# @background_task(name="test_bedrock_chat_completion") +# def _test(): +# monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") +# +# with pytest.raises(_client_error): # not sure where this exception actually comes from +# set_trace_info() +# add_custom_attribute("llm.conversation_id", "my-awesome-id") +# add_custom_attribute("llm.foo", "bar") +# add_custom_attribute("non_llm_attr", "python-agent") +# +# exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) +# +# _test() @reset_core_stats_engine() @@ -762,55 +773,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_streaming_exception_with_token_count(bedrock_server, set_trace_info): - """ - Duplicate of test_bedrock_chat_completion_error_streaming_exception, but with token callback being set. - - See the original test for a description of the error case. - """ - - @validate_custom_events(add_token_count_to_events(chat_completion_expected_streaming_error_events)) - @validate_custom_event_count(count=2) - @validate_error_trace_attributes( - _event_stream_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "error.message": "Malformed input request, please reformat your input and try again.", - "error.code": "ValidationException", - }, - }, - forgone_params={"agent": (), "intrinsic": (), "user": ("http.statusCode")}, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - rollup_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - with pytest.raises(_event_stream_error): - model = "amazon.titan-text-express-v1" - body = (chat_completion_payload_templates[model] % ("Streaming Exception", 0.7, 100)).encode("utf-8") - - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - response = bedrock_server.invoke_model_with_response_stream( - body=body, modelId=model, accept="application/json", contentType="application/json" - ) - list(response["body"]) # Iterate - - _test() - - def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server): assert bedrock_server._nr_wrapped diff --git a/tests/external_botocore/test_bedrock_embeddings.py b/tests/external_botocore/test_bedrock_embeddings.py index 417e24b2d9..de2cb201e7 100644 --- a/tests/external_botocore/test_bedrock_embeddings.py +++ b/tests/external_botocore/test_bedrock_embeddings.py @@ -28,7 +28,7 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - 
add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -161,7 +161,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_embedding_with_token_count(set_trace_info, exercise_model, expected_events): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_count_to_embedding_events(expected_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_bedrock_embedding", @@ -286,45 +286,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_embedding_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_server, exercise_model, set_trace_info, expected_invalid_access_key_error_events -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_embedding", - scoped_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - rollup_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_embedding") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token") - - _test() - - @reset_core_stats_engine() def test_bedrock_embedding_error_malformed_request_body(bedrock_server, set_trace_info): """ diff --git a/tests/external_botocore/test_chat_completion_converse.py b/tests/external_botocore/test_chat_completion_converse.py index 96ead41dd7..2d38d6b4a4 100644 --- a/tests/external_botocore/test_chat_completion_converse.py +++ b/tests/external_botocore/test_chat_completion_converse.py @@ -17,7 +17,7 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -49,6 +49,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.usage.prompt_tokens": 26, + "response.usage.completion_tokens": 100, + "response.usage.total_tokens": 126, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "max_tokens", @@ -70,6 +73,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -88,6 +92,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 
0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -106,6 +111,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -185,7 +191,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model): - @validate_custom_events(add_token_count_to_events(chat_completion_expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events)) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -468,57 +474,3 @@ def _test(): assert response _test() - - -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_converse_server, exercise_model, set_trace_info -): - """ - A request is made to the server with invalid credentials. botocore will reach out to the server and receive an - UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer - events. The error response can also be parsed, and will be included as attributes on the recorded exception. - """ - - @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") - def _test(): - monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - message = [{"role": "user", "content": [{"text": "Invalid Token"}]}] - - response = bedrock_converse_server.converse( - modelId="anthropic.claude-3-sonnet-20240229-v1:0", - messages=message, - inferenceConfig={"temperature": 0.7, "maxTokens": 100}, - ) - - assert response - - _test() diff --git a/tests/mlmodel_gemini/test_embeddings.py b/tests/mlmodel_gemini/test_embeddings.py index 0fc92897b6..5b4e30f860 100644 --- a/tests/mlmodel_gemini/test_embeddings.py +++ b/tests/mlmodel_gemini/test_embeddings.py @@ -15,7 +15,7 @@ import google.genai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, 
events_sans_content, @@ -93,7 +93,7 @@ def test_gemini_embedding_sync_no_content(gemini_dev_client, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_gemini_embedding_sync_with_token_count", @@ -177,7 +177,7 @@ def test_gemini_embedding_async_no_content(gemini_dev_client, loop, set_trace_in @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_gemini_embedding_async_with_token_count", diff --git a/tests/mlmodel_gemini/test_embeddings_error.py b/tests/mlmodel_gemini/test_embeddings_error.py index a65a6c2c6f..f0e7aac58a 100644 --- a/tests/mlmodel_gemini/test_embeddings_error.py +++ b/tests/mlmodel_gemini/test_embeddings_error.py @@ -16,12 +16,10 @@ import google.genai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -159,34 +157,6 @@ def test_embeddings_invalid_request_error_invalid_model(gemini_dev_client, set_t gemini_dev_client.models.embed_content(contents="Embedded: Model does not exist.", model="does-not-exist") -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for embedContent. Call ListModels to see the list of available models and their supported methods." 
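Every *_with_token_count test in these files installs a token-counting callback through override_llm_token_callback_settings(llm_token_count_callback) and then validates that the token fields on the recorded events were overridden. Outside the test fixtures, the equivalent hook is registered with the agent's set_llm_token_count_callback API; a minimal sketch, using a naive whitespace count purely for illustration:

    import newrelic.agent

    def llm_token_count_callback(model, content):
        # Illustration only: a real callback would use a tokenizer appropriate to `model`.
        return len(content.split())

    newrelic.agent.set_llm_token_count_callback(llm_token_count_callback)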
- } -) -@validate_transaction_metrics( - name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - rollup_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - custom_metrics=[(f"Supportability/Python/ML/Gemini/{google.genai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count(gemini_dev_client, set_trace_info): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - gemini_dev_client.models.embed_content(contents="Embedded: Model does not exist.", model="does-not-exist") - - embedding_invalid_key_error_events = [ ( {"type": "LlmEmbedding"}, @@ -326,36 +296,6 @@ def test_embeddings_async_invalid_request_error_invalid_model(gemini_dev_client, ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for embedContent. Call ListModels to see the list of available models and their supported methods." - } -) -@validate_transaction_metrics( - name="test_embeddings_error:test_embeddings_async_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - rollup_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - custom_metrics=[(f"Supportability/Python/ML/Gemini/{google.genai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_async_invalid_request_error_invalid_model_with_token_count(gemini_dev_client, loop, set_trace_info): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - loop.run_until_complete( - gemini_dev_client.models.embed_content(contents="Embedded: Model does not exist.", model="does-not-exist") - ) - - # Wrong api_key provided @dt_enabled @reset_core_stats_engine() diff --git a/tests/mlmodel_gemini/test_text_generation.py b/tests/mlmodel_gemini/test_text_generation.py index faec66aa75..3da978e777 100644 --- a/tests/mlmodel_gemini/test_text_generation.py +++ b/tests/mlmodel_gemini/test_text_generation.py @@ -15,7 +15,7 @@ import google.genai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -50,6 +50,9 @@ "vendor": "gemini", "ingest_source": "Python", "response.number_of_messages": 2, + "response.usage.prompt_tokens": 9, + "response.usage.completion_tokens": 13, + "response.usage.total_tokens": 22, }, ), ( @@ -60,6 +63,7 @@ "llm.foo": "bar", "span_id": None, "trace_id": "trace-id", + "token_count": 0, "content": "How many letters are in the word Python?", "role": "user", "completion_id": None, @@ -77,6 +81,7 @@ "llm.foo": "bar", "span_id": None, "trace_id": "trace-id", + 
"token_count": 0, "content": 'There are **6** letters in the word "Python".\n', "role": "model", "completion_id": None, @@ -183,7 +188,8 @@ def test_gemini_text_generation_sync_no_content(gemini_dev_client, set_trace_inf @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(text_generation_recorded_events)) +# Ensure LLM callback is invoked and response token counts are overridden +@validate_custom_events(add_token_counts_to_chat_events(text_generation_recorded_events)) @validate_custom_event_count(count=3) @validate_transaction_metrics( name="test_text_generation:test_gemini_text_generation_sync_with_token_count", @@ -324,7 +330,7 @@ def test_gemini_text_generation_async_no_content(gemini_dev_client, loop, set_tr @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(text_generation_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(text_generation_recorded_events)) @validate_custom_event_count(count=3) @validate_transaction_metrics( name="test_text_generation:test_gemini_text_generation_async_with_token_count", diff --git a/tests/mlmodel_gemini/test_text_generation_error.py b/tests/mlmodel_gemini/test_text_generation_error.py index 5e6f1c04de..c92e1a2d45 100644 --- a/tests/mlmodel_gemini/test_text_generation_error.py +++ b/tests/mlmodel_gemini/test_text_generation_error.py @@ -17,13 +17,11 @@ import google.genai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -63,6 +61,7 @@ "trace_id": "trace-id", "content": "How many letters are in the word Python?", "role": "user", + "token_count": 0, "completion_id": None, "sequence": 0, "vendor": "gemini", @@ -167,6 +166,7 @@ def _test(): "trace_id": "trace-id", "content": "Model does not exist.", "role": "user", + "token_count": 0, "completion_id": None, "response.model": "does-not-exist", "sequence": 0, @@ -179,39 +179,6 @@ def _test(): @dt_enabled @reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods." 
- } -) -@validate_transaction_metrics( - "test_text_generation_error:test_text_generation_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/Gemini/generate_content", 1)], - rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_text_generation_invalid_request_error_invalid_model_with_token_count(gemini_dev_client, set_trace_info): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - gemini_dev_client.models.generate_content( - model="does-not-exist", - contents=["Model does not exist."], - config=google.genai.types.GenerateContentConfig(max_output_tokens=100, temperature=0.7), - ) - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) @validate_error_trace_attributes( callable_name(google.genai.errors.ClientError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, @@ -227,7 +194,7 @@ def test_text_generation_invalid_request_error_invalid_model_with_token_count(ge rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], background_task=True, ) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) +@validate_custom_events(expected_events_on_invalid_model_error) @validate_custom_event_count(count=2) @background_task() def test_text_generation_invalid_request_error_invalid_model_chat(gemini_dev_client, set_trace_info): @@ -266,6 +233,7 @@ def test_text_generation_invalid_request_error_invalid_model_chat(gemini_dev_cli "trace_id": "trace-id", "content": "Invalid API key.", "role": "user", + "token_count": 0, "response.model": "gemini-flash-2.0", "completion_id": None, "sequence": 0, @@ -377,43 +345,6 @@ def _test(): @dt_enabled @reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods." 
- } -) -@validate_transaction_metrics( - "test_text_generation_error:test_text_generation_async_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/Gemini/generate_content", 1)], - rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_text_generation_async_invalid_request_error_invalid_model_with_token_count( - gemini_dev_client, loop, set_trace_info -): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - gemini_dev_client.models.generate_content( - model="does-not-exist", - contents=["Model does not exist."], - config=google.genai.types.GenerateContentConfig(max_output_tokens=100, temperature=0.7), - ) - ) - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) @validate_error_trace_attributes( callable_name(google.genai.errors.ClientError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, @@ -429,7 +360,7 @@ def test_text_generation_async_invalid_request_error_invalid_model_with_token_co rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], background_task=True, ) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) +@validate_custom_events(expected_events_on_invalid_model_error) @validate_custom_event_count(count=2) @background_task() def test_text_generation_async_invalid_request_error_invalid_model_chat(gemini_dev_client, loop, set_trace_info): diff --git a/tests/mlmodel_langchain/test_chain.py b/tests/mlmodel_langchain/test_chain.py index a6b7470a9a..5d7586ffb9 100644 --- a/tests/mlmodel_langchain/test_chain.py +++ b/tests/mlmodel_langchain/test_chain.py @@ -371,6 +371,7 @@ "response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999992, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 8, "vendor": "openai", "ingest_source": "Python", "input": "[[3923, 374, 220, 17, 489, 220, 19, 30]]", @@ -394,6 +395,7 @@ "response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999998, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 1, "vendor": "openai", "ingest_source": "Python", "input": "[[10590]]", @@ -464,6 +466,9 @@ "response.headers.ratelimitResetRequests": "8.64s", "response.headers.ratelimitRemainingTokens": 199912, "response.headers.ratelimitRemainingRequests": 9999, + "response.usage.prompt_tokens": 73, + "response.usage.completion_tokens": 375, + "response.usage.total_tokens": 448, "response.number_of_messages": 3, }, ], @@ -479,6 +484,7 @@ "sequence": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "content": "You are a generator of quiz questions for a seminar. Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. 
Render the output as HTML\n\nWhat is 2 + 4?", }, @@ -495,6 +501,7 @@ "sequence": 1, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "content": "math", }, @@ -511,6 +518,7 @@ "sequence": 2, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "is_response": True, "content": "```html\n\n\n\n Math Quiz\n\n\n

Math Quiz Questions: What is the result of 5 + 3? What is the product of 6 x 7? What is the square root of 64? What is the result of 12 / 4? What is the sum of 15 + 9? [HTML markup of the quiz content not recoverable]
\n\n\n```", diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py index 1f8cf1cb74..5e4d209ed7 100644 --- a/tests/mlmodel_openai/test_chat_completion.py +++ b/tests/mlmodel_openai/test_chat_completion.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -55,6 +55,9 @@ "response.organization": "new-relic-nkmd8b", "request.temperature": 0.7, "request.max_tokens": 100, + "response.usage.completion_tokens": 11, + "response.usage.total_tokens": 64, + "response.usage.prompt_tokens": 53, "response.choices.finish_reason": "stop", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 200, @@ -81,6 +84,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "ingest_source": "Python", @@ -99,6 +103,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "ingest_source": "Python", @@ -117,6 +122,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "is_response": True, @@ -172,7 +178,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -343,7 +349,7 @@ def test_openai_chat_completion_async_no_content(loop, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_chat_completion_error.py b/tests/mlmodel_openai/test_chat_completion_error.py index bfb2267a33..97a4dd8793 100644 --- a/tests/mlmodel_openai/test_chat_completion_error.py +++ b/tests/mlmodel_openai/test_chat_completion_error.py @@ -15,13 +15,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +66,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": 
"Python", }, @@ -83,6 +82,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -186,6 +186,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -193,36 +194,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - openai.ChatCompletion.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() @@ -281,6 +252,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -296,6 +268,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -360,6 +333,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -471,37 +445,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info): - set_trace_info() - with 
pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() diff --git a/tests/mlmodel_openai/test_chat_completion_error_v1.py b/tests/mlmodel_openai/test_chat_completion_error_v1.py index 9be9fcab9c..5af1598847 100644 --- a/tests/mlmodel_openai/test_chat_completion_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_error_v1.py @@ -14,13 +14,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -67,6 +65,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -82,6 +81,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -229,6 +229,7 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -266,37 +267,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - sync_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -329,41 +299,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", 
"http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - async_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - expected_events_on_wrong_api_key_error = [ ( {"type": "LlmChatCompletionSummary"}, @@ -391,6 +326,7 @@ def test_chat_completion_invalid_request_error_invalid_model_with_token_count_as "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -610,39 +546,6 @@ def test_chat_completion_invalid_request_error_invalid_model_with_raw_response(s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response( - set_trace_info, sync_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - sync_openai_client.chat.completions.with_raw_response.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -677,41 +580,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async_with_raw_resp ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - 
"test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - async_openai_client.chat.completions.with_raw_response.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py index ad89d6f260..8019c0b6a9 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream.py +++ b/tests/mlmodel_openai/test_chat_completion_stream.py @@ -15,7 +15,8 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -184,9 +185,101 @@ def test_openai_chat_completion_sync_no_content(set_trace_info): assert resp +chat_completion_recorded_token_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.model": "gpt-3.5-turbo-0613", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "stop", + "response.headers.llmVersion": "2020-10-01", + "response.headers.ratelimitLimitRequests": 200, + "response.headers.ratelimitLimitTokens": 40000, + "response.headers.ratelimitResetTokens": "90ms", + "response.headers.ratelimitResetRequests": "7m12s", + "response.headers.ratelimitRemainingTokens": 39940, + "response.headers.ratelimitRemainingRequests": 199, + "vendor": "openai", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + 
"trace_id": "trace-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "is_response": True, + "ingest_source": "Python", + }, + ), +] + + @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -378,7 +471,9 @@ async def consumer(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_chat_completion_stream:test_openai_chat_completion_async_with_token_count", diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py index eebb5ee8fb..e8e55426e9 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -15,13 +15,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +66,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -83,6 +82,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -191,6 +191,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -198,38 +199,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 
404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - generator = openai.ChatCompletion.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - for resp in generator: - assert resp - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -290,6 +259,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -305,6 +275,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -374,6 +345,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -488,38 +460,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -649,6 +589,7 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py 
b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py index 5f769ea0e6..64798300fc 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py @@ -12,16 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. - import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +65,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -83,6 +81,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -243,6 +242,7 @@ async def consumer(): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -281,77 +281,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn assert resp -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - generator = sync_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - for resp in generator: - assert resp - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_async_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) 
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_async_with_token_count(
-    loop, set_trace_info, async_openai_client
-):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        add_custom_attribute("llm.conversation_id", "my-awesome-id")
-
-        async def consumer():
-            generator = await async_openai_client.chat.completions.create(
-                model="does-not-exist",
-                messages=({"role": "user", "content": "Model does not exist."},),
-                temperature=0.7,
-                max_tokens=100,
-                stream=True,
-            )
-            async for resp in generator:
-                assert resp
-
-        loop.run_until_complete(consumer())
-
-
 @dt_enabled
 @reset_core_stats_engine()
 @validate_error_trace_attributes(
@@ -414,6 +343,7 @@ async def consumer():
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
diff --git a/tests/mlmodel_openai/test_chat_completion_stream_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_v1.py
index 796404012b..c88e8b1df6 100644
--- a/tests/mlmodel_openai/test_chat_completion_stream_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_stream_v1.py
@@ -17,7 +17,8 @@
 from conftest import get_openai_version
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_count_streaming_events,
+    add_token_counts_to_chat_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     disabled_ai_monitoring_streaming_settings,
@@ -300,7 +301,9 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie

 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(
+    add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events))
+)
 # One summary event, one system message, one user message, and one response message from the assistant
 @validate_custom_event_count(count=4)
 @validate_transaction_metrics(
@@ -622,7 +625,9 @@ async def consumer():

 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(
+    add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events))
+)
 # One summary event, one system message, one user message, and one response message from the assistant
 # @validate_custom_event_count(count=4)
 @validate_transaction_metrics(
diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py
index 817db35d8e..007effcb17 100644
--- a/tests/mlmodel_openai/test_chat_completion_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_v1.py
@@ -15,7 +15,7 @@
 import openai
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_counts_to_chat_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     disabled_ai_monitoring_streaming_settings,
@@ -54,6 +54,9 @@
             "response.organization": "new-relic-nkmd8b",
             "request.temperature": 0.7,
             "request.max_tokens": 100,
+            "response.usage.completion_tokens": 75,
+            "response.usage.total_tokens": 101,
+            "response.usage.prompt_tokens": 26,
             "response.choices.finish_reason": "stop",
             "response.headers.llmVersion": "2020-10-01",
             "response.headers.ratelimitLimitRequests": 10000,
@@ -80,6 +83,7 @@
             "role": "system",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "gpt-3.5-turbo-0125",
             "vendor": "openai",
             "ingest_source": "Python",
@@ -98,6 +102,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "gpt-3.5-turbo-0125",
             "vendor": "openai",
             "ingest_source": "Python",
@@ -116,6 +121,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 2,
+            "token_count": 0,
             "response.model": "gpt-3.5-turbo-0125",
             "vendor": "openai",
             "is_response": True,
@@ -193,7 +199,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie

 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events))
 # One summary event, one system message, one user message, and one response message from the assistant
 @validate_custom_event_count(count=4)
 @validate_transaction_metrics(
@@ -389,7 +395,7 @@ def test_openai_chat_completion_async_with_llm_metadata_no_content(loop, set_tra

 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events))
 # One summary event, one system message, one user message, and one response message from the assistant
 @validate_custom_event_count(count=4)
 @validate_transaction_metrics(
diff --git a/tests/mlmodel_openai/test_embeddings.py b/tests/mlmodel_openai/test_embeddings.py
index c3c3e7c429..935db04fe0 100644
--- a/tests/mlmodel_openai/test_embeddings.py
+++ b/tests/mlmodel_openai/test_embeddings.py
@@ -19,7 +19,7 @@
     validate_attributes,
 )
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_count_to_embedding_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     events_sans_content,
@@ -55,6 +55,7 @@
             "response.headers.ratelimitResetRequests": "19m45.394s",
             "response.headers.ratelimitRemainingTokens": 149994,
             "response.headers.ratelimitRemainingRequests": 197,
+            "response.usage.total_tokens": 6,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -107,7 +108,7 @@ def test_openai_embedding_sync_no_content(set_trace_info):

 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events))
+@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events))
 @validate_custom_event_count(count=1)
 @validate_transaction_metrics(
     name="test_embeddings:test_openai_embedding_sync_with_token_count",
@@ -191,7 +192,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info):

 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events))
+@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events))
 @validate_custom_event_count(count=1)
 @validate_transaction_metrics(
     name="test_embeddings:test_openai_embedding_async_with_token_count",
diff --git a/tests/mlmodel_openai/test_embeddings_error.py b/tests/mlmodel_openai/test_embeddings_error.py
index a8e46bf23a..f80e6ff41d 100644
--- a/tests/mlmodel_openai/test_embeddings_error.py
+++ b/tests/mlmodel_openai/test_embeddings_error.py
@@ -14,12 +14,10 @@

 import openai
 import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
     disabled_ai_monitoring_record_content_settings,
     events_sans_content,
-    llm_token_count_callback,
     set_trace_info,
 )
 from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -128,35 +126,6 @@ def test_embeddings_invalid_request_error_no_model_no_content(set_trace_info):
 ]


-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.InvalidRequestError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}},
-)
-@validate_span_events(
-    exact_agents={
-        "error.message": "The model `does-not-exist` does not exist"
-        # "http.statusCode": 404,
-    }
-)
-@validate_transaction_metrics(
-    name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count",
-    scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info):
-    set_trace_info()
-    with pytest.raises(openai.InvalidRequestError):
-        openai.Embedding.create(input="Model does not exist.", model="does-not-exist")
-
-
 # Invalid model provided
 @dt_enabled
 @reset_core_stats_engine()
@@ -348,30 +317,6 @@ def test_embeddings_invalid_request_error_no_model_async_no_content(loop, set_tr
         )


-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.InvalidRequestError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
-    name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count_async",
-    scoped_metrics=[("Llm/embedding/OpenAI/acreate", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/acreate", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count_async(set_trace_info, loop):
-    set_trace_info()
-    with pytest.raises(openai.InvalidRequestError):
-        loop.run_until_complete(openai.Embedding.acreate(input="Model does not exist.", model="does-not-exist"))
-
-
 # Invalid model provided
 @dt_enabled
 @reset_core_stats_engine()
diff --git a/tests/mlmodel_openai/test_embeddings_error_v1.py b/tests/mlmodel_openai/test_embeddings_error_v1.py
index fd29236122..499f96893b 100644
--- a/tests/mlmodel_openai/test_embeddings_error_v1.py
+++ b/tests/mlmodel_openai/test_embeddings_error_v1.py
@@ -16,12 +16,10 @@

 import openai
 import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
     disabled_ai_monitoring_record_content_settings,
     events_sans_content,
-    llm_token_count_callback,
     set_trace_info,
 )
 from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -149,32 +147,6 @@ def test_embeddings_invalid_request_error_no_model_async(set_trace_info, async_o
 ]


-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.NotFoundError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
-    exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
-    name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count",
-    scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        sync_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist")
-
-
 @dt_enabled
 @reset_core_stats_engine()
 @validate_error_trace_attributes(
@@ -255,36 +227,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content(set_tra
         )


-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.NotFoundError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
-    exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
-    name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count",
-    scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_async_with_token_count(
-    set_trace_info, async_openai_client, loop
-):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        loop.run_until_complete(
-            async_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist")
-        )
-
-
 embedding_invalid_key_error_events = [
     (
         {"type": "LlmEmbedding"},
@@ -449,34 +391,6 @@ def test_embeddings_invalid_request_error_no_model_async_with_raw_response(set_t
         )  # no model provided


-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.NotFoundError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
-    exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
-    name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response",
-    scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response(
-    set_trace_info, sync_openai_client
-):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        sync_openai_client.embeddings.with_raw_response.create(input="Model does not exist.", model="does-not-exist")
-
-
 @dt_enabled
 @reset_core_stats_engine()
 @validate_error_trace_attributes(
@@ -566,38 +480,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content_with_ra
         )


-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.NotFoundError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
-    exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
-    name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response",
-    scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response(
-    set_trace_info, async_openai_client, loop
-):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        loop.run_until_complete(
-            async_openai_client.embeddings.with_raw_response.create(
-                input="Model does not exist.", model="does-not-exist"
-            )
-        )
-
-
 @dt_enabled
 @reset_core_stats_engine()
 @validate_error_trace_attributes(
diff --git a/tests/mlmodel_openai/test_embeddings_v1.py b/tests/mlmodel_openai/test_embeddings_v1.py
index 405a2a9e5f..3801d3639c 100644
--- a/tests/mlmodel_openai/test_embeddings_v1.py
+++ b/tests/mlmodel_openai/test_embeddings_v1.py
@@ -15,7 +15,7 @@
 import openai
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_count_to_embedding_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     events_sans_content,
@@ -48,6 +48,7 @@
             "response.headers.ratelimitResetRequests": "20ms",
             "response.headers.ratelimitRemainingTokens": 999994,
"response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 6, "vendor": "openai", "ingest_source": "Python", }, @@ -111,7 +112,7 @@ def test_openai_embedding_sync_no_content(set_trace_info, sync_openai_client): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings_v1:test_openai_embedding_sync_with_token_count", @@ -206,7 +207,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info, async_openai_cl @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings_v1:test_openai_embedding_async_with_token_count", diff --git a/tests/testing_support/ml_testing_utils.py b/tests/testing_support/ml_testing_utils.py index 4ff70c7ed4..8c2c0444f0 100644 --- a/tests/testing_support/ml_testing_utils.py +++ b/tests/testing_support/ml_testing_utils.py @@ -29,6 +29,7 @@ def llm_token_count_callback(model, content): return 105 +# This will be removed once all LLM instrumentations have been converted to use new token count design def add_token_count_to_events(expected_events): events = copy.deepcopy(expected_events) for event in events: @@ -37,6 +38,32 @@ def add_token_count_to_events(expected_events): return events +def add_token_count_to_embedding_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmEmbedding": + event[1]["response.usage.total_tokens"] = 105 + return events + + +def add_token_count_streaming_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmChatCompletionMessage": + event[1]["token_count"] = 0 + return events + + +def add_token_counts_to_chat_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmChatCompletionSummary": + event[1]["response.usage.prompt_tokens"] = 105 + event[1]["response.usage.completion_tokens"] = 105 + event[1]["response.usage.total_tokens"] = 210 + return events + + def events_sans_content(event): new_event = copy.deepcopy(event) for _event in new_event: