Files
lijiaoqiao/llm-gateway-competitors/litellm-wheel-src/litellm/integrations/helicone.py

229 lines
8.4 KiB
Python
Raw Normal View History

#### What this does ####
# On success, logs events to Helicone
import os
import traceback
import litellm
from litellm._logging import verbose_logger
from litellm.integrations.helicone_mock_client import (
should_use_helicone_mock,
create_mock_helicone_client,
)
class HeliconeLogger:
    """Success-callback logger that posts completed LLM calls to Helicone.

    Each call to :meth:`log_success` builds a Helicone log payload (provider
    request, provider response, timing) and POSTs it to a Helicone log
    endpoint chosen from the model name / provider.
    """

    # Substrings that identify model names forwarded to Helicone unchanged.
    # Any other non-Vertex model is reported as "gpt-3.5-turbo"
    # (see ``log_success``).
    helicone_model_list = [
        "gpt",
        "claude",
        "gemini",
        "command-r",
        "command-r-plus",
        "command-light",
        "command-medium",
        "command-medium-beta",
        "command-xlarge-nightly",
        "command-nightly",
    ]

    def __init__(self):
        """Read Helicone settings from the environment.

        ``HELICONE_API_KEY`` supplies the bearer token and
        ``HELICONE_API_BASE`` optionally overrides the default
        ``https://api.hconeai.com`` base URL. When mock mode is enabled the
        mock Helicone client is installed first.
        """
        self.is_mock_mode = should_use_helicone_mock()
        if self.is_mock_mode:
            create_mock_helicone_client()
            verbose_logger.info(
                "[HELICONE MOCK] Helicone logger initialized in mock mode"
            )

        self.provider_url = "https://api.openai.com/v1"
        self.key = os.getenv("HELICONE_API_KEY")
        api_base = os.getenv("HELICONE_API_BASE") or "https://api.hconeai.com"
        # Drop every trailing slash so endpoint paths can be appended with a
        # single "/" without producing "//".
        self.api_base = api_base.rstrip("/")

    def claude_mapping(self, model, messages, response_obj):
        """Reshape an OpenAI-style chat completion dict into a Claude-style message.

        Args:
            model: Model name copied into the result's ``"model"`` field.
            messages: Request messages. Not used by the mapping; kept so the
                signature stays compatible with existing callers.
            response_obj: Dict with ``"id"``, ``"choices"`` (the first choice
                is used) and ``"usage"`` (``prompt_tokens`` /
                ``completion_tokens``).

        Returns:
            A dict with ``type="message"``, ``role="assistant"``, a
            ``content`` list of ``tool_use`` blocks (when the message has tool
            calls) or a single ``text`` block (when it has text content), the
            choice's ``finish_reason`` as ``stop_reason`` and the token usage.

        Raises:
            KeyError / IndexError: If ``response_obj`` lacks the fields above.
        """
        choice = response_obj["choices"][0]
        message = choice["message"]

        content = []
        if "tool_calls" in message and message["tool_calls"]:
            # Tool calls take precedence over any text content.
            content = [
                {
                    "type": "tool_use",
                    "id": tool_call["id"],
                    "name": tool_call["function"]["name"],
                    "input": tool_call["function"]["arguments"],
                }
                for tool_call in message["tool_calls"]
            ]
        elif "content" in message and message["content"]:
            content = [{"type": "text", "text": message["content"]}]

        usage = response_obj["usage"]
        return {
            "id": response_obj["id"],
            "type": "message",
            "role": "assistant",
            "model": model,
            "content": content,
            "stop_reason": choice["finish_reason"],
            "stop_sequence": None,
            "usage": {
                "input_tokens": usage["prompt_tokens"],
                "output_tokens": usage["completion_tokens"],
            },
        }

    @staticmethod
    def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
        """
        Merge Helicone-specific proxy request headers into the logging metadata.

        Headers found under ``litellm_params["proxy_server_request"]["headers"]``
        whose key starts with ``helicone_`` are copied into the metadata and
        overwrite entries with the same key. The ``litellm_parent_otel_span``
        entry is always left out of the result because a span object is not
        JSON serializable.

        The caller's ``metadata`` dict is never modified; a new dict is
        returned. ``None`` is returned only when ``metadata`` is ``None`` and
        there is no proxy request to read headers from.

        NOTE(review): an earlier version of this docstring used
        ``helicone-property-something`` (hyphenated) as the example header,
        which the ``helicone_`` prefix check does not match - confirm which
        spelling proxy requests actually deliver.
        """
        if metadata is not None:
            # Work on a filtered copy so the dict shared through
            # litellm_params keeps its span for any other consumer.
            metadata = {
                key: value
                for key, value in metadata.items()
                if key != "litellm_parent_otel_span"
            }

        if litellm_params is None:
            return metadata

        proxy_request = litellm_params.get("proxy_server_request")
        if proxy_request is None:
            return metadata

        if metadata is None:
            metadata = {}

        proxy_headers = proxy_request.get("headers", {}) or {}
        for header_key, header_value in proxy_headers.items():
            if header_key.startswith("helicone_"):
                metadata[header_key] = header_value

        return metadata

    @staticmethod
    def _split_timestamp(moment):
        """Return ``(whole_seconds, millisecond_remainder)`` of ``moment.timestamp()``."""
        epoch = moment.timestamp()
        whole_seconds = int(epoch)
        return whole_seconds, int((epoch - whole_seconds) * 1000)

    def _resolve_endpoints(self, model, is_vertex_ai):
        """Return ``(helicone_log_url, provider_url)`` for the given model."""
        if "claude" in model and not is_vertex_ai:
            return (
                f"{self.api_base}/anthropic/v1/log",
                "https://api.anthropic.com/v1/messages",
            )
        if is_vertex_ai:
            return (
                f"{self.api_base}/custom/v1/log",
                "https://aiplatform.googleapis.com/v1",
            )
        if "gemini" in model:
            return (
                f"{self.api_base}/custom/v1/log",
                "https://generativelanguage.googleapis.com/v1beta",
            )
        return f"{self.api_base}/oai/v1/log", self.provider_url

    def log_success(
        self, model, messages, response_obj, start_time, end_time, print_verbose, kwargs
    ):
        """POST one successful LLM call to Helicone.

        Args:
            model: Model name of the call. Names that match no entry of
                ``helicone_model_list`` and are not Vertex AI are reported as
                ``"gpt-3.5-turbo"``.
            messages: Request messages, sent as the provider request body.
            response_obj: ``litellm.ModelResponse`` / ``EmbeddingResponse``
                (converted with ``.json()``) or an already-converted object.
            start_time: Object with a ``timestamp()`` method marking the call start.
            end_time: Object with a ``timestamp()`` method marking the call end.
            print_verbose: Callable taking one string, used for all progress
                and error output.
            kwargs: Call kwargs; ``kwargs["litellm_params"]`` supplies
                ``custom_llm_provider``, ``metadata`` and
                ``proxy_server_request``.

        Logging is best-effort: any exception is reported through
        ``print_verbose`` and never propagated to the caller.
        """
        try:
            print_verbose(
                f"Helicone Logging - Enters logging function for model {model}"
            )
            # "or {}" also covers keys that are present but explicitly None.
            litellm_params = kwargs.get("litellm_params") or {}
            custom_llm_provider = litellm_params.get("custom_llm_provider", "")
            metadata = self.add_metadata_from_header(
                litellm_params, litellm_params.get("metadata") or {}
            )

            is_vertex_ai = custom_llm_provider == "vertex_ai" or model.startswith(
                "vertex_ai/"
            )
            is_known_model = any(
                accepted_model in model for accepted_model in self.helicone_model_list
            )
            if not (is_known_model or is_vertex_ai):
                model = "gpt-3.5-turbo"
            is_claude = "claude" in model and not is_vertex_ai

            provider_request = {"model": model, "messages": messages}
            if isinstance(
                response_obj, (litellm.EmbeddingResponse, litellm.ModelResponse)
            ):
                response_obj = response_obj.json()

            if is_claude:
                response_obj = self.claude_mapping(
                    model=model, messages=messages, response_obj=response_obj
                )

            provider_response = {
                "json": response_obj,
                "headers": {"openai-version": "2020-10-01"},
                "status": 200,
            }

            url, provider_url = self._resolve_endpoints(model, is_vertex_ai)
            headers = {
                "Authorization": f"Bearer {self.key}",
                "Content-Type": "application/json",
            }

            start_seconds, start_milliseconds = self._split_timestamp(start_time)
            end_seconds, end_milliseconds = self._split_timestamp(end_time)

            meta = {"Helicone-Auth": f"Bearer {self.key}"}
            meta.update(metadata)
            data = {
                "providerRequest": {
                    "url": provider_url,
                    "json": provider_request,
                    "meta": meta,
                },
                "providerResponse": provider_response,
                "timing": {
                    "startTime": {
                        "seconds": start_seconds,
                        "milliseconds": start_milliseconds,
                    },
                    "endTime": {
                        "seconds": end_seconds,
                        "milliseconds": end_milliseconds,
                    },
                },
            }

            response = litellm.module_level_client.post(url, headers=headers, json=data)
            if response.status_code == 200:
                if self.is_mock_mode:
                    print_verbose(
                        "[HELICONE MOCK] Helicone Logging - Successfully mocked!"
                    )
                else:
                    print_verbose("Helicone Logging - Success!")
            else:
                print_verbose(
                    f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}"
                )
                print_verbose(f"Helicone Logging - Error {response.text}")
        except Exception:
            # Deliberate best-effort boundary: a logging failure must not
            # break the LLM call that triggered it.
            print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")