-
Notifications
You must be signed in to change notification settings - Fork 323
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DO NOT MERGE, wip audio evals #5616
base: main
Are you sure you want to change the base?
Changes from 11 commits
38ec52e
0edc57e
822b918
c99c52d
1fdee44
3ebe948
2552190
4efc6b8
cd548b3
490c090
b4fc91d
44deeb8
e609a36
58e083e
19ba695
829f664
2ca4464
c4aacfd
1019d86
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
from phoenix.evals.models.base import BaseModel | ||
from phoenix.evals.models.rate_limiters import RateLimiter | ||
from phoenix.evals.templates import MultimodalPrompt, PromptPartContentType | ||
from phoenix.evals.utils import AudioFormat, Audio | ||
|
||
MINIMUM_OPENAI_VERSION = "1.0.0" | ||
MODEL_TOKEN_LIMIT_MAPPING = { | ||
|
@@ -279,27 +280,43 @@ def _init_rate_limiter(self) -> None: | |
) | ||
|
||
def _build_messages( | ||
self, prompt: MultimodalPrompt, system_instruction: Optional[str] = None | ||
self, prompt: MultimodalPrompt, data_fetcher: Optional[Callable[[str], Audio]] = None, system_instruction: Optional[str] = None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also note that the type annotation on the callable does not match the docstring, it's best if users return a familiar type instead of an internal one I think |
||
) -> List[Dict[str, str]]: | ||
messages = [] | ||
for parts in prompt.parts: | ||
if parts.content_type == PromptPartContentType.TEXT: | ||
messages.append({"role": "system", "content": parts.content}) | ||
for part in prompt.parts: | ||
if part.content_type == PromptPartContentType.TEXT: | ||
messages.append({"role": "system", "content": part.content}) | ||
elif part.content_type == PromptPartContentType.AUDIO: | ||
audio_object = data_fetcher(part.content) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't super like the fact that we've plumbed an arbitrary callable down to this level, maybe I'm overthinking it |
||
messages.append( | ||
{ | ||
"role": "user", | ||
"content": [ | ||
{ | ||
"type": "input_audio", | ||
"input_audio": { | ||
"data": audio_object.data, | ||
"format": audio_object.format.value | ||
} | ||
} | ||
], | ||
} | ||
) | ||
else: | ||
raise ValueError(f"Unsupported content type: {parts.content_type}") | ||
raise ValueError(f"Unsupported content type: {part.content_type}") | ||
if system_instruction: | ||
messages.insert(0, {"role": "system", "content": str(system_instruction)}) | ||
return messages | ||
|
||
def verbose_generation_info(self) -> str: | ||
return f"OpenAI invocation parameters: {self.public_invocation_params}" | ||
|
||
async def _async_generate(self, prompt: Union[str, MultimodalPrompt], **kwargs: Any) -> str: | ||
async def _async_generate(self, prompt: Union[str, MultimodalPrompt], data_fetcher: Optional[Callable[[str], Audio]] = None, **kwargs: Any) -> str: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's probably not a good idea not to change the interface for |
||
if isinstance(prompt, str): | ||
prompt = MultimodalPrompt.from_string(prompt) | ||
|
||
invoke_params = self.invocation_params | ||
messages = self._build_messages(prompt, kwargs.get("instruction")) | ||
messages = self._build_messages(prompt, data_fetcher, kwargs.get("instruction")) | ||
if functions := kwargs.get("functions"): | ||
invoke_params["functions"] = functions | ||
if function_call := kwargs.get("function_call"): | ||
|
@@ -316,12 +333,16 @@ async def _async_generate(self, prompt: Union[str, MultimodalPrompt], **kwargs: | |
return str(function_call.get("arguments") or "") | ||
return str(message["content"]) | ||
|
||
def _generate(self, prompt: Union[str, MultimodalPrompt], **kwargs: Any) -> str: | ||
def _generate(self, prompt: Union[str, MultimodalPrompt], data_fetcher: Optional[Callable[[str], Audio]] = None, **kwargs: Any) -> str: | ||
if isinstance(prompt, str): | ||
prompt = MultimodalPrompt.from_string(prompt) | ||
|
||
invoke_params = self.invocation_params | ||
messages = self._build_messages(prompt, kwargs.get("instruction")) | ||
messages = self._build_messages( | ||
prompt=prompt, | ||
data_fetcher=data_fetcher, | ||
system_instruction=kwargs.get("instruction") | ||
) | ||
if functions := kwargs.get("functions"): | ||
invoke_params["functions"] = functions | ||
if function_call := kwargs.get("function_call"): | ||
|
@@ -362,7 +383,7 @@ async def _async_completion(**kwargs: Any) -> Any: | |
|
||
return await _async_completion(**kwargs) | ||
|
||
def _rate_limited_completion(self, **kwargs: Any) -> Any: | ||
def _rate_limited_completion(self, audio_format: str, **kwargs: Any) -> Any: | ||
@self._rate_limiter.limit | ||
def _completion(**kwargs: Any) -> Any: | ||
try: | ||
|
@@ -374,6 +395,7 @@ def _completion(**kwargs: Any) -> Any: | |
) | ||
# OpenAI 1.0.0 API responses are pydantic objects, not dicts | ||
# We must dump the model to get the dict | ||
# TODO consider if there are additional changes + consider 'modalities' parameter | ||
return self._client.completions.create(**kwargs).model_dump() | ||
return self._client.chat.completions.create(**kwargs).model_dump() | ||
except self._openai._exceptions.BadRequestError as e: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I still don't think we should be requiring the user return our internal type, it feels like a structure they need to learn / import from that feels clunky