Add benchmark-driven model promotion workflow and pipeline stages
Some checks failed
ci / test-and-build (push) Has been cancelled
Some checks failed
ci / test-and-build (push) Has been cancelled
This commit is contained in:
parent
98b13d1069
commit
8c1f7c1e13
38 changed files with 5300 additions and 503 deletions
|
|
@ -15,8 +15,11 @@ if str(SRC) not in sys.path:
|
|||
import aiprocess
|
||||
from aiprocess import (
|
||||
ExternalApiProcessor,
|
||||
LlamaProcessor,
|
||||
_assert_expected_model_checksum,
|
||||
_build_request_payload,
|
||||
_build_user_prompt_xml,
|
||||
_explicit_generation_kwargs,
|
||||
_extract_cleaned_text,
|
||||
_profile_generation_kwargs,
|
||||
_supports_response_format,
|
||||
|
|
@ -114,6 +117,75 @@ class SupportsResponseFormatTests(unittest.TestCase):
|
|||
|
||||
self.assertEqual(kwargs, {})
|
||||
|
||||
def test_explicit_generation_kwargs_honors_supported_params(self):
    """Only kwargs present in the callable's signature survive filtering."""

    def fake_completion(*, messages, temperature, top_p, max_tokens):
        return None

    requested = {
        "top_p": 0.9,
        "top_k": 40,
        "max_tokens": 128,
        "repeat_penalty": 1.1,
        "min_p": 0.05,
    }
    filtered = _explicit_generation_kwargs(fake_completion, **requested)

    # fake_completion does not accept top_k, repeat_penalty, or min_p,
    # so only the two supported parameters should remain.
    self.assertEqual(filtered, {"top_p": 0.9, "max_tokens": 128})
|
||||
|
||||
|
||||
class _WarmupClient:
|
||||
def __init__(self, response_payload: dict):
|
||||
self.response_payload = response_payload
|
||||
self.calls = []
|
||||
|
||||
def create_chat_completion(
|
||||
self,
|
||||
*,
|
||||
messages,
|
||||
temperature,
|
||||
response_format=None,
|
||||
max_tokens=None,
|
||||
):
|
||||
self.calls.append(
|
||||
{
|
||||
"messages": messages,
|
||||
"temperature": temperature,
|
||||
"response_format": response_format,
|
||||
"max_tokens": max_tokens,
|
||||
}
|
||||
)
|
||||
return self.response_payload
|
||||
|
||||
|
||||
class LlamaWarmupTests(unittest.TestCase):
    """Warmup behaviour of LlamaProcessor against a recording fake client."""

    @staticmethod
    def _make_processor(payload):
        # Bypass __init__ (which would load a real model) and attach the fake.
        proc = object.__new__(LlamaProcessor)
        proc.client = _WarmupClient(payload)
        return proc

    def test_warmup_uses_json_mode_and_low_token_budget(self):
        processor = self._make_processor(
            {"choices": [{"message": {"content": '{"cleaned_text":"ok"}'}}]}
        )

        processor.warmup(profile="fast")

        calls = processor.client.calls
        self.assertEqual(len(calls), 1)
        call = calls[0]
        self.assertEqual(call["temperature"], 0.0)
        self.assertEqual(call["response_format"], {"type": "json_object"})
        self.assertEqual(call["max_tokens"], 32)
        user_content = call["messages"][1]["content"]
        for fragment in (
            "<request>",
            "<transcript>warmup</transcript>",
            "<language>auto</language>",
        ):
            self.assertIn(fragment, user_content)

    def test_warmup_raises_on_non_json_response(self):
        processor = self._make_processor(
            {"choices": [{"message": {"content": "not-json"}}]}
        )

        with self.assertRaisesRegex(RuntimeError, "expected JSON"):
            processor.warmup(profile="default")
|
||||
|
||||
|
||||
class ModelChecksumTests(unittest.TestCase):
|
||||
def test_accepts_expected_checksum_case_insensitive(self):
|
||||
|
|
@ -137,6 +209,19 @@ class RequestPayloadTests(unittest.TestCase):
|
|||
self.assertEqual(payload["transcript"], "hello")
|
||||
self.assertNotIn("dictionary", payload)
|
||||
|
||||
def test_user_prompt_is_xml_and_escapes_literals(self):
    """XML prompt wraps fields in tags and XML-escapes literal markup.

    The previous assertion checked for a bare "&", which is vacuous for an
    escaping test: both the raw text "Docker & systemd" and its escaped form
    contain that character. Asserting on the "&amp;" entity actually proves
    the escaping happened.
    """
    payload = _build_request_payload(
        'keep <transcript> and "quotes"',
        lang="en",
        dictionary_context="Docker & systemd",
    )
    xml = _build_user_prompt_xml(payload)
    self.assertIn("<request>", xml)
    self.assertIn("<language>en</language>", xml)
    self.assertIn("<transcript>", xml)
    # The ampersand in the dictionary context must come out escaped, not raw.
    self.assertIn("&amp;", xml)
    self.assertIn("<output_contract>", xml)
|
||||
|
||||
|
||||
class _Response:
|
||||
def __init__(self, payload: bytes):
|
||||
|
|
@ -254,6 +339,21 @@ class ExternalApiProcessorTests(unittest.TestCase):
|
|||
request = urlopen.call_args[0][0]
|
||||
self.assertTrue(request.full_url.endswith("/chat/completions"))
|
||||
|
||||
def test_warmup_is_a_noop(self):
    """External-API warmup must not issue any HTTP request."""
    env = {"AMAN_EXTERNAL_API_KEY": "test-key"}
    with patch.dict(os.environ, env, clear=True):
        config = dict(
            provider="openai",
            base_url="https://api.openai.com/v1",
            model="gpt-4o-mini",
            api_key_env_var="AMAN_EXTERNAL_API_KEY",
            timeout_ms=1000,
            max_retries=0,
        )
        processor = ExternalApiProcessor(**config)
        with patch("aiprocess.urllib.request.urlopen") as mocked_urlopen:
            processor.warmup(profile="fast")

        mocked_urlopen.assert_not_called()
|
||||
|
||||
|
||||
# Allow running this test module directly (e.g. `python test_aiprocess.py`)
# in addition to discovery via `python -m unittest` / pytest.
if __name__ == "__main__":
    unittest.main()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue