Add multilingual STT support and config UI/runtime updates

This commit is contained in:
Thales Maciel 2026-02-27 12:38:13 -03:00
parent ed950cb7c4
commit 4a69c3d333
26 changed files with 2207 additions and 465 deletions

View file

@ -1,3 +1,5 @@
import json
import os
import sys
import tempfile
import unittest
@ -12,7 +14,9 @@ if str(SRC) not in sys.path:
import aiprocess
from aiprocess import (
ExternalApiProcessor,
_assert_expected_model_checksum,
_build_request_payload,
_extract_cleaned_text,
_profile_generation_kwargs,
_supports_response_format,
@ -120,6 +124,20 @@ class ModelChecksumTests(unittest.TestCase):
_assert_expected_model_checksum("0" * 64)
class RequestPayloadTests(unittest.TestCase):
    """Checks on the dict produced by ``_build_request_payload``."""

    def test_build_request_payload_with_dictionary(self):
        # A non-blank dictionary context must show up next to the
        # language and transcript entries.
        built = _build_request_payload(
            "hello",
            lang="en",
            dictionary_context="Docker",
        )
        expected_entries = (
            ("language", "en"),
            ("transcript", "hello"),
            ("dictionary", "Docker"),
        )
        for key, expected in expected_entries:
            self.assertEqual(built[key], expected)

    def test_build_request_payload_omits_empty_dictionary(self):
        # A whitespace-only dictionary context must not add a
        # "dictionary" key to the payload.
        built = _build_request_payload(
            "hello",
            lang="en",
            dictionary_context=" ",
        )
        for key, expected in (("language", "en"), ("transcript", "hello")):
            self.assertEqual(built[key], expected)
        self.assertNotIn("dictionary", built)
class _Response:
def __init__(self, payload: bytes):
self.payload = payload
@ -136,9 +154,13 @@ class _Response:
return str(len(self.payload))
return None
def read(self, size: int) -> bytes:
def read(self, size: "int | None" = -1) -> bytes:
    """Return up to ``size`` bytes of the payload, advancing the cursor.

    Args:
        size: Maximum number of bytes to return. A negative value or
            ``None`` means "everything that is left", matching the
            convention of standard file-like ``read`` methods.

    Returns:
        The next chunk of ``self.payload`` starting at ``self.offset``,
        or ``b""`` once the payload has been fully consumed.
    """
    remaining = len(self.payload) - self.offset
    if remaining <= 0:
        # Already drained: leave the cursor untouched and signal EOF.
        return b""
    if size is None or size < 0:
        # Unbounded read: hand back the rest of the payload in one go.
        size = remaining
    chunk = self.payload[self.offset : self.offset + size]
    # Advance by what was actually returned; the slice may be shorter
    # than ``size`` near the end of the payload.
    self.offset += len(chunk)
    return chunk
@ -196,5 +218,42 @@ class EnsureModelTests(unittest.TestCase):
ensure_model()
class ExternalApiProcessorTests(unittest.TestCase):
    """Construction and request-routing checks for ``ExternalApiProcessor``."""

    @staticmethod
    def _make_processor():
        # Both tests build the processor with the same arguments; only the
        # patched environment around the call differs.
        return ExternalApiProcessor(
            provider="openai",
            base_url="https://api.openai.com/v1",
            model="gpt-4o-mini",
            api_key_env_var="AMAN_EXTERNAL_API_KEY",
            timeout_ms=1000,
            max_retries=0,
        )

    def test_requires_api_key_env_var(self):
        # With every environment variable cleared, construction must fail
        # with the "missing external api key" error.
        emptied_env = patch.dict(os.environ, {}, clear=True)
        expect_failure = self.assertRaisesRegex(
            RuntimeError, "missing external api key"
        )
        with emptied_env, expect_failure:
            self._make_processor()

    def test_process_uses_chat_completion_endpoint(self):
        # Canned chat-completion reply whose message content is the JSON
        # document carrying the cleaned text.
        completion = {
            "choices": [{"message": {"content": '{"cleaned_text":"clean"}'}}],
        }
        encoded_reply = json.dumps(completion).encode("utf-8")

        with patch.dict(os.environ, {"AMAN_EXTERNAL_API_KEY": "test-key"}, clear=True):
            with patch(
                "aiprocess.urllib.request.urlopen",
                return_value=_Response(encoded_reply),
            ) as fake_urlopen:
                processor = self._make_processor()
                cleaned = processor.process("raw text", dictionary_context="Docker")

        self.assertEqual(cleaned, "clean")
        # The first positional argument handed to urlopen is the request
        # object; its URL must target the chat-completions route.
        sent_request = fake_urlopen.call_args[0][0]
        self.assertTrue(sent_request.full_url.endswith("/chat/completions"))
# Run the unittest test runner when this file is executed directly as a script.
if __name__ == "__main__":
unittest.main()