Add benchmark-driven model promotion workflow and pipeline stages
Some checks failed
ci / test-and-build (push) Has been cancelled

This commit is contained in:
Thales Maciel 2026-02-28 15:12:33 -03:00
parent 98b13d1069
commit 8c1f7c1e13
38 changed files with 5300 additions and 503 deletions

View file

@ -111,11 +111,21 @@ class FakeUnsupportedLanguageModel:
class FakeAIProcessor:
    """Test double for the daemon's AI editor stage (LlamaProcessor).

    Records every call so tests can assert on the arguments used, and can be
    armed with exceptions to simulate processing or warmup failures.
    """

    def __init__(self):
        self.last_kwargs = {}      # kwargs seen by the most recent process() call
        self.warmup_calls = []     # profiles passed to warmup(), in call order
        self.warmup_error = None   # exception to raise from warmup() when set
        self.process_error = None  # exception to raise from process() when set

    def process(self, text, lang="auto", **_kwargs):
        """Echo *text* unchanged after recording the call kwargs; raise if armed."""
        if self.process_error is not None:
            raise self.process_error
        self.last_kwargs = {"lang": lang, **_kwargs}
        return text

    def warmup(self, profile="default"):
        """Record *profile*, then raise the armed error if one is set.

        The call is recorded *before* raising so tests can assert that the
        warmup attempt happened even when it fails.
        """
        self.warmup_calls.append(profile)
        # Explicit None check for consistency with process(); behavior is
        # identical since exception instances are always truthy.
        if self.warmup_error is not None:
            raise self.warmup_error
class FakeAudio:
def __init__(self, size: int):
@ -212,6 +222,32 @@ class DaemonTests(unittest.TestCase):
self.assertEqual(desktop.inject_calls, [("good morning Marta", "clipboard", False)])
@patch("aman.stop_audio_recording", return_value=FakeAudio(8))
@patch("aman.start_audio_recording", return_value=(object(), object()))
def test_editor_failure_aborts_output_injection(self, _start_mock, _stop_mock):
    """A failing editor stage must suppress injection and return the daemon to IDLE."""
    desktop = FakeDesktop()
    transcriber = FakeModel(text="hello world")
    editor = FakeAIProcessor()
    editor.process_error = RuntimeError("editor boom")
    daemon = self._build_daemon(
        desktop,
        transcriber,
        verbose=False,
        ai_processor=editor,
    )

    # Run the stop/process path synchronously instead of on a worker thread.
    def run_inline(stream, record, trigger, process_audio):
        return daemon._stop_and_process(stream, record, trigger, process_audio)

    daemon._start_stop_worker = run_inline

    daemon.toggle()
    daemon.toggle()

    self.assertEqual(desktop.inject_calls, [])
    self.assertEqual(daemon.get_state(), aman.State.IDLE)
def test_transcribe_skips_hints_when_model_does_not_support_them(self):
desktop = FakeDesktop()
model = FakeModel(text="hello")
@ -242,7 +278,7 @@ class DaemonTests(unittest.TestCase):
self.assertEqual(used_lang, "auto")
self.assertIn("Docker", model.last_kwargs["hotwords"])
self.assertIn("Systemd", model.last_kwargs["hotwords"])
self.assertIn("Preferred vocabulary", model.last_kwargs["initial_prompt"])
self.assertIsNone(model.last_kwargs["initial_prompt"])
def test_transcribe_uses_configured_language_hint(self):
desktop = FakeDesktop()
@ -300,7 +336,7 @@ class DaemonTests(unittest.TestCase):
daemon_verbose = self._build_daemon(desktop, FakeModel(), cfg=cfg, verbose=True)
self.assertTrue(daemon_verbose.log_transcript)
def test_ai_processor_is_initialized_during_daemon_init(self):
def test_editor_stage_is_initialized_during_daemon_init(self):
desktop = FakeDesktop()
with patch("aman._build_whisper_model", return_value=FakeModel()), patch(
"aman.LlamaProcessor", return_value=FakeAIProcessor()
@ -308,7 +344,47 @@ class DaemonTests(unittest.TestCase):
daemon = aman.Daemon(self._config(), desktop, verbose=True)
processor_cls.assert_called_once_with(verbose=True, model_path=None)
self.assertIsNotNone(daemon.ai_processor)
self.assertIsNotNone(daemon.editor_stage)
def test_editor_stage_is_warmed_up_during_daemon_init(self):
    """Daemon construction should warm up the editor stage with the default profile."""
    desktop = FakeDesktop()
    processor = FakeAIProcessor()
    whisper_patch = patch("aman._build_whisper_model", return_value=FakeModel())
    llama_patch = patch("aman.LlamaProcessor", return_value=processor)
    with whisper_patch, llama_patch:
        daemon = aman.Daemon(self._config(), desktop, verbose=False)
        self.assertIs(daemon.editor_stage._processor, processor)
        self.assertEqual(processor.warmup_calls, ["default"])
def test_editor_stage_warmup_failure_is_fatal_with_strict_startup(self):
    """With advanced.strict_startup=True a warmup error aborts daemon construction."""
    desktop = FakeDesktop()
    cfg = self._config()
    cfg.advanced.strict_startup = True
    processor = FakeAIProcessor()
    processor.warmup_error = RuntimeError("warmup boom")
    with patch("aman._build_whisper_model", return_value=FakeModel()):
        with patch("aman.LlamaProcessor", return_value=processor):
            with self.assertRaisesRegex(RuntimeError, "editor stage warmup failed"):
                aman.Daemon(cfg, desktop, verbose=False)
def test_editor_stage_warmup_failure_is_non_fatal_without_strict_startup(self):
    """Without strict startup a warmup failure only logs a warning; init still succeeds."""
    desktop = FakeDesktop()
    cfg = self._config()
    cfg.advanced.strict_startup = False
    processor = FakeAIProcessor()
    processor.warmup_error = RuntimeError("warmup boom")
    whisper_patch = patch("aman._build_whisper_model", return_value=FakeModel())
    llama_patch = patch("aman.LlamaProcessor", return_value=processor)
    with whisper_patch, llama_patch:
        with self.assertLogs(level="WARNING") as captured:
            daemon = aman.Daemon(cfg, desktop, verbose=False)
        self.assertIs(daemon.editor_stage._processor, processor)
        matching = [
            line
            for line in captured.output
            if "continuing because advanced.strict_startup=false" in line
        ]
        self.assertTrue(matching)
@patch("aman.stop_audio_recording", return_value=FakeAudio(8))
@patch("aman.start_audio_recording", return_value=(object(), object()))