Add benchmark-driven model promotion workflow and pipeline stages
Some checks failed
ci / test-and-build (push) Has been cancelled

This commit is contained in:
Thales Maciel 2026-02-28 15:12:33 -03:00
parent 98b13d1069
commit 8c1f7c1e13
38 changed files with 5300 additions and 503 deletions

3
src/engine/__init__.py Normal file
View file

@ -0,0 +1,3 @@
"""Package entry point: re-exports the pipeline engine and its result type."""
from .pipeline import PipelineEngine, PipelineResult

# Restrict ``from <package> import *`` to the two re-exported names.
__all__ = ["PipelineEngine", "PipelineResult"]

154
src/engine/pipeline.py Normal file
View file

@ -0,0 +1,154 @@
from __future__ import annotations

import logging
import time
from dataclasses import dataclass
from typing import Any

from stages.alignment_edits import AlignmentDecision, AlignmentHeuristicEngine
from stages.asr_whisper import AsrResult
from stages.editor_llama import EditorResult
from stages.fact_guard import FactGuardEngine, FactGuardViolation
from vocabulary import VocabularyEngine
@dataclass
class PipelineResult:
    """Outcome of one pipeline run: final text plus per-stage metrics.

    All fields are required and keep their declaration order, so both
    positional and keyword construction behave as before.

    Attributes:
        asr: ASR stage result, or ``None`` when the run started from text.
        editor: Editor stage result, or ``None`` when the editor did not run
            (the pipeline skips it for empty text).
        output_text: Final text after every stage has been applied.
        alignment_ms: Wall-clock milliseconds spent in the alignment step.
        alignment_applied: ``applied_count`` reported by the alignment engine.
        alignment_skipped: ``skipped_count`` reported by the alignment engine.
        alignment_decisions: ``decisions`` reported by the alignment engine.
        fact_guard_ms: Wall-clock milliseconds spent in the fact guard
            (``0.0`` when it did not run).
        fact_guard_action: ``action`` reported by the fact guard; the
            pipeline uses ``"accepted"`` when the guard did not run.
        fact_guard_violations: ``violations_count`` reported by the fact guard.
        fact_guard_details: ``violations`` reported by the fact guard.
        vocabulary_ms: Wall-clock milliseconds spent on deterministic
            vocabulary replacements.
        total_ms: Wall-clock milliseconds for the whole run, measured from
            the start timestamp chosen by the entry point.
    """

    asr: AsrResult | None
    editor: EditorResult | None
    output_text: str
    alignment_ms: float
    alignment_applied: int
    alignment_skipped: int
    alignment_decisions: list[AlignmentDecision]
    fact_guard_ms: float
    fact_guard_action: str
    fact_guard_violations: int
    fact_guard_details: list[FactGuardViolation]
    vocabulary_ms: float
    total_ms: float
class PipelineEngine:
    """Run text through the alignment, editor, fact-guard and vocabulary stages.

    Audio is first transcribed by the optional ASR stage (``run_audio``); a
    transcript can also be supplied directly (``run_transcript``). Both entry
    points return a ``PipelineResult`` holding the final text together with
    per-stage timings and counters.
    """

    def __init__(
        self,
        *,
        asr_stage: Any | None,
        editor_stage: Any,
        vocabulary: VocabularyEngine,
        alignment_engine: AlignmentHeuristicEngine | None = None,
        fact_guard_engine: FactGuardEngine | None = None,
        safety_enabled: bool = True,
        safety_strict: bool = False,
    ) -> None:
        """Store the stage objects and safety flags.

        Args:
            asr_stage: Object exposing ``transcribe(audio)``; ``None`` makes
                ``run_audio`` unavailable.
            editor_stage: Object exposing
                ``rewrite(text, language=..., dictionary_context=...)``.
            vocabulary: Provides the editor's dictionary context and the
                final deterministic replacements.
            alignment_engine: Alignment heuristics; a default
                ``AlignmentHeuristicEngine`` is created when ``None``.
            fact_guard_engine: Fact guard; a default ``FactGuardEngine`` is
                created when ``None``.
            safety_enabled: Forwarded to the fact guard as ``enabled``.
            safety_strict: Forwarded to the fact guard as ``strict``.
        """
        self._asr_stage = asr_stage
        self._editor_stage = editor_stage
        self._vocabulary = vocabulary
        # Compare against None explicitly so a supplied engine is never
        # replaced merely because it happens to evaluate as falsy.
        self._alignment_engine = (
            AlignmentHeuristicEngine() if alignment_engine is None else alignment_engine
        )
        self._fact_guard_engine = (
            FactGuardEngine() if fact_guard_engine is None else fact_guard_engine
        )
        self._safety_enabled = bool(safety_enabled)
        self._safety_strict = bool(safety_strict)

    def run_audio(self, audio: Any) -> PipelineResult:
        """Transcribe *audio* and run the transcript through the pipeline.

        Raises:
            RuntimeError: If no ASR stage is configured, or if the fact guard
                rejects the editor output.
        """
        if self._asr_stage is None:
            raise RuntimeError("asr stage is not configured")
        # Start the clock before transcription so total_ms includes ASR time.
        started = time.perf_counter()
        asr_result = self._asr_stage.transcribe(audio)
        return self._run_transcript_core(
            asr_result.raw_text,
            language=asr_result.language,
            asr_result=asr_result,
            words=asr_result.words,
            started_at=started,
        )

    def run_transcript(self, transcript: str, *, language: str = "auto") -> PipelineResult:
        """Run an existing *transcript* through the pipeline without ASR.

        Raises:
            RuntimeError: If the fact guard rejects the editor output.
        """
        return self._run_transcript_core(
            transcript,
            language=language,
            asr_result=None,
            words=None,
            started_at=time.perf_counter(),
        )

    def _apply_alignment(
        self,
        text: str,
        words: list[Any] | None,
        asr_result: AsrResult | None,
    ) -> tuple[str, int, int, list[AlignmentDecision]]:
        """Best-effort alignment pass over *text*.

        Returns ``(aligned_text, applied, skipped, decisions)``. Alignment is
        optional: on any failure the input text is returned with zeroed
        counters, so the reported numbers never describe edits that were
        thrown away.
        """
        try:
            if words is None and asr_result is not None:
                words = asr_result.words
            # A missing word list means "no word timings", not a failure.
            word_list = list(words) if words is not None else []
            outcome = self._alignment_engine.apply(text, word_list)
            aligned = (outcome.draft_text or "").strip() or text
            # Read every field before returning so a partially populated
            # result is rejected as a whole.
            summary = (
                aligned,
                outcome.applied_count,
                outcome.skipped_count,
                outcome.decisions,
            )
        except Exception as exc:
            # Log the exception type only; the message could echo user text.
            logging.getLogger(__name__).warning(
                "alignment heuristics failed (%s); continuing with the unaligned transcript",
                type(exc).__name__,
            )
            return text, 0, 0, []
        return summary

    def _run_transcript_core(
        self,
        transcript: str,
        *,
        language: str,
        asr_result: AsrResult | None,
        words: list[Any] | None = None,
        started_at: float,
    ) -> PipelineResult:
        """Shared implementation behind ``run_audio`` and ``run_transcript``.

        Args:
            transcript: Raw text to process; ``None`` or blank is treated as "".
            language: Language hint forwarded to the editor stage.
            asr_result: ASR result to attach to the returned value, if any.
            words: Word-level data for alignment; falls back to
                ``asr_result.words`` when ``None``.
            started_at: ``time.perf_counter()`` timestamp used as the origin
                for ``total_ms``.

        Raises:
            RuntimeError: If the fact guard reports the action ``"rejected"``.
        """
        text = (transcript or "").strip()

        alignment_started = time.perf_counter()
        (
            aligned_text,
            alignment_applied,
            alignment_skipped,
            alignment_decisions,
        ) = self._apply_alignment(text, words, asr_result)
        alignment_ms = (time.perf_counter() - alignment_started) * 1000.0

        # Defaults reported when there is no text and the editor/guard are skipped.
        editor_result: EditorResult | None = None
        fact_guard_ms = 0.0
        fact_guard_action = "accepted"
        fact_guard_violations = 0
        fact_guard_details: list[FactGuardViolation] = []

        text = aligned_text
        if text:
            editor_result = self._editor_stage.rewrite(
                text,
                language=language,
                dictionary_context=self._vocabulary.build_ai_dictionary_context(),
            )
            candidate = (editor_result.final_text or "").strip()
            if candidate:
                text = candidate

            # NOTE(review): nesting was reconstructed from a view without
            # indentation; the guard is assumed to run whenever the editor
            # ran (the "accepted"/0 defaults above cover the skipped case).
            # Confirm against the repository.
            fact_guard_started = time.perf_counter()
            fact_guard_result = self._fact_guard_engine.apply(
                source_text=aligned_text,
                candidate_text=text,
                enabled=self._safety_enabled,
                strict=self._safety_strict,
            )
            fact_guard_ms = (time.perf_counter() - fact_guard_started) * 1000.0
            fact_guard_action = fact_guard_result.action
            fact_guard_violations = fact_guard_result.violations_count
            fact_guard_details = fact_guard_result.violations
            text = (fact_guard_result.final_text or "").strip()
            if fact_guard_action == "rejected":
                raise RuntimeError(
                    f"fact guard rejected editor output ({fact_guard_violations} violation(s))"
                )

        vocab_started = time.perf_counter()
        text = self._vocabulary.apply_deterministic_replacements(text).strip()
        vocabulary_ms = (time.perf_counter() - vocab_started) * 1000.0

        total_ms = (time.perf_counter() - started_at) * 1000.0
        return PipelineResult(
            asr=asr_result,
            editor=editor_result,
            output_text=text,
            alignment_ms=alignment_ms,
            alignment_applied=alignment_applied,
            alignment_skipped=alignment_skipped,
            alignment_decisions=alignment_decisions,
            fact_guard_ms=fact_guard_ms,
            fact_guard_action=fact_guard_action,
            fact_guard_violations=fact_guard_violations,
            fact_guard_details=fact_guard_details,
            vocabulary_ms=vocabulary_ms,
            total_ms=total_ms,
        )