Preserve alignment edits without ASR words

Keep transcript-only runs eligible for alignment heuristics instead of bailing out when the ASR stage does not supply word timings. Build fallback AsrWord entries from the transcript so that cue-based corrections such as "i mean" still apply, while reusing the existing literal guard so that verbatim phrases are left unchanged. Cover the new path in the alignment and pipeline tests, and validate with `python3 -m unittest tests.test_alignment_edits tests.test_pipeline_engine`.
2026-03-11 13:50:07 -03:00 · 2026-03-11 13:50:07 -03:00 · c4433e5a20
commit c4433e5a20
parent 8169db98f4
3 changed files with 71 additions and 7 deletions
--- a/tests/test_alignment_edits.py
+++ b/tests/test_alignment_edits.py
@ -47,6 +47,15 @@ class AlignmentHeuristicEngineTests(unittest.TestCase):
        self.assertEqual(result.applied_count, 1)
        self.assertTrue(any(item.rule_id == "cue_correction" for item in result.decisions))

+    def test_applies_i_mean_tail_correction_without_asr_words(self):
+        engine = AlignmentHeuristicEngine()
+
+        result = engine.apply("schedule for 5, i mean 6", [])
+
+        self.assertEqual(result.draft_text, "schedule for 6")
+        self.assertEqual(result.applied_count, 1)
+        self.assertTrue(any(item.rule_id == "cue_correction" for item in result.decisions))
+
    def test_preserves_literal_i_mean_context(self):
        engine = AlignmentHeuristicEngine()
        words = _words(["write", "exactly", "i", "mean", "this", "sincerely"])
@ -57,6 +66,15 @@ class AlignmentHeuristicEngineTests(unittest.TestCase):
        self.assertEqual(result.applied_count, 0)
        self.assertGreaterEqual(result.skipped_count, 1)

+    def test_preserves_literal_i_mean_context_without_asr_words(self):
+        engine = AlignmentHeuristicEngine()
+
+        result = engine.apply("write exactly i mean this sincerely", [])
+
+        self.assertEqual(result.draft_text, "write exactly i mean this sincerely")
+        self.assertEqual(result.applied_count, 0)
+        self.assertGreaterEqual(result.skipped_count, 1)
+
    def test_collapses_exact_restart_repetition(self):
        engine = AlignmentHeuristicEngine()
        words = _words(["please", "send", "it", "please", "send", "it"])
--- a/tests/test_pipeline_engine.py
+++ b/tests/test_pipeline_engine.py
@ -93,6 +93,23 @@ class PipelineEngineTests(unittest.TestCase):
        self.assertEqual(result.fact_guard_action, "accepted")
        self.assertEqual(result.fact_guard_violations, 0)

+    def test_run_transcript_without_words_applies_i_mean_correction(self):
+        editor = _FakeEditor()
+        pipeline = PipelineEngine(
+            asr_stage=None,
+            editor_stage=editor,
+            vocabulary=VocabularyEngine(VocabularyConfig()),
+            alignment_engine=AlignmentHeuristicEngine(),
+        )
+
+        result = pipeline.run_transcript("schedule for 5, i mean 6", language="en")
+
+        self.assertEqual(editor.calls[0]["transcript"], "schedule for 6")
+        self.assertEqual(result.output_text, "schedule for 6")
+        self.assertEqual(result.alignment_applied, 1)
+        self.assertEqual(result.fact_guard_action, "accepted")
+        self.assertEqual(result.fact_guard_violations, 0)
+
    def test_fact_guard_fallbacks_when_editor_changes_number(self):
        editor = _FakeEditor(output_text="set alarm for 8")
        pipeline = PipelineEngine(