From c4433e5a20e80b2677bc7dbd36eab13f443e024c Mon Sep 17 00:00:00 2001
From: Thales Maciel
Date: Wed, 11 Mar 2026 13:50:07 -0300
Subject: [PATCH] Preserve alignment edits without ASR words

Keep transcript-only runs eligible for alignment heuristics instead of
bailing out when the ASR stage does not supply word timings. Build
fallback AsrWord entries from the transcript so cue-based corrections
like "i mean" still apply, while reusing the existing literal guard for
verbatim phrases.

Cover the new path in alignment and pipeline tests, and validate with
python3 -m unittest tests.test_alignment_edits tests.test_pipeline_engine.
---
 src/stages/alignment_edits.py | 43 +++++++++++++++++++++++++++++------
 tests/test_alignment_edits.py | 18 +++++++++++++++
 tests/test_pipeline_engine.py | 17 ++++++++++++++
 3 files changed, 71 insertions(+), 7 deletions(-)

diff --git a/src/stages/alignment_edits.py b/src/stages/alignment_edits.py
index ce01cf7..c48cc00 100644
--- a/src/stages/alignment_edits.py
+++ b/src/stages/alignment_edits.py
@@ -33,7 +33,7 @@ class AlignmentResult:
 class AlignmentHeuristicEngine:
     def apply(self, transcript: str, words: list[AsrWord]) -> AlignmentResult:
         base_text = (transcript or "").strip()
-        if not base_text or not words:
+        if not base_text:
             return AlignmentResult(
                 draft_text=base_text,
                 decisions=[],
@@ -41,17 +41,26 @@ class AlignmentHeuristicEngine:
                 skipped_count=0,
             )
-        normalized_words = [_normalize_token(word.text) for word in words]
+        working_words = list(words) if words else _fallback_words_from_transcript(base_text)
+        if not working_words:
+            return AlignmentResult(
+                draft_text=base_text,
+                decisions=[],
+                applied_count=0,
+                skipped_count=0,
+            )
+
+        normalized_words = [_normalize_token(word.text) for word in working_words]
         literal_guard = _has_literal_guard(base_text)
 
         out_tokens: list[str] = []
         decisions: list[AlignmentDecision] = []
         i = 0
-        while i < len(words):
-            cue = _match_cue(words, normalized_words, i)
+        while i < len(working_words):
+            cue = _match_cue(working_words, normalized_words, i)
             if cue is not None and out_tokens:
                 cue_len, cue_label = cue
                 correction_start = i + cue_len
-                correction_end = _capture_phrase_end(words, correction_start)
+                correction_end = _capture_phrase_end(working_words, correction_start)
                 if correction_end <= correction_start:
                     decisions.append(
                         AlignmentDecision(
@@ -65,7 +74,7 @@ class AlignmentHeuristicEngine:
                     )
                     i += cue_len
                     continue
-                correction_tokens = _slice_clean_words(words, correction_start, correction_end)
+                correction_tokens = _slice_clean_words(working_words, correction_start, correction_end)
                 if not correction_tokens:
                     i = correction_end
                     continue
@@ -113,7 +122,7 @@ class AlignmentHeuristicEngine:
                 i = correction_end
                 continue
 
-            token = _strip_token(words[i].text)
+            token = _strip_token(working_words[i].text)
             if token:
                 out_tokens.append(token)
             i += 1
@@ -296,3 +305,23 @@ def _has_literal_guard(text: str) -> bool:
         "quote",
     )
     return any(guard in normalized for guard in guards)
+
+
+def _fallback_words_from_transcript(text: str) -> list[AsrWord]:
+    tokens = [item for item in (text or "").split() if item.strip()]
+    if not tokens:
+        return []
+    words: list[AsrWord] = []
+    start = 0.0
+    step = 0.15
+    for token in tokens:
+        words.append(
+            AsrWord(
+                text=token,
+                start_s=start,
+                end_s=start + 0.1,
+                prob=None,
+            )
+        )
+        start += step
+    return words
diff --git a/tests/test_alignment_edits.py b/tests/test_alignment_edits.py
index 0e8fb4e..da32fb2 100644
--- a/tests/test_alignment_edits.py
+++ b/tests/test_alignment_edits.py
@@ -47,6 +47,15 @@ class AlignmentHeuristicEngineTests(unittest.TestCase):
         self.assertEqual(result.applied_count, 1)
         self.assertTrue(any(item.rule_id == "cue_correction" for item in result.decisions))
 
+    def test_applies_i_mean_tail_correction_without_asr_words(self):
+        engine = AlignmentHeuristicEngine()
+
+        result = engine.apply("schedule for 5, i mean 6", [])
+
+        self.assertEqual(result.draft_text, "schedule for 6")
+        self.assertEqual(result.applied_count, 1)
+        self.assertTrue(any(item.rule_id == "cue_correction" for item in result.decisions))
+
     def test_preserves_literal_i_mean_context(self):
         engine = AlignmentHeuristicEngine()
         words = _words(["write", "exactly", "i", "mean", "this", "sincerely"])
@@ -57,6 +66,15 @@ class AlignmentHeuristicEngineTests(unittest.TestCase):
         self.assertEqual(result.applied_count, 0)
         self.assertGreaterEqual(result.skipped_count, 1)
 
+    def test_preserves_literal_i_mean_context_without_asr_words(self):
+        engine = AlignmentHeuristicEngine()
+
+        result = engine.apply("write exactly i mean this sincerely", [])
+
+        self.assertEqual(result.draft_text, "write exactly i mean this sincerely")
+        self.assertEqual(result.applied_count, 0)
+        self.assertGreaterEqual(result.skipped_count, 1)
+
     def test_collapses_exact_restart_repetition(self):
         engine = AlignmentHeuristicEngine()
         words = _words(["please", "send", "it", "please", "send", "it"])
diff --git a/tests/test_pipeline_engine.py b/tests/test_pipeline_engine.py
index cb8e5eb..5d3c281 100644
--- a/tests/test_pipeline_engine.py
+++ b/tests/test_pipeline_engine.py
@@ -93,6 +93,23 @@ class PipelineEngineTests(unittest.TestCase):
         self.assertEqual(result.fact_guard_action, "accepted")
         self.assertEqual(result.fact_guard_violations, 0)
 
+    def test_run_transcript_without_words_applies_i_mean_correction(self):
+        editor = _FakeEditor()
+        pipeline = PipelineEngine(
+            asr_stage=None,
+            editor_stage=editor,
+            vocabulary=VocabularyEngine(VocabularyConfig()),
+            alignment_engine=AlignmentHeuristicEngine(),
+        )
+
+        result = pipeline.run_transcript("schedule for 5, i mean 6", language="en")
+
+        self.assertEqual(editor.calls[0]["transcript"], "schedule for 6")
+        self.assertEqual(result.output_text, "schedule for 6")
+        self.assertEqual(result.alignment_applied, 1)
+        self.assertEqual(result.fact_guard_action, "accepted")
+        self.assertEqual(result.fact_guard_violations, 0)
+
     def test_fact_guard_fallbacks_when_editor_changes_number(self):
         editor = _FakeEditor(output_text="set alarm for 8")
         pipeline = PipelineEngine(