Add benchmark-driven model promotion workflow and pipeline stages

2026-02-28 15:12:33 -03:00 · 2026-02-28 15:12:33 -03:00 · 8c1f7c1e13
commit 8c1f7c1e13
parent 98b13d1069
38 changed files with 5300 additions and 503 deletions
--- a/tests/test_fact_guard.py
+++ b/tests/test_fact_guard.py
@@ -0,0 +1,86 @@
+import sys
+import unittest
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]  # directory that contains tests/
+SRC = ROOT / "src"
+if str(SRC) not in sys.path:  # skip when already listed so the entry is not duplicated
+    sys.path.insert(0, str(SRC))  # prepended before the `stages` import below; presumably src/ holds it
+
+from stages.fact_guard import FactGuardEngine
+
+
+class FactGuardEngineTests(unittest.TestCase):
+    def test_disabled_guard_accepts_candidate(self):
+        guard = FactGuardEngine()
+
+        result = guard.apply(
+            "set alarm for 7",
+            "set alarm for 8",
+            enabled=False,
+            strict=False,
+        )
+
+        self.assertEqual(result.action, "accepted")
+        self.assertEqual(result.final_text, "set alarm for 8")
+        self.assertEqual(result.violations_count, 0)
+
+    def test_fallbacks_on_number_change(self):
+        guard = FactGuardEngine()
+
+        result = guard.apply(
+            "set alarm for 7",
+            "set alarm for 8",
+            enabled=True,
+            strict=False,
+        )
+
+        self.assertEqual(result.action, "fallback")
+        self.assertEqual(result.final_text, "set alarm for 7")
+        self.assertGreaterEqual(result.violations_count, 1)
+
+    def test_fallbacks_on_name_change(self):
+        guard = FactGuardEngine()
+
+        result = guard.apply(
+            "invite Marta tomorrow",
+            "invite Martha tomorrow",
+            enabled=True,
+            strict=False,
+        )
+
+        self.assertEqual(result.action, "fallback")
+        self.assertEqual(result.final_text, "invite Marta tomorrow")
+        self.assertGreaterEqual(result.violations_count, 1)
+
+    def test_accepts_style_only_rewrite(self):
+        guard = FactGuardEngine()
+
+        result = guard.apply(
+            "please send the report",
+            "Please send the report.",
+            enabled=True,
+            strict=False,
+        )
+
+        self.assertEqual(result.action, "accepted")
+        self.assertEqual(result.final_text, "Please send the report.")
+        self.assertEqual(result.violations_count, 0)
+
+    def test_strict_mode_rejects_large_lexical_additions(self):
+        guard = FactGuardEngine()
+
+        result = guard.apply(
+            "send the report",
+            "send the report and include two extra paragraphs with assumptions",
+            enabled=True,
+            strict=True,
+        )
+
+        self.assertEqual(result.action, "rejected")
+        self.assertEqual(result.final_text, "send the report")
+        self.assertGreaterEqual(result.violations_count, 1)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when the file is executed directly