Add benchmark-driven model promotion workflow and pipeline stages
Some checks failed
ci / test-and-build (push) Has been cancelled
Some checks failed
ci / test-and-build (push) Has been cancelled
This commit is contained in:
parent
98b13d1069
commit
8c1f7c1e13
38 changed files with 5300 additions and 503 deletions
86
tests/test_fact_guard.py
Normal file
86
tests/test_fact_guard.py
Normal file
|
|
@@ -0,0 +1,86 @@
|
|||
import sys
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
# Resolve the directory two levels above this file and expose its ``src``
# sub-directory on ``sys.path`` so the ``stages`` import below can be found
# without installing the project first.
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT.joinpath("src")
if str(SRC) not in sys.path:
    # Prepend so this checkout's sources are searched before later entries.
    sys.path.insert(0, str(SRC))
|
||||
|
||||
from stages.fact_guard import FactGuardEngine
|
||||
|
||||
|
||||
class FactGuardEngineTests(unittest.TestCase):
    """Checks on the result returned by ``FactGuardEngine.apply``.

    Every test hands the engine a source text and a candidate rewrite, then
    inspects the ``action``, ``final_text`` and ``violations_count``
    attributes of the returned result.
    """

    def _run_guard(self, source_text, candidate_text, *, enabled, strict):
        # Build a fresh engine for every call so tests never share an instance.
        engine = FactGuardEngine()
        return engine.apply(
            source_text,
            candidate_text,
            enabled=enabled,
            strict=strict,
        )

    def test_disabled_guard_accepts_candidate(self):
        # With the guard switched off, the candidate is expected to pass
        # through untouched even though the number differs from the source.
        outcome = self._run_guard(
            "set alarm for 7",
            "set alarm for 8",
            enabled=False,
            strict=False,
        )

        self.assertEqual(outcome.action, "accepted")
        self.assertEqual(outcome.final_text, "set alarm for 8")
        self.assertEqual(outcome.violations_count, 0)

    def test_fallbacks_on_number_change(self):
        # Same texts as above, but with the guard enabled: the changed number
        # should trigger a fallback to the source text.
        outcome = self._run_guard(
            "set alarm for 7",
            "set alarm for 8",
            enabled=True,
            strict=False,
        )

        self.assertEqual(outcome.action, "fallback")
        self.assertEqual(outcome.final_text, "set alarm for 7")
        self.assertGreaterEqual(outcome.violations_count, 1)

    def test_fallbacks_on_name_change(self):
        # "Marta" -> "Martha" is a one-letter change to a name; the guard is
        # expected to fall back to the source text.
        outcome = self._run_guard(
            "invite Marta tomorrow",
            "invite Martha tomorrow",
            enabled=True,
            strict=False,
        )

        self.assertEqual(outcome.action, "fallback")
        self.assertEqual(outcome.final_text, "invite Marta tomorrow")
        self.assertGreaterEqual(outcome.violations_count, 1)

    def test_accepts_style_only_rewrite(self):
        # Only capitalisation and trailing punctuation differ, so the
        # candidate should be accepted with no violations.
        outcome = self._run_guard(
            "please send the report",
            "Please send the report.",
            enabled=True,
            strict=False,
        )

        self.assertEqual(outcome.action, "accepted")
        self.assertEqual(outcome.final_text, "Please send the report.")
        self.assertEqual(outcome.violations_count, 0)

    def test_strict_mode_rejects_large_lexical_additions(self):
        # In strict mode, a candidate that appends many new words is expected
        # to be rejected in favour of the source text.
        outcome = self._run_guard(
            "send the report",
            "send the report and include two extra paragraphs with assumptions",
            enabled=True,
            strict=True,
        )

        self.assertEqual(outcome.action, "rejected")
        self.assertEqual(outcome.final_text, "send the report")
        self.assertGreaterEqual(outcome.violations_count, 1)
|
||||
|
||||
|
||||
# Allow running this file directly (``python tests/test_fact_guard.py``)
# in addition to discovery through a test runner.
if __name__ == "__main__":
    unittest.main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue