Add runtime capability scaffolding and align docs
This commit is contained in:
parent
fb8b985049
commit
cbf212bb7b
19 changed files with 1048 additions and 71 deletions
|
|
@ -94,7 +94,7 @@ def _stepwise_model_response(payload: dict[str, Any], step: int) -> dict[str, An
|
|||
"arguments": json.dumps(
|
||||
{
|
||||
"vm_id": vm_id,
|
||||
"command": "printf 'git version 2.44.0\\n'",
|
||||
"command": "printf 'true\\n'",
|
||||
}
|
||||
),
|
||||
},
|
||||
|
|
@ -125,14 +125,14 @@ def test_run_ollama_tool_demo_happy_path(monkeypatch: pytest.MonkeyPatch) -> Non
|
|||
result = ollama_demo.run_ollama_tool_demo(log=logs.append)
|
||||
|
||||
assert result["fallback_used"] is False
|
||||
assert "git version" in str(result["exec_result"]["stdout"])
|
||||
assert str(result["exec_result"]["stdout"]).strip() == "true"
|
||||
assert result["final_response"] == "Executed git command in ephemeral VM."
|
||||
assert len(result["tool_events"]) == 4
|
||||
assert any("[model] input user:" in line for line in logs)
|
||||
assert any("[model] output assistant:" in line for line in logs)
|
||||
assert any(line == "[model] input user" for line in logs)
|
||||
assert any(line == "[model] output assistant" for line in logs)
|
||||
assert any("[model] tool_call vm_exec" in line for line in logs)
|
||||
assert any("[tool] calling vm_exec" in line for line in logs)
|
||||
assert any("[tool] result vm_exec " in line for line in logs)
|
||||
assert any(line == "[tool] calling vm_exec" for line in logs)
|
||||
assert any(line == "[tool] result vm_exec" for line in logs)
|
||||
|
||||
|
||||
def test_run_ollama_tool_demo_recovers_from_bad_vm_id(
|
||||
|
|
@ -158,7 +158,7 @@ def test_run_ollama_tool_demo_recovers_from_bad_vm_id(
|
|||
"arguments": json.dumps(
|
||||
{
|
||||
"vm_id": "vm_list_profiles",
|
||||
"command": "git --version",
|
||||
"command": ollama_demo.NETWORK_PROOF_COMMAND,
|
||||
}
|
||||
),
|
||||
},
|
||||
|
|
@ -219,7 +219,7 @@ def test_run_ollama_tool_demo_resolves_vm_id_placeholder(
|
|||
"arguments": json.dumps(
|
||||
{
|
||||
"vm_id": "<vm_id_returned_by_vm_create>",
|
||||
"command": "printf 'git version 2.44.0\\n'",
|
||||
"command": "printf 'true\\n'",
|
||||
"timeout_seconds": "300",
|
||||
}
|
||||
),
|
||||
|
|
@ -292,14 +292,49 @@ def test_run_ollama_tool_demo_uses_fallback_when_not_strict(
|
|||
|
||||
monkeypatch.setattr(ollama_demo, "_post_chat_completion", fake_post_chat_completion)
|
||||
monkeypatch.setattr(ollama_demo, "VmManager", TestVmManager)
|
||||
monkeypatch.setattr(
|
||||
ollama_demo,
|
||||
"_run_direct_lifecycle_fallback",
|
||||
lambda manager: {
|
||||
"vm_id": "vm-1",
|
||||
"command": ollama_demo.NETWORK_PROOF_COMMAND,
|
||||
"stdout": "true\n",
|
||||
"stderr": "",
|
||||
"exit_code": 0,
|
||||
"duration_ms": 5,
|
||||
"execution_mode": "host_compat",
|
||||
"cleanup": {"deleted": True, "reason": "post_exec_cleanup", "vm_id": "vm-1"},
|
||||
},
|
||||
)
|
||||
logs: list[str] = []
|
||||
result = ollama_demo.run_ollama_tool_demo(strict=False, log=logs.append)
|
||||
assert result["fallback_used"] is True
|
||||
assert int(result["exec_result"]["exit_code"]) == 0
|
||||
assert any("[model] output assistant: No tools" in line for line in logs)
|
||||
assert any(line == "[model] output assistant" for line in logs)
|
||||
assert any("[fallback]" in line for line in logs)
|
||||
|
||||
|
||||
def test_run_ollama_tool_demo_verbose_logs_values(monkeypatch: pytest.MonkeyPatch) -> None:
    """Run the demo with ``verbose=True`` and check the value-bearing log lines.

    The chat-completion transport is replaced by a stub that feeds the
    scripted ``_stepwise_model_response`` replies, one per request.
    """
    call_count = 0

    def fake_post_chat_completion(base_url: str, payload: dict[str, Any]) -> dict[str, Any]:
        # The stub ignores the endpoint; only the request ordinal matters.
        nonlocal call_count
        del base_url
        call_count = call_count + 1
        return _stepwise_model_response(payload, call_count)

    monkeypatch.setattr(ollama_demo, "_post_chat_completion", fake_post_chat_completion)

    captured: list[str] = []
    outcome = ollama_demo.run_ollama_tool_demo(verbose=True, log=captured.append)

    assert outcome["fallback_used"] is False
    assert str(outcome["exec_result"]["stdout"]).strip() == "true"

    # Each fragment must appear in at least one emitted log line.
    expected_fragments = (
        "[model] input user:",
        "[model] tool_call vm_list_profiles args={}",
        "[tool] result vm_exec ",
    )
    for fragment in expected_fragments:
        assert any(fragment in entry for entry in captured)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("tool_call", "error"),
|
||||
[
|
||||
|
|
@ -346,8 +381,8 @@ def test_run_ollama_tool_demo_max_rounds(monkeypatch: pytest.MonkeyPatch) -> Non
|
|||
("exec_result", "error"),
|
||||
[
|
||||
("bad", "result shape is invalid"),
|
||||
({"exit_code": 1, "stdout": "git version 2"}, "expected exit_code=0"),
|
||||
({"exit_code": 0, "stdout": "no git"}, "did not contain `git version`"),
|
||||
({"exit_code": 1, "stdout": "true"}, "expected exit_code=0"),
|
||||
({"exit_code": 0, "stdout": "false"}, "did not confirm repository clone success"),
|
||||
],
|
||||
)
|
||||
def test_run_ollama_tool_demo_exec_result_validation(
|
||||
|
|
@ -404,7 +439,7 @@ def test_dispatch_tool_call_coverage(tmp_path: Path) -> None:
|
|||
executed = ollama_demo._dispatch_tool_call(
|
||||
manager,
|
||||
"vm_exec",
|
||||
{"vm_id": vm_id, "command": "printf 'git version\\n'", "timeout_seconds": "30"},
|
||||
{"vm_id": vm_id, "command": "printf 'true\\n'", "timeout_seconds": "30"},
|
||||
)
|
||||
assert int(executed["exit_code"]) == 0
|
||||
with pytest.raises(RuntimeError, match="unexpected tool requested by model"):
|
||||
|
|
@ -529,6 +564,13 @@ def test_build_parser_defaults() -> None:
|
|||
args = parser.parse_args([])
|
||||
assert args.model == ollama_demo.DEFAULT_OLLAMA_MODEL
|
||||
assert args.base_url == ollama_demo.DEFAULT_OLLAMA_BASE_URL
|
||||
assert args.verbose is False
|
||||
|
||||
|
||||
def test_build_parser_verbose_flag() -> None:
    """Passing ``-v`` on the command line sets ``verbose`` to ``True``."""
    parsed = ollama_demo._build_parser().parse_args(["-v"])
    assert parsed.verbose is True
|
||||
|
||||
|
||||
def test_main_uses_parser_and_prints_logs(
|
||||
|
|
@ -537,21 +579,51 @@ def test_main_uses_parser_and_prints_logs(
|
|||
) -> None:
|
||||
class StubParser:
|
||||
def parse_args(self) -> argparse.Namespace:
|
||||
return argparse.Namespace(base_url="http://x", model="m")
|
||||
return argparse.Namespace(base_url="http://x", model="m", verbose=False)
|
||||
|
||||
monkeypatch.setattr(ollama_demo, "_build_parser", lambda: StubParser())
|
||||
monkeypatch.setattr(
|
||||
ollama_demo,
|
||||
"run_ollama_tool_demo",
|
||||
lambda base_url, model, strict=True, log=None: {
|
||||
"exec_result": {"exit_code": 0, "stdout": "git version 2.44.0\n"},
|
||||
lambda base_url, model, strict=True, verbose=False, log=None: {
|
||||
"exec_result": {
|
||||
"exit_code": 0,
|
||||
"stdout": "true\n",
|
||||
"execution_mode": "host_compat",
|
||||
},
|
||||
"fallback_used": False,
|
||||
},
|
||||
)
|
||||
ollama_demo.main()
|
||||
output = capsys.readouterr().out
|
||||
assert "[summary] exit_code=0 fallback_used=False" in output
|
||||
assert "[summary] stdout=git version 2.44.0" in output
|
||||
assert "[summary] exit_code=0 fallback_used=False execution_mode=host_compat" in output
|
||||
assert "[summary] stdout=" not in output
|
||||
|
||||
|
||||
def test_main_verbose_prints_stdout(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """With ``verbose=True`` parsed, ``main()`` prints the stdout summary line."""

    class StubParser:
        def parse_args(self) -> argparse.Namespace:
            return argparse.Namespace(base_url="http://x", model="m", verbose=True)

    def fake_run(base_url, model, strict=True, verbose=False, log=None):
        # Canned successful demo result; all arguments are ignored.
        exec_result = {
            "exit_code": 0,
            "stdout": "true\n",
            "execution_mode": "host_compat",
        }
        return {"exec_result": exec_result, "fallback_used": False}

    # Calling the class yields a fresh stub instance, just as a factory would.
    monkeypatch.setattr(ollama_demo, "_build_parser", StubParser)
    monkeypatch.setattr(ollama_demo, "run_ollama_tool_demo", fake_run)

    ollama_demo.main()

    printed = capsys.readouterr().out
    assert "[summary] stdout=true" in printed
|
||||
|
||||
|
||||
def test_main_logs_error_and_exits_nonzero(
|
||||
|
|
@ -560,12 +632,18 @@ def test_main_logs_error_and_exits_nonzero(
|
|||
) -> None:
|
||||
class StubParser:
|
||||
def parse_args(self) -> argparse.Namespace:
|
||||
return argparse.Namespace(base_url="http://x", model="m")
|
||||
return argparse.Namespace(base_url="http://x", model="m", verbose=False)
|
||||
|
||||
monkeypatch.setattr(ollama_demo, "_build_parser", lambda: StubParser())
|
||||
|
||||
def fake_run(base_url: str, model: str, strict: bool = True, log: Any = None) -> dict[str, Any]:
|
||||
del base_url, model, strict, log
|
||||
def fake_run(
|
||||
base_url: str,
|
||||
model: str,
|
||||
strict: bool = True,
|
||||
verbose: bool = False,
|
||||
log: Any = None,
|
||||
) -> dict[str, Any]:
|
||||
del base_url, model, strict, verbose, log
|
||||
raise RuntimeError("demo did not execute a successful vm_exec")
|
||||
|
||||
monkeypatch.setattr(ollama_demo, "run_ollama_tool_demo", fake_run)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue