# pyro-mcp/tests/test_ollama_demo.py

from __future__ import annotations

import argparse
import json
import urllib.error
import urllib.request
from pathlib import Path
from typing import Any

import pytest

import pyro_mcp.ollama_demo as ollama_demo
from pyro_mcp.vm_manager import VmManager as RealVmManager


@pytest.fixture(autouse=True)
def _mock_vm_manager_for_tests(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Replace ``ollama_demo.VmManager`` with a mock-backend subclass for every test.

    The substitute takes no constructor arguments and keeps its state under
    ``tmp_path / "vms"``.
    """
    vm_root = tmp_path / "vms"

    class MockBackedVmManager(RealVmManager):
        def __init__(self) -> None:
            super().__init__(backend_name="mock", base_dir=vm_root)

    monkeypatch.setattr(ollama_demo, "VmManager", MockBackedVmManager)


def _stepwise_model_response(payload: dict[str, Any], step: int) -> dict[str, Any]:
    if step == 1:
        return {
            "choices": [
                {
                    "message": {
                        "role": "assistant",
                        "content": "",
                        "tool_calls": [{"id": "1", "function": {"name": "vm_list_profiles"}}],
                    }
                }
            ]
        }
    if step == 2:
        return {
            "choices": [
                {
                    "message": {
                        "role": "assistant",
                        "content": "",
                        "tool_calls": [
                            {
                                "id": "2",
                                "function": {
                                    "name": "vm_create",
                                    "arguments": json.dumps(
                                        {"profile": "debian-git", "vcpu_count": 1, "mem_mib": 512}
                                    ),
                                },
                            }
                        ],
                    }
                }
            ]
        }
    if step == 3:
        vm_id = json.loads(payload["messages"][-1]["content"])["vm_id"]
        return {
            "choices": [
                {
                    "message": {
                        "role": "assistant",
                        "content": "",
                        "tool_calls": [
                            {
                                "id": "3",
                                "function": {
                                    "name": "vm_start",
                                    "arguments": json.dumps({"vm_id": vm_id}),
                                },
                            }
                        ],
                    }
                }
            ]
        }
    if step == 4:
        vm_id = json.loads(payload["messages"][-1]["content"])["vm_id"]
        return {
            "choices": [
                {
                    "message": {
                        "role": "assistant",
                        "content": "",
                        "tool_calls": [
                            {
                                "id": "4",
                                "function": {
                                    "name": "vm_exec",
                                    "arguments": json.dumps(
                                        {
                                            "vm_id": vm_id,
                                            "command": "printf 'git version 2.44.0\\n'",
                                        }
                                    ),
                                },
                            }
                        ],
                    }
                }
            ]
        }
    return {
        "choices": [
            {"message": {"role": "assistant", "content": "Executed git command in ephemeral VM."}}
        ]
    }


def test_run_ollama_tool_demo_happy_path(monkeypatch: pytest.MonkeyPatch) -> None:
    """Run the demo against the scripted four-tool conversation and check its result."""
    seen_payloads: list[dict[str, Any]] = []

    def scripted_completion(base_url: str, payload: dict[str, Any]) -> dict[str, Any]:
        assert base_url == "http://localhost:11434/v1"
        seen_payloads.append(payload)
        # The running request count (1-based) selects the scripted reply.
        return _stepwise_model_response(payload, len(seen_payloads))

    monkeypatch.setattr(ollama_demo, "_post_chat_completion", scripted_completion)

    captured_lines: list[str] = []
    outcome = ollama_demo.run_ollama_tool_demo(log=captured_lines.append)

    assert outcome["fallback_used"] is False
    assert "git version" in str(outcome["exec_result"]["stdout"])
    assert outcome["final_response"] == "Executed git command in ephemeral VM."
    assert len(outcome["tool_events"]) == 4
    assert any("[tool] calling vm_exec" in entry for entry in captured_lines)


def test_run_ollama_tool_demo_recovers_from_bad_vm_id(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check the demo still succeeds when the first ``vm_exec`` call names a bogus VM id."""
    seen_payloads: list[dict[str, Any]] = []

    def scripted_completion(base_url: str, payload: dict[str, Any]) -> dict[str, Any]:
        assert base_url == "http://localhost:11434/v1"
        seen_payloads.append(payload)
        round_number = len(seen_payloads)
        if round_number != 1:
            # After the bad opening call, replay the normal script shifted by one round.
            return _stepwise_model_response(payload, round_number - 1)

        # Opening reply: vm_exec aimed at a vm_id that is really a tool name.
        bogus_arguments = {
            "vm_id": "vm_list_profiles",
            "command": "git --version",
        }
        bogus_call = {
            "id": "1",
            "function": {"name": "vm_exec", "arguments": json.dumps(bogus_arguments)},
        }
        return {"choices": [{"message": {"role": "assistant", "tool_calls": [bogus_call]}}]}

    monkeypatch.setattr(ollama_demo, "_post_chat_completion", scripted_completion)

    outcome = ollama_demo.run_ollama_tool_demo()
    opening_event = outcome["tool_events"][0]
    assert opening_event["tool_name"] == "vm_exec"
    assert opening_event["success"] is False
    assert "does not exist" in str(opening_event["result"]["error"])
    assert int(outcome["exec_result"]["exit_code"]) == 0


def test_run_ollama_tool_demo_raises_without_vm_exec(monkeypatch: pytest.MonkeyPatch) -> None:
    """Expect a ``RuntimeError`` when the model replies with text only and never calls a tool."""

    def text_only_completion(base_url: str, payload: dict[str, Any]) -> dict[str, Any]:
        del base_url, payload
        plain_message = {"role": "assistant", "content": "No tools"}
        return {"choices": [{"message": plain_message}]}

    monkeypatch.setattr(ollama_demo, "_post_chat_completion", text_only_completion)

    expected_error = pytest.raises(RuntimeError, match="did not execute a successful vm_exec")
    with expected_error:
        ollama_demo.run_ollama_tool_demo()


def test_run_ollama_tool_demo_uses_fallback_when_not_strict(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``strict=False`` falls back when the model never calls a tool.

    The fake completion endpoint always answers with plain text. The test
    expects the result to be flagged ``fallback_used``, the exec result to
    report exit code 0, and a ``[fallback]`` line to reach the log callback.

    ``ollama_demo.VmManager`` is already swapped for a mock-backend manager by
    this module's autouse ``_mock_vm_manager_for_tests`` fixture, so it is not
    patched again here.
    """

    def fake_post_chat_completion(base_url: str, payload: dict[str, Any]) -> dict[str, Any]:
        del base_url, payload
        return {"choices": [{"message": {"role": "assistant", "content": "No tools"}}]}

    monkeypatch.setattr(ollama_demo, "_post_chat_completion", fake_post_chat_completion)

    logs: list[str] = []
    result = ollama_demo.run_ollama_tool_demo(strict=False, log=logs.append)

    assert result["fallback_used"] is True
    assert int(result["exec_result"]["exit_code"]) == 0
    assert any("[fallback]" in line for line in logs)


@pytest.mark.parametrize(
    ("tool_call", "error"),
    [
        (1, "invalid tool call entry"),
        ({"id": "", "function": {"name": "vm_list_profiles"}}, "valid call id"),
        ({"id": "1"}, "function metadata"),
        ({"id": "1", "function": {"name": 3}}, "name is invalid"),
    ],
)
def test_run_ollama_tool_demo_tool_call_validation(
    monkeypatch: pytest.MonkeyPatch,
    tool_call: Any,
    error: str,
) -> None:
    """Expect a ``RuntimeError`` matching ``error`` for each malformed ``tool_call`` entry."""

    def malformed_completion(base_url: str, payload: dict[str, Any]) -> dict[str, Any]:
        del base_url, payload
        # The parametrized entry is the only tool call in the reply.
        message = {"role": "assistant", "tool_calls": [tool_call]}
        return {"choices": [{"message": message}]}

    monkeypatch.setattr(ollama_demo, "_post_chat_completion", malformed_completion)

    with pytest.raises(RuntimeError, match=error):
        ollama_demo.run_ollama_tool_demo()


def test_run_ollama_tool_demo_max_rounds(monkeypatch: pytest.MonkeyPatch) -> None:
    """Expect a ``RuntimeError`` when every reply requests ``vm_list_profiles`` again."""

    def repeating_completion(base_url: str, payload: dict[str, Any]) -> dict[str, Any]:
        del base_url, payload
        # Same tool call on every round, so the conversation never finishes.
        list_profiles_call = {"id": "1", "function": {"name": "vm_list_profiles"}}
        message = {"role": "assistant", "tool_calls": [list_profiles_call]}
        return {"choices": [{"message": message}]}

    monkeypatch.setattr(ollama_demo, "_post_chat_completion", repeating_completion)

    with pytest.raises(RuntimeError, match="exceeded maximum rounds"):
        ollama_demo.run_ollama_tool_demo()


@pytest.mark.parametrize(
    ("exec_result", "error"),
    [
        ("bad", "result shape is invalid"),
        ({"exit_code": 1, "stdout": "git version 2"}, "expected exit_code=0"),
        ({"exit_code": 0, "stdout": "no git"}, "did not contain `git version`"),
    ],
)
def test_run_ollama_tool_demo_exec_result_validation(
    monkeypatch: pytest.MonkeyPatch,
    exec_result: Any,
    error: str,
) -> None:
    """Expect a ``RuntimeError`` matching ``error`` for each unacceptable ``vm_exec`` result."""
    # Two scripted replies: one vm_exec request, then a closing text message.
    vm_exec_call = {"id": "1", "function": {"name": "vm_exec", "arguments": "{}"}}
    vm_exec_reply: dict[str, Any] = {
        "choices": [{"message": {"role": "assistant", "tool_calls": [vm_exec_call]}}]
    }
    closing_reply: dict[str, Any] = {
        "choices": [{"message": {"role": "assistant", "content": "done"}}]
    }
    responses: list[dict[str, Any]] = [vm_exec_reply, closing_reply]

    def queued_completion(base_url: str, payload: dict[str, Any]) -> dict[str, Any]:
        del base_url, payload
        return responses.pop(0)

    def stub_dispatch(manager: Any, tool_name: str, arguments: dict[str, Any]) -> Any:
        del manager, arguments
        # Only vm_exec gets the parametrized result; any other tool reports success.
        if tool_name != "vm_exec":
            return {"ok": True}
        return exec_result

    monkeypatch.setattr(ollama_demo, "_post_chat_completion", queued_completion)
    monkeypatch.setattr(ollama_demo, "_dispatch_tool_call", stub_dispatch)

    with pytest.raises(RuntimeError, match=error):
        ollama_demo.run_ollama_tool_demo()


def test_dispatch_tool_call_coverage(tmp_path: Path) -> None:
    """Call each supported tool through ``_dispatch_tool_call``, then an unknown one."""
    manager = RealVmManager(backend_name="mock", base_dir=tmp_path / "vms")

    def dispatch(tool_name: str, arguments: dict[str, Any]) -> Any:
        # Shorthand so each step below reads as "tool, arguments".
        return ollama_demo._dispatch_tool_call(manager, tool_name, arguments)

    listing = dispatch("vm_list_profiles", {})
    assert "profiles" in listing

    creation = dispatch("vm_create", {"profile": "debian-base", "vcpu_count": 1, "mem_mib": 512})
    vm_id = str(creation["vm_id"])

    start_info = dispatch("vm_start", {"vm_id": vm_id})
    assert start_info["state"] == "started"

    status_info = dispatch("vm_status", {"vm_id": vm_id})
    assert status_info["vm_id"] == vm_id

    exec_info = dispatch("vm_exec", {"vm_id": vm_id, "command": "printf 'git version\\n'"})
    assert int(exec_info["exit_code"]) == 0

    with pytest.raises(RuntimeError, match="unexpected tool requested by model"):
        dispatch("nope", {})


def test_format_tool_error() -> None:
    """Check ``_format_tool_error`` reports failure and the exception's class name."""
    formatted = ollama_demo._format_tool_error(
        "vm_exec",
        {"vm_id": "x"},
        ValueError("bad args"),
    )
    assert formatted["ok"] is False
    assert formatted["error_type"] == "ValueError"


@pytest.mark.parametrize(
    ("arguments", "error"),
    [
        ({}, "must be a non-empty string"),
        ({"k": 3}, "must be a non-empty string"),
    ],
)
def test_require_str(arguments: dict[str, Any], error: str) -> None:
    """Expect ``_require_str`` to raise ``ValueError`` when key ``k`` is absent or not a string."""
    expected_failure = pytest.raises(ValueError, match=error)
    with expected_failure:
        ollama_demo._require_str(arguments, "k")


def test_require_int_validation() -> None:
    """Check ``_require_int`` rejects a numeric string and returns a real integer unchanged."""
    string_valued = {"k": "1"}
    with pytest.raises(ValueError, match="must be an integer"):
        ollama_demo._require_int(string_valued, "k")

    int_valued = {"k": 1}
    assert ollama_demo._require_int(int_valued, "k") == 1


def test_post_chat_completion_success(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check ``_post_chat_completion`` posts to ``/chat/completions`` and decodes the JSON body."""

    class CannedResponse:
        """Context-manager stand-in for the object returned by ``urlopen``."""

        def __enter__(self) -> CannedResponse:
            return self

        def __exit__(self, exc_type: object, exc: object, tb: object) -> None:
            del exc_type, exc, tb

        def read(self) -> bytes:
            return b'{"ok": true}'

    def checking_urlopen(request: Any, timeout: int) -> CannedResponse:
        assert timeout == 90
        assert request.full_url == "http://localhost:11434/v1/chat/completions"
        return CannedResponse()

    monkeypatch.setattr(urllib.request, "urlopen", checking_urlopen)

    decoded = ollama_demo._post_chat_completion("http://localhost:11434/v1", {"x": 1})
    assert decoded == {"ok": True}


def test_post_chat_completion_errors(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check both failure paths of ``_post_chat_completion``.

    A ``URLError`` from ``urlopen`` must surface as ``RuntimeError``; a body
    that decodes to a JSON array must raise ``TypeError``.
    """
    base_url = "http://localhost:11434/v1"

    # Case 1: the transport itself fails.
    def failing_urlopen(request: Any, timeout: int) -> Any:
        del request, timeout
        raise urllib.error.URLError("boom")

    monkeypatch.setattr(urllib.request, "urlopen", failing_urlopen)
    with pytest.raises(RuntimeError, match="failed to call Ollama"):
        ollama_demo._post_chat_completion(base_url, {"x": 1})

    # Case 2: the transport succeeds but the body is a JSON array.
    class ArrayBodyResponse:
        def __enter__(self) -> ArrayBodyResponse:
            return self

        def __exit__(self, exc_type: object, exc: object, tb: object) -> None:
            del exc_type, exc, tb

        def read(self) -> bytes:
            return b'["bad"]'

    def array_body_urlopen(request: Any, timeout: int) -> ArrayBodyResponse:
        del request, timeout
        return ArrayBodyResponse()

    monkeypatch.setattr(urllib.request, "urlopen", array_body_urlopen)
    with pytest.raises(TypeError, match="unexpected Ollama response shape"):
        ollama_demo._post_chat_completion(base_url, {"x": 1})


@pytest.mark.parametrize(
    ("raw", "expected"),
    [
        (None, {}),
        ({}, {}),
        ("", {}),
        ('{"a":1}', {"a": 1}),
    ],
)
def test_parse_tool_arguments(raw: Any, expected: dict[str, Any]) -> None:
    """Check ``_parse_tool_arguments`` maps each accepted ``raw`` form to the expected dict."""
    parsed = ollama_demo._parse_tool_arguments(raw)
    assert parsed == expected


def test_parse_tool_arguments_invalid() -> None:
    """Expect ``TypeError`` for a JSON array string and for a non-string, non-dict value."""
    rejected_inputs: tuple[tuple[Any, str], ...] = (
        ("[]", "decode to an object"),
        (3, "dictionary or JSON object string"),
    )
    for raw, message_pattern in rejected_inputs:
        with pytest.raises(TypeError, match=message_pattern):
            ollama_demo._parse_tool_arguments(raw)


@pytest.mark.parametrize(
    ("response", "msg"),
    [
        ({}, "did not contain completion choices"),
        ({"choices": [1]}, "unexpected completion choice format"),
        ({"choices": [{"message": "bad"}]}, "did not contain a message"),
    ],
)
def test_extract_message_validation(response: dict[str, Any], msg: str) -> None:
    """Expect ``_extract_message`` to raise ``RuntimeError`` for each malformed response."""
    expected_failure = pytest.raises(RuntimeError, match=msg)
    with expected_failure:
        ollama_demo._extract_message(response)


def test_build_parser_defaults() -> None:
    """Check that parsing an empty argv yields the module's default model and base URL."""
    parsed = ollama_demo._build_parser().parse_args([])
    assert parsed.model == ollama_demo.DEFAULT_OLLAMA_MODEL
    assert parsed.base_url == ollama_demo.DEFAULT_OLLAMA_BASE_URL


def test_main_uses_parser_and_prints_logs(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Check ``main`` prints the summary lines, with the parser and demo runner stubbed out."""

    class StubParser:
        def parse_args(self) -> argparse.Namespace:
            return argparse.Namespace(base_url="http://x", model="m")

    def build_stub_parser() -> StubParser:
        return StubParser()

    def canned_demo(
        base_url: Any, model: Any, strict: Any = True, log: Any = None
    ) -> dict[str, Any]:
        del base_url, model, strict, log
        return {
            "exec_result": {"exit_code": 0, "stdout": "git version 2.44.0\n"},
            "fallback_used": False,
        }

    monkeypatch.setattr(ollama_demo, "_build_parser", build_stub_parser)
    monkeypatch.setattr(ollama_demo, "run_ollama_tool_demo", canned_demo)

    ollama_demo.main()

    printed = capsys.readouterr().out
    assert "[summary] exit_code=0 fallback_used=False" in printed
    assert "[summary] stdout=git version 2.44.0" in printed