Filter llama context warning

Thales Maciel 2026-02-24 13:10:11 -03:00
parent 42cf10cce3
commit 3ba696fb7d


@@ -1,12 +1,15 @@
 from __future__ import annotations
+import ctypes
 import logging
 import os
+import sys
 import urllib.request
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Any, Callable, cast
-from llama_cpp import Llama  # type: ignore[import-not-found]
+from llama_cpp import Llama, llama_cpp as llama_cpp_lib  # type: ignore[import-not-found]
 SYSTEM_PROMPT = (
@@ -42,6 +45,11 @@ class LLMConfig:
 class LlamaProcessor:
     def __init__(self, cfg: LLMConfig):
         self.cfg = cfg
+        if not cfg.verbose:
+            os.environ.setdefault("LLAMA_CPP_LOG_LEVEL", "ERROR")
+            os.environ.setdefault("LLAMA_LOG_LEVEL", "ERROR")
+        self._log_callback = _llama_log_callback_factory(cfg.verbose)
+        llama_cpp_lib.llama_log_set(cast(Any, self._log_callback), ctypes.c_void_p())
         os.environ.setdefault("LLAMA_CPP_LOG_PREFIX", "llama")
         os.environ.setdefault("LLAMA_CPP_LOG_PREFIX_SEPARATOR", "::")
         self.client = Llama(
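A note on the two added registration lines above: storing the CFUNCTYPE object on self._log_callback before handing it to llama_log_set keeps the ctypes thunk referenced for the processor's lifetime; if Python garbage-collected it, the native library would later invoke a dangling pointer. A minimal sketch of the same pattern in isolation, assuming llama-cpp-python is installed (the _silence name is illustrative, not part of this commit):

import ctypes
from llama_cpp import llama_cpp as llama_cpp_lib

# Signature llama.cpp expects: void (*)(int level, const char *text, void *user_data)
LOG_CALLBACK_T = ctypes.CFUNCTYPE(None, ctypes.c_int, ctypes.c_char_p, ctypes.c_void_p)

@LOG_CALLBACK_T
def _silence(_level, _text, _user_data):
    pass  # drop every native log line

# The module-level reference keeps the thunk alive after registration.
llama_cpp_lib.llama_log_set(_silence, ctypes.c_void_p())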
@@ -106,7 +114,7 @@ def ensure_model() -> Path:
     return MODEL_PATH
-def _extract_chat_text(payload: dict) -> str:
+def _extract_chat_text(payload: Any) -> str:
     if "choices" in payload and payload["choices"]:
         choice = payload["choices"][0]
         msg = choice.get("message") or {}
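The payload annotation widens from dict to Any, accommodating whatever object llama-cpp-python returns for a chat completion. As a sanity check of the shape the helper walks, a hypothetical OpenAI-style payload; this assumes the elided lines read the message's content field, as the return str(content).strip() in the next hunk suggests:

payload = {"choices": [{"message": {"content": "  Hello!  "}}]}
assert _extract_chat_text(payload) == "Hello!"  # surrounding whitespace stripped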
@@ -114,3 +122,19 @@ def _extract_chat_text(payload: dict) -> str:
     if content is not None:
         return str(content).strip()
     raise RuntimeError("unexpected response format")
+def _llama_log_callback_factory(verbose: bool) -> Callable:
+    callback_t = ctypes.CFUNCTYPE(None, ctypes.c_int, ctypes.c_char_p, ctypes.c_void_p)
+
+    def raw_callback(_level, text, _user_data):
+        message = text.decode("utf-8", errors="ignore") if text else ""
+        if "n_ctx_per_seq" in message:
+            return
+        if not verbose:
+            return
+        sys.stderr.write(f"llama::{message}")
+        if message and not message.endswith("\n"):
+            sys.stderr.write("\n")
+
+    return callback_t(raw_callback)
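Because callback_t(raw_callback) returns a ctypes function pointer, the filter can be smoke-tested straight from Python without loading a model. Both log lines below are made up; only the first contains the n_ctx_per_seq marker the filter drops:

cb = _llama_log_callback_factory(verbose=True)
cb(2, b"n_ctx_per_seq (4096) < n_ctx_train (32768)", None)  # matched by the filter, suppressed
cb(2, b"llama_model_loader: loaded meta data", None)        # echoed to stderr as llama::...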