From 3ba696fb7d25578b4651fa939f8cf29303a293e1 Mon Sep 17 00:00:00 2001
From: Thales Maciel
Date: Tue, 24 Feb 2026 13:10:11 -0300
Subject: [PATCH] Filter llama context warning

---
 src/aiprocess.py | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/aiprocess.py b/src/aiprocess.py
index 79f4c25..3e17363 100644
--- a/src/aiprocess.py
+++ b/src/aiprocess.py
@@ -1,12 +1,15 @@
 from __future__ import annotations
 
+import ctypes
 import logging
 import os
+import sys
 import urllib.request
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Any, Callable, cast
 
-from llama_cpp import Llama  # type: ignore[import-not-found]
+from llama_cpp import Llama, llama_cpp as llama_cpp_lib  # type: ignore[import-not-found]
 
 
 SYSTEM_PROMPT = (
@@ -42,6 +45,11 @@ class LLMConfig:
 class LlamaProcessor:
     def __init__(self, cfg: LLMConfig):
         self.cfg = cfg
+        if not cfg.verbose:
+            os.environ.setdefault("LLAMA_CPP_LOG_LEVEL", "ERROR")
+            os.environ.setdefault("LLAMA_LOG_LEVEL", "ERROR")
+        self._log_callback = _llama_log_callback_factory(cfg.verbose)
+        llama_cpp_lib.llama_log_set(cast(Any, self._log_callback), ctypes.c_void_p())
         os.environ.setdefault("LLAMA_CPP_LOG_PREFIX", "llama")
         os.environ.setdefault("LLAMA_CPP_LOG_PREFIX_SEPARATOR", "::")
         self.client = Llama(
@@ -106,7 +114,7 @@ def ensure_model() -> Path:
     return MODEL_PATH
 
 
-def _extract_chat_text(payload: dict) -> str:
+def _extract_chat_text(payload: Any) -> str:
     if "choices" in payload and payload["choices"]:
         choice = payload["choices"][0]
         msg = choice.get("message") or {}
@@ -114,3 +122,19 @@ def _extract_chat_text(payload: Any) -> str:
     if content is not None:
         return str(content).strip()
     raise RuntimeError("unexpected response format")
+
+
+def _llama_log_callback_factory(verbose: bool) -> Callable:
+    callback_t = ctypes.CFUNCTYPE(None, ctypes.c_int, ctypes.c_char_p, ctypes.c_void_p)
+
+    def raw_callback(_level, text, _user_data):
+        message = text.decode("utf-8", errors="ignore") if text else ""
+        if "n_ctx_per_seq" in message:
+            return
+        if not verbose:
+            return
+        sys.stderr.write(f"llama::{message}")
+        if message and not message.endswith("\n"):
+            sys.stderr.write("\n")
+
+    return callback_t(raw_callback)