Streamline Llama processor

Inline the LLMConfig dataclass into LlamaProcessor: the constructor now
takes a single verbose flag, ensures the model is present via
ensure_model(), and loads MODEL_PATH directly. The module-level
LLM_LANGUAGE constant becomes a per-call lang parameter on process(),
and the build_processor() factory is dropped. The system prompt gains a
rule to preserve greetings and salutations.

Thales Maciel 2026-02-24 13:32:28 -03:00
parent 0ca78b675e
commit f3a1f48d04
2 changed files with 17 additions and 31 deletions


@@ -17,11 +17,12 @@ SYSTEM_PROMPT = (
     "Rules:\n"
     "- Remove filler words (um/uh/like), false starts, and self-corrections.\n"
     "- Keep meaning, facts, and intent.\n"
+    "- Preserve greetings and salutations.\n"
     "- Prefer concise sentences.\n"
     "- Do not add new info.\n"
     "- Output ONLY the cleaned text, no commentary.\n\n"
     "Examples:\n"
-    " - \"schedule that for 5 PM, I mean 4 PM\" -> \"schedule that for 4 PM\"\n"
+    " - \"Hey, schedule that for 5 PM, I mean 4 PM\" -> \"Hey, schedule that for 4 PM\"\n"
     " - \"let's ask Bob, I mean Janice, let's ask Janice\" -> \"let's ask Janice\"\n"
 )
 
@@ -32,36 +33,27 @@ MODEL_URL = (
 )
 MODEL_DIR = Path.home() / ".cache" / "lel" / "models"
 MODEL_PATH = MODEL_DIR / MODEL_NAME
-LLM_LANGUAGE = "en"
-
-
-@dataclass
-class LLMConfig:
-    model_path: Path
-    n_ctx: int = 4096
-    verbose: bool = False
 
 
 class LlamaProcessor:
-    def __init__(self, cfg: LLMConfig):
-        self.cfg = cfg
-        if not cfg.verbose:
+    def __init__(self, verbose=False):
+        ensure_model()
+        if not verbose:
             os.environ.setdefault("LLAMA_CPP_LOG_LEVEL", "ERROR")
             os.environ.setdefault("LLAMA_LOG_LEVEL", "ERROR")
-        self._log_callback = _llama_log_callback_factory(cfg.verbose)
+        self._log_callback = _llama_log_callback_factory(verbose)
         llama_cpp_lib.llama_log_set(cast(Any, self._log_callback), ctypes.c_void_p())
         os.environ.setdefault("LLAMA_CPP_LOG_PREFIX", "llama")
         os.environ.setdefault("LLAMA_CPP_LOG_PREFIX_SEPARATOR", "::")
         self.client = Llama(
-            model_path=str(cfg.model_path),
-            n_ctx=cfg.n_ctx,
-            verbose=cfg.verbose,
+            model_path=str(MODEL_PATH),
+            n_ctx=4096,
+            verbose=verbose,
         )
 
-    def process(self, text: str) -> str:
+    def process(self, text: str, lang: str = "en") -> str:
         user_content = f"<transcript>{text}</transcript>"
-        if LLM_LANGUAGE:
-            user_content = f"<language>{LLM_LANGUAGE}</language>\n{user_content}"
+        user_content = f"<language>{lang}</language>\n{user_content}"
         response = self.client.create_chat_completion(
             messages=[
                 {"role": "system", "content": SYSTEM_PROMPT},
@@ -72,12 +64,7 @@ class LlamaProcessor:
         return _extract_chat_text(response)
 
 
-def build_processor(verbose: bool = False) -> LlamaProcessor:
-    model_path = ensure_model()
-    return LlamaProcessor(LLMConfig(model_path=model_path, verbose=verbose))
-
-
-def ensure_model() -> Path:
+def ensure_model():
     if MODEL_PATH.exists():
         return MODEL_PATH
     MODEL_DIR.mkdir(parents=True, exist_ok=True)
@@ -111,7 +98,6 @@ def ensure_model() -> Path:
         except Exception:
             pass
         raise
-    return MODEL_PATH
 
 
 def _extract_chat_text(payload: Any) -> str:
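
For context, a minimal sketch of how call sites change under this commit. The module path (lel.llm) and the sample transcript are assumptions for illustration, not part of the diff, and the model's exact output is not guaranteed:

    # Before this commit, callers went through the factory and config object:
    #
    #   processor = build_processor(verbose=False)
    #   cleaned = processor.process("um, schedule that for 5 PM, I mean 4 PM")
    #
    # After, construction is direct and the language is chosen per call:
    from lel.llm import LlamaProcessor  # assumed module path

    processor = LlamaProcessor(verbose=False)  # downloads the model on first use
    cleaned = processor.process(
        "um, schedule that for 5 PM, I mean 4 PM",
        lang="en",
    )
    print(cleaned)  # e.g. "schedule that for 4 PM"

One trade-off worth noting: with LLMConfig gone, the model path and n_ctx are no longer injectable, so anything that pointed the processor at a different model file will need another seam.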