Streamline Llama processor

This commit is contained in:
Thales Maciel 2026-02-24 13:32:28 -03:00
parent 0ca78b675e
commit f3a1f48d04
No known key found for this signature in database
GPG key ID: 33112E6833C34679
2 changed files with 17 additions and 31 deletions

View file

@@ -17,11 +17,12 @@ SYSTEM_PROMPT = (
"Rules:\n" "Rules:\n"
"- Remove filler words (um/uh/like), false starts, and self-corrections.\n" "- Remove filler words (um/uh/like), false starts, and self-corrections.\n"
"- Keep meaning, facts, and intent.\n" "- Keep meaning, facts, and intent.\n"
"- Preserve greetings and salutations.\n"
"- Prefer concise sentences.\n" "- Prefer concise sentences.\n"
"- Do not add new info.\n" "- Do not add new info.\n"
"- Output ONLY the cleaned text, no commentary.\n\n" "- Output ONLY the cleaned text, no commentary.\n\n"
"Examples:\n" "Examples:\n"
" - \"schedule that for 5 PM, I mean 4 PM\" -> \"schedule that for 4 PM\"\n" " - \"Hey, schedule that for 5 PM, I mean 4 PM\" -> \"Hey, schedule that for 4 PM\"\n"
" - \"let's ask Bob, I mean Janice, let's ask Janice\" -> \"let's ask Janice\"\n" " - \"let's ask Bob, I mean Janice, let's ask Janice\" -> \"let's ask Janice\"\n"
) )
@@ -32,36 +33,27 @@ MODEL_URL = (
) )
MODEL_DIR = Path.home() / ".cache" / "lel" / "models" MODEL_DIR = Path.home() / ".cache" / "lel" / "models"
MODEL_PATH = MODEL_DIR / MODEL_NAME MODEL_PATH = MODEL_DIR / MODEL_NAME
LLM_LANGUAGE = "en"
@dataclass
class LLMConfig:
model_path: Path
n_ctx: int = 4096
verbose: bool = False
class LlamaProcessor: class LlamaProcessor:
def __init__(self, cfg: LLMConfig): def __init__(self, verbose=False):
self.cfg = cfg ensure_model()
if not cfg.verbose: if not verbose:
os.environ.setdefault("LLAMA_CPP_LOG_LEVEL", "ERROR") os.environ.setdefault("LLAMA_CPP_LOG_LEVEL", "ERROR")
os.environ.setdefault("LLAMA_LOG_LEVEL", "ERROR") os.environ.setdefault("LLAMA_LOG_LEVEL", "ERROR")
self._log_callback = _llama_log_callback_factory(cfg.verbose) self._log_callback = _llama_log_callback_factory(verbose)
llama_cpp_lib.llama_log_set(cast(Any, self._log_callback), ctypes.c_void_p()) llama_cpp_lib.llama_log_set(cast(Any, self._log_callback), ctypes.c_void_p())
os.environ.setdefault("LLAMA_CPP_LOG_PREFIX", "llama") os.environ.setdefault("LLAMA_CPP_LOG_PREFIX", "llama")
os.environ.setdefault("LLAMA_CPP_LOG_PREFIX_SEPARATOR", "::") os.environ.setdefault("LLAMA_CPP_LOG_PREFIX_SEPARATOR", "::")
self.client = Llama( self.client = Llama(
model_path=str(cfg.model_path), model_path=str(MODEL_PATH),
n_ctx=cfg.n_ctx, n_ctx=4096,
verbose=cfg.verbose, verbose=verbose,
) )
def process(self, text: str) -> str: def process(self, text: str, lang: str = "en") -> str:
user_content = f"<transcript>{text}</transcript>" user_content = f"<transcript>{text}</transcript>"
if LLM_LANGUAGE: user_content = f"<language>{lang}</language>\n{user_content}"
user_content = f"<language>{LLM_LANGUAGE}</language>\n{user_content}"
response = self.client.create_chat_completion( response = self.client.create_chat_completion(
messages=[ messages=[
{"role": "system", "content": SYSTEM_PROMPT}, {"role": "system", "content": SYSTEM_PROMPT},
@@ -72,12 +64,7 @@ class LlamaProcessor:
return _extract_chat_text(response) return _extract_chat_text(response)
def build_processor(verbose: bool = False) -> LlamaProcessor: def ensure_model():
model_path = ensure_model()
return LlamaProcessor(LLMConfig(model_path=model_path, verbose=verbose))
def ensure_model() -> Path:
if MODEL_PATH.exists(): if MODEL_PATH.exists():
return MODEL_PATH return MODEL_PATH
MODEL_DIR.mkdir(parents=True, exist_ok=True) MODEL_DIR.mkdir(parents=True, exist_ok=True)
@@ -111,7 +98,6 @@ def ensure_model() -> Path:
except Exception: except Exception:
pass pass
raise raise
return MODEL_PATH
def _extract_chat_text(payload: Any) -> str: def _extract_chat_text(payload: Any) -> str:

View file

@@ -14,7 +14,7 @@ from faster_whisper import WhisperModel
from config import Config, load, redacted_dict from config import Config, load, redacted_dict
from recorder import start_recording, stop_recording from recorder import start_recording, stop_recording
from aiprocess import build_processor from aiprocess import LlamaProcessor
from desktop import get_desktop_adapter from desktop import get_desktop_adapter
@@ -38,7 +38,7 @@ def _compute_type(device: str) -> str:
class Daemon: class Daemon:
def __init__(self, cfg: Config, desktop, *, llama_verbose: bool = False): def __init__(self, cfg: Config, desktop, *, verbose: bool = False):
self.cfg = cfg self.cfg = cfg
self.desktop = desktop self.desktop = desktop
self.lock = threading.Lock() self.lock = threading.Lock()
@@ -51,7 +51,7 @@ class Daemon:
device=cfg.stt.get("device", "cpu"), device=cfg.stt.get("device", "cpu"),
compute_type=_compute_type(cfg.stt.get("device", "cpu")), compute_type=_compute_type(cfg.stt.get("device", "cpu")),
) )
self.ai_processor = build_processor(verbose=llama_verbose) self.ai_processor = LlamaProcessor(verbose=verbose)
def set_state(self, state: str): def set_state(self, state: str):
with self.lock: with self.lock:
@@ -210,7 +210,6 @@ def main():
cfg = load(args.config) cfg = load(args.config)
_lock_single_instance() _lock_single_instance()
logging.info("ready")
logging.info("hotkey: %s", cfg.daemon.get("hotkey", "")) logging.info("hotkey: %s", cfg.daemon.get("hotkey", ""))
logging.info("config (%s):\n%s", args.config or str(Path.home() / ".config" / "lel" / "config.json"), json.dumps(redacted_dict(cfg), indent=2)) logging.info("config (%s):\n%s", args.config or str(Path.home() / ".config" / "lel" / "config.json"), json.dumps(redacted_dict(cfg), indent=2))
@@ -218,7 +217,7 @@ def main():
logging.getLogger().setLevel(logging.DEBUG) logging.getLogger().setLevel(logging.DEBUG)
desktop = get_desktop_adapter() desktop = get_desktop_adapter()
try: try:
daemon = Daemon(cfg, desktop, llama_verbose=args.verbose) daemon = Daemon(cfg, desktop, verbose=args.verbose)
except Exception as exc: except Exception as exc:
logging.error("startup failed: %s", exc) logging.error("startup failed: %s", exc)
raise SystemExit(1) raise SystemExit(1)
@@ -238,6 +237,7 @@ def main():
cfg.daemon.get("hotkey", ""), cfg.daemon.get("hotkey", ""),
lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle(), lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle(),
) )
logging.info("ready")
desktop.run_tray(daemon.get_state, daemon._quit) desktop.run_tray(daemon.get_state, daemon._quit)