Streamline Llama processor
This commit is contained in:
parent
0ca78b675e
commit
f3a1f48d04
2 changed files with 17 additions and 31 deletions
|
|
@@ -17,11 +17,12 @@ SYSTEM_PROMPT = (
|
||||||
"Rules:\n"
|
"Rules:\n"
|
||||||
"- Remove filler words (um/uh/like), false starts, and self-corrections.\n"
|
"- Remove filler words (um/uh/like), false starts, and self-corrections.\n"
|
||||||
"- Keep meaning, facts, and intent.\n"
|
"- Keep meaning, facts, and intent.\n"
|
||||||
|
"- Preserve greetings and salutations.\n"
|
||||||
"- Prefer concise sentences.\n"
|
"- Prefer concise sentences.\n"
|
||||||
"- Do not add new info.\n"
|
"- Do not add new info.\n"
|
||||||
"- Output ONLY the cleaned text, no commentary.\n\n"
|
"- Output ONLY the cleaned text, no commentary.\n\n"
|
||||||
"Examples:\n"
|
"Examples:\n"
|
||||||
" - \"schedule that for 5 PM, I mean 4 PM\" -> \"schedule that for 4 PM\"\n"
|
" - \"Hey, schedule that for 5 PM, I mean 4 PM\" -> \"Hey, schedule that for 4 PM\"\n"
|
||||||
" - \"let's ask Bob, I mean Janice, let's ask Janice\" -> \"let's ask Janice\"\n"
|
" - \"let's ask Bob, I mean Janice, let's ask Janice\" -> \"let's ask Janice\"\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@@ -32,36 +33,27 @@ MODEL_URL = (
|
||||||
)
|
)
|
||||||
MODEL_DIR = Path.home() / ".cache" / "lel" / "models"
|
MODEL_DIR = Path.home() / ".cache" / "lel" / "models"
|
||||||
MODEL_PATH = MODEL_DIR / MODEL_NAME
|
MODEL_PATH = MODEL_DIR / MODEL_NAME
|
||||||
LLM_LANGUAGE = "en"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class LLMConfig:
|
|
||||||
model_path: Path
|
|
||||||
n_ctx: int = 4096
|
|
||||||
verbose: bool = False
|
|
||||||
|
|
||||||
|
|
||||||
class LlamaProcessor:
|
class LlamaProcessor:
|
||||||
def __init__(self, cfg: LLMConfig):
|
def __init__(self, verbose=False):
|
||||||
self.cfg = cfg
|
ensure_model()
|
||||||
if not cfg.verbose:
|
if not verbose:
|
||||||
os.environ.setdefault("LLAMA_CPP_LOG_LEVEL", "ERROR")
|
os.environ.setdefault("LLAMA_CPP_LOG_LEVEL", "ERROR")
|
||||||
os.environ.setdefault("LLAMA_LOG_LEVEL", "ERROR")
|
os.environ.setdefault("LLAMA_LOG_LEVEL", "ERROR")
|
||||||
self._log_callback = _llama_log_callback_factory(cfg.verbose)
|
self._log_callback = _llama_log_callback_factory(verbose)
|
||||||
llama_cpp_lib.llama_log_set(cast(Any, self._log_callback), ctypes.c_void_p())
|
llama_cpp_lib.llama_log_set(cast(Any, self._log_callback), ctypes.c_void_p())
|
||||||
os.environ.setdefault("LLAMA_CPP_LOG_PREFIX", "llama")
|
os.environ.setdefault("LLAMA_CPP_LOG_PREFIX", "llama")
|
||||||
os.environ.setdefault("LLAMA_CPP_LOG_PREFIX_SEPARATOR", "::")
|
os.environ.setdefault("LLAMA_CPP_LOG_PREFIX_SEPARATOR", "::")
|
||||||
self.client = Llama(
|
self.client = Llama(
|
||||||
model_path=str(cfg.model_path),
|
model_path=str(MODEL_PATH),
|
||||||
n_ctx=cfg.n_ctx,
|
n_ctx=4096,
|
||||||
verbose=cfg.verbose,
|
verbose=verbose,
|
||||||
)
|
)
|
||||||
|
|
||||||
def process(self, text: str) -> str:
|
def process(self, text: str, lang: str = "en") -> str:
|
||||||
user_content = f"<transcript>{text}</transcript>"
|
user_content = f"<transcript>{text}</transcript>"
|
||||||
if LLM_LANGUAGE:
|
user_content = f"<language>{lang}</language>\n{user_content}"
|
||||||
user_content = f"<language>{LLM_LANGUAGE}</language>\n{user_content}"
|
|
||||||
response = self.client.create_chat_completion(
|
response = self.client.create_chat_completion(
|
||||||
messages=[
|
messages=[
|
||||||
{"role": "system", "content": SYSTEM_PROMPT},
|
{"role": "system", "content": SYSTEM_PROMPT},
|
||||||
|
|
@@ -72,12 +64,7 @@ class LlamaProcessor:
|
||||||
return _extract_chat_text(response)
|
return _extract_chat_text(response)
|
||||||
|
|
||||||
|
|
||||||
def build_processor(verbose: bool = False) -> LlamaProcessor:
|
def ensure_model():
|
||||||
model_path = ensure_model()
|
|
||||||
return LlamaProcessor(LLMConfig(model_path=model_path, verbose=verbose))
|
|
||||||
|
|
||||||
|
|
||||||
def ensure_model() -> Path:
|
|
||||||
if MODEL_PATH.exists():
|
if MODEL_PATH.exists():
|
||||||
return MODEL_PATH
|
return MODEL_PATH
|
||||||
MODEL_DIR.mkdir(parents=True, exist_ok=True)
|
MODEL_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
@@ -111,7 +98,6 @@ def ensure_model() -> Path:
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
raise
|
raise
|
||||||
return MODEL_PATH
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_chat_text(payload: Any) -> str:
|
def _extract_chat_text(payload: Any) -> str:
|
||||||
|
|
|
||||||
10
src/leld.py
10
src/leld.py
|
|
@@ -14,7 +14,7 @@ from faster_whisper import WhisperModel
|
||||||
|
|
||||||
from config import Config, load, redacted_dict
|
from config import Config, load, redacted_dict
|
||||||
from recorder import start_recording, stop_recording
|
from recorder import start_recording, stop_recording
|
||||||
from aiprocess import build_processor
|
from aiprocess import LlamaProcessor
|
||||||
from desktop import get_desktop_adapter
|
from desktop import get_desktop_adapter
|
||||||
|
|
||||||
|
|
||||||
|
|
@@ -38,7 +38,7 @@ def _compute_type(device: str) -> str:
|
||||||
|
|
||||||
|
|
||||||
class Daemon:
|
class Daemon:
|
||||||
def __init__(self, cfg: Config, desktop, *, llama_verbose: bool = False):
|
def __init__(self, cfg: Config, desktop, *, verbose: bool = False):
|
||||||
self.cfg = cfg
|
self.cfg = cfg
|
||||||
self.desktop = desktop
|
self.desktop = desktop
|
||||||
self.lock = threading.Lock()
|
self.lock = threading.Lock()
|
||||||
|
|
@@ -51,7 +51,7 @@ class Daemon:
|
||||||
device=cfg.stt.get("device", "cpu"),
|
device=cfg.stt.get("device", "cpu"),
|
||||||
compute_type=_compute_type(cfg.stt.get("device", "cpu")),
|
compute_type=_compute_type(cfg.stt.get("device", "cpu")),
|
||||||
)
|
)
|
||||||
self.ai_processor = build_processor(verbose=llama_verbose)
|
self.ai_processor = LlamaProcessor(verbose=verbose)
|
||||||
|
|
||||||
def set_state(self, state: str):
|
def set_state(self, state: str):
|
||||||
with self.lock:
|
with self.lock:
|
||||||
|
|
@@ -210,7 +210,6 @@ def main():
|
||||||
cfg = load(args.config)
|
cfg = load(args.config)
|
||||||
_lock_single_instance()
|
_lock_single_instance()
|
||||||
|
|
||||||
logging.info("ready")
|
|
||||||
logging.info("hotkey: %s", cfg.daemon.get("hotkey", ""))
|
logging.info("hotkey: %s", cfg.daemon.get("hotkey", ""))
|
||||||
logging.info("config (%s):\n%s", args.config or str(Path.home() / ".config" / "lel" / "config.json"), json.dumps(redacted_dict(cfg), indent=2))
|
logging.info("config (%s):\n%s", args.config or str(Path.home() / ".config" / "lel" / "config.json"), json.dumps(redacted_dict(cfg), indent=2))
|
||||||
|
|
||||||
|
|
@@ -218,7 +217,7 @@ def main():
|
||||||
logging.getLogger().setLevel(logging.DEBUG)
|
logging.getLogger().setLevel(logging.DEBUG)
|
||||||
desktop = get_desktop_adapter()
|
desktop = get_desktop_adapter()
|
||||||
try:
|
try:
|
||||||
daemon = Daemon(cfg, desktop, llama_verbose=args.verbose)
|
daemon = Daemon(cfg, desktop, verbose=args.verbose)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logging.error("startup failed: %s", exc)
|
logging.error("startup failed: %s", exc)
|
||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
@@ -238,6 +237,7 @@ def main():
|
||||||
cfg.daemon.get("hotkey", ""),
|
cfg.daemon.get("hotkey", ""),
|
||||||
lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle(),
|
lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle(),
|
||||||
)
|
)
|
||||||
|
logging.info("ready")
|
||||||
desktop.run_tray(daemon.get_state, daemon._quit)
|
desktop.run_tray(daemon.get_state, daemon._quit)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue