Add multilingual STT support and config UI/runtime updates

This commit is contained in:
Thales Maciel 2026-02-27 12:38:13 -03:00
parent ed950cb7c4
commit 4a69c3d333
26 changed files with 2207 additions and 465 deletions

View file

@ -47,9 +47,11 @@ SYSTEM_PROMPT = (
class LlamaProcessor:
def __init__(self, verbose: bool = False):
def __init__(self, verbose: bool = False, model_path: str | Path | None = None):
Llama, llama_cpp_lib = _load_llama_bindings()
ensure_model()
active_model_path = Path(model_path) if model_path else ensure_model()
if not active_model_path.exists():
raise RuntimeError(f"llm model path does not exist: {active_model_path}")
if not verbose:
os.environ.setdefault("LLAMA_CPP_LOG_LEVEL", "ERROR")
os.environ.setdefault("LLAMA_LOG_LEVEL", "ERROR")
@ -58,7 +60,7 @@ class LlamaProcessor:
os.environ.setdefault("LLAMA_CPP_LOG_PREFIX", "llama")
os.environ.setdefault("LLAMA_CPP_LOG_PREFIX_SEPARATOR", "::")
self.client = Llama(
model_path=str(MODEL_PATH),
model_path=str(active_model_path),
n_ctx=4096,
verbose=verbose,
)
@ -66,18 +68,16 @@ class LlamaProcessor:
def process(
self,
text: str,
lang: str = "en",
lang: str = "auto",
*,
dictionary_context: str = "",
profile: str = "default",
) -> str:
request_payload: dict[str, Any] = {
"language": lang,
"transcript": text,
}
cleaned_dictionary = dictionary_context.strip()
if cleaned_dictionary:
request_payload["dictionary"] = cleaned_dictionary
request_payload = _build_request_payload(
text,
lang=lang,
dictionary_context=dictionary_context,
)
kwargs: dict[str, Any] = {
"messages": [
@ -94,6 +94,83 @@ class LlamaProcessor:
return _extract_cleaned_text(response)
class ExternalApiProcessor:
    """Clean transcripts via an OpenAI-compatible chat-completions HTTP API.

    Exposes the same ``process`` signature as ``LlamaProcessor`` so callers
    can swap the local and external backends interchangeably.
    """

    def __init__(
        self,
        *,
        provider: str,
        base_url: str,
        model: str,
        api_key_env_var: str,
        timeout_ms: int,
        max_retries: int,
    ):
        """Validate provider/config and read the API key from the environment.

        Raises RuntimeError if the provider is not "openai" or the key
        environment variable is unset/empty.
        """
        normalized_provider = provider.strip().lower()
        if normalized_provider != "openai":
            raise RuntimeError(f"unsupported external api provider: {provider}")
        self.provider = normalized_provider
        self.base_url = base_url.rstrip("/")
        self.model = model.strip()
        # Clamp to at least 1 ms so urlopen never receives a zero/negative timeout.
        self.timeout_sec = max(timeout_ms, 1) / 1000.0
        # A negative value would make the retry loop in process() run zero
        # times and raise "failed: None" without ever sending a request.
        self.max_retries = max(max_retries, 0)
        self.api_key_env_var = api_key_env_var
        key = os.getenv(api_key_env_var, "").strip()
        if not key:
            raise RuntimeError(
                f"missing external api key in environment variable {api_key_env_var}"
            )
        self._api_key = key

    def process(
        self,
        text: str,
        lang: str = "auto",
        *,
        dictionary_context: str = "",
        profile: str = "default",
    ) -> str:
        """Send *text* to the chat-completions endpoint and return cleaned text.

        Retries up to ``max_retries`` extra times on any failure; after the
        final attempt a RuntimeError is raised, chained to the last error.
        """
        request_payload = _build_request_payload(
            text,
            lang=lang,
            dictionary_context=dictionary_context,
        )
        completion_payload: dict[str, Any] = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": json.dumps(request_payload, ensure_ascii=False)},
            ],
            "temperature": 0.0,
            "response_format": {"type": "json_object"},
        }
        if profile.strip().lower() == "fast":
            # "fast" profile caps completion length to keep latency low.
            completion_payload["max_tokens"] = 192
        endpoint = f"{self.base_url}/chat/completions"
        body = json.dumps(completion_payload, ensure_ascii=False).encode("utf-8")
        request = urllib.request.Request(
            endpoint,
            data=body,
            headers={
                "Authorization": f"Bearer {self._api_key}",
                "Content-Type": "application/json",
            },
            method="POST",
        )
        last_exc: Exception | None = None
        for attempt in range(self.max_retries + 1):
            try:
                with urllib.request.urlopen(request, timeout=self.timeout_sec) as response:
                    payload = json.loads(response.read().decode("utf-8"))
                return _extract_cleaned_text(payload)
            except Exception as exc:  # broad on purpose: retry transport/parse failures alike
                last_exc = exc
                if attempt < self.max_retries:
                    continue
                # Chain the final failure so the root cause survives in the traceback.
                raise RuntimeError(
                    f"external api request failed: {last_exc}"
                ) from last_exc
def ensure_model():
had_invalid_cache = False
if MODEL_PATH.exists():
@ -188,6 +265,17 @@ def _extract_chat_text(payload: Any) -> str:
raise RuntimeError("unexpected response format")
def _build_request_payload(text: str, *, lang: str, dictionary_context: str) -> dict[str, Any]:
payload: dict[str, Any] = {
"language": lang,
"transcript": text,
}
cleaned_dictionary = dictionary_context.strip()
if cleaned_dictionary:
payload["dictionary"] = cleaned_dictionary
return payload
def _extract_cleaned_text(payload: Any) -> str:
raw = _extract_chat_text(payload)
try: