package whisper import ( "context" "errors" "os" "os/exec" "path/filepath" "strings" "time" ) type Config struct { Model string Language string Device string ExtraArgs string Timeout time.Duration } func Transcribe(ctx context.Context, wavPath, outDir string, cfg Config) (string, error) { if cfg.Timeout > 0 { var cancel context.CancelFunc ctx, cancel = context.WithTimeout(ctx, cfg.Timeout) defer cancel() } if err := os.MkdirAll(outDir, 0o755); err != nil { return "", err } args := []string{wavPath, "--model", cfg.Model, "--task", "transcribe", "--device", cfg.Device, "--output_format", "txt", "--output_dir", outDir, "--verbose", "False", } if strings.TrimSpace(cfg.Language) != "" { args = append(args, "--language", cfg.Language) } if strings.TrimSpace(cfg.ExtraArgs) != "" { extra := strings.Fields(cfg.ExtraArgs) args = append(args, extra...) } cmd := exec.CommandContext(ctx, "whisper", args...) out, err := cmd.CombinedOutput() if err != nil { if len(out) > 0 { return "", errors.New(string(out)) } return "", err } txt := filepath.Join(outDir, strings.TrimSuffix(filepath.Base(wavPath), filepath.Ext(wavPath))+".txt") data, err := os.ReadFile(txt) if err != nil { return "", err } text := strings.TrimSpace(string(data)) if text == "" { return "", errors.New("empty transcript") } return text, nil }