aman/internal/whisper/transcribe.go

69 lines
1.4 KiB
Go

package whisper
import (
	"context"
	"errors"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"
)
// Config carries the options Transcribe uses to build and bound a single
// invocation of the whisper command.
type Config struct {
	// Model is the value handed to the command's --model flag.
	Model string

	// Language, when it is not blank, is handed to the --language flag.
	Language string

	// Device is the value handed to the command's --device flag.
	Device string

	// ExtraArgs holds additional command-line arguments. The string is split
	// on whitespace and the pieces are appended after all other arguments.
	ExtraArgs string

	// Timeout, when positive, is applied as a deadline on the context that
	// governs the whole transcription call.
	Timeout time.Duration
}
// Transcribe runs the external "whisper" command on wavPath and returns the
// transcript it produces.
//
// The command is asked to write a plain-text transcript into outDir (which is
// created if necessary). The transcript is then read back from
// "<outDir>/<base name of wavPath without its extension>.txt", trimmed of
// surrounding whitespace, and returned.
//
// cfg.Model, cfg.Device and cfg.Language are passed via --model, --device and
// --language respectively; a blank value omits the flag so the tool's own
// default applies. cfg.ExtraArgs is split on whitespace (shell quoting is NOT
// interpreted) and appended last. A positive cfg.Timeout bounds the whole
// call in addition to any deadline already carried by ctx.
//
// Errors:
//   - if ctx is cancelled or times out, the returned error wraps ctx.Err();
//   - if the command fails, the returned error wraps the underlying error
//     (e.g. *exec.ExitError) and includes the command's combined output;
//   - if the command succeeds but no transcript file is produced, the error
//     wraps the file-system error;
//   - if the transcript is empty after trimming, "empty transcript".
func Transcribe(ctx context.Context, wavPath, outDir string, cfg Config) (string, error) {
	if cfg.Timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, cfg.Timeout)
		defer cancel()
	}
	if err := os.MkdirAll(outDir, 0o755); err != nil {
		return "", fmt.Errorf("creating output dir %q: %w", outDir, err)
	}

	txt := filepath.Join(outDir, strings.TrimSuffix(filepath.Base(wavPath), filepath.Ext(wavPath))+".txt")
	// Drop any transcript left by an earlier run so that a command which
	// exits successfully without writing output cannot make us return stale
	// text as if it were fresh.
	if err := os.Remove(txt); err != nil && !errors.Is(err, os.ErrNotExist) {
		return "", fmt.Errorf("removing stale transcript %q: %w", txt, err)
	}

	args := []string{wavPath}
	if model := strings.TrimSpace(cfg.Model); model != "" {
		args = append(args, "--model", model)
	}
	args = append(args, "--task", "transcribe")
	if device := strings.TrimSpace(cfg.Device); device != "" {
		args = append(args, "--device", device)
	}
	args = append(args,
		"--output_format", "txt",
		"--output_dir", outDir,
		"--verbose", "False",
	)
	if lang := strings.TrimSpace(cfg.Language); lang != "" {
		args = append(args, "--language", lang)
	}
	// strings.Fields yields nothing for a blank string, so no guard is needed.
	args = append(args, strings.Fields(cfg.ExtraArgs)...)

	out, err := exec.CommandContext(ctx, "whisper", args...).CombinedOutput()
	if err != nil {
		// When the context ended, the process error is typically just
		// "signal: killed"; report the cancellation/timeout instead so callers
		// can test for it with errors.Is.
		if ctxErr := ctx.Err(); ctxErr != nil {
			err = ctxErr
		}
		if msg := strings.TrimSpace(string(out)); msg != "" {
			return "", fmt.Errorf("whisper: %w: %s", err, msg)
		}
		return "", fmt.Errorf("whisper: %w", err)
	}

	data, err := os.ReadFile(txt)
	if err != nil {
		return "", fmt.Errorf("reading transcript: %w", err)
	}
	text := strings.TrimSpace(string(data))
	if text == "" {
		return "", errors.New("empty transcript")
	}
	return text, nil
}