69 lines
1.4 KiB
Go
69 lines
1.4 KiB
Go
package whisper
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// Config holds the settings Transcribe uses when it invokes the whisper
// command.
type Config struct {
	// Model is handed to the command as the --model value.
	Model string
	// Language is handed to the command as the --language value; the flag
	// is left out when this is empty or only whitespace.
	Language string
	// Device is handed to the command as the --device value.
	Device string
	// ExtraArgs is split on whitespace and appended after all other
	// arguments. No shell-style quoting is interpreted.
	ExtraArgs string
	// Timeout, when greater than zero, is applied as a context timeout
	// around the whole Transcribe call.
	Timeout time.Duration
}
|
|
|
|
func Transcribe(ctx context.Context, wavPath, outDir string, cfg Config) (string, error) {
|
|
if cfg.Timeout > 0 {
|
|
var cancel context.CancelFunc
|
|
ctx, cancel = context.WithTimeout(ctx, cfg.Timeout)
|
|
defer cancel()
|
|
}
|
|
|
|
if err := os.MkdirAll(outDir, 0o755); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
args := []string{wavPath,
|
|
"--model", cfg.Model,
|
|
"--task", "transcribe",
|
|
"--device", cfg.Device,
|
|
"--output_format", "txt",
|
|
"--output_dir", outDir,
|
|
"--verbose", "False",
|
|
}
|
|
|
|
if strings.TrimSpace(cfg.Language) != "" {
|
|
args = append(args, "--language", cfg.Language)
|
|
}
|
|
if strings.TrimSpace(cfg.ExtraArgs) != "" {
|
|
extra := strings.Fields(cfg.ExtraArgs)
|
|
args = append(args, extra...)
|
|
}
|
|
|
|
cmd := exec.CommandContext(ctx, "whisper", args...)
|
|
out, err := cmd.CombinedOutput()
|
|
if err != nil {
|
|
if len(out) > 0 {
|
|
return "", errors.New(string(out))
|
|
}
|
|
return "", err
|
|
}
|
|
|
|
txt := filepath.Join(outDir, strings.TrimSuffix(filepath.Base(wavPath), filepath.Ext(wavPath))+".txt")
|
|
data, err := os.ReadFile(txt)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
text := strings.TrimSpace(string(data))
|
|
if text == "" {
|
|
return "", errors.New("empty transcript")
|
|
}
|
|
return text, nil
|
|
}
|