From d3606a2dab5c2e8ad6dd001000eb203bf681a1c5 Mon Sep 17 00:00:00 2001
From: Jaikinator <schmieder.jacob@web.de>
Date: Mon, 19 Jun 2023 12:01:18 +0200
Subject: [PATCH] removed dependency on ffmpeg-python, which will be dropped in
 future whisper releases

---
 autotranscript/audio.py | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/autotranscript/audio.py b/autotranscript/audio.py
index ea11fe8..4e7ee60 100644
--- a/autotranscript/audio.py
+++ b/autotranscript/audio.py
@@ -1,6 +1,6 @@
 import numpy as np
 import torch
-import ffmpeg
+from subprocess import CalledProcessError, run
 
 SAMPLE_RATE = 16000
 
@@ -91,18 +91,24 @@ class AudioProcessor:
         -------
         A NumPy array containing the audio waveform, in float32 dtype.
         """
+        # This launches a subprocess to decode audio while down-mixing
+        # and resampling as necessary.  Requires the ffmpeg CLI in PATH.
+        # fmt: off
+        cmd = [
+            "ffmpeg",
+            "-nostdin",
+            "-threads", "0",
+            "-i", file,
+            "-f", "s16le",
+            "-ac", "1",
+            "-acodec", "pcm_s16le",
+            "-ar", str(sr),
+            "-"
+        ]
+        # fmt: on
         try:
-            # This launches a subprocess to decode audio while down-mixing 
-            # and resampling as necessary.
-            # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
-            out, _ = (
-                ffmpeg.input(file, threads=0)
-                .output("-", format="s16le", acodec="pcm_s16le",
-                        ac=1, ar=sr)
-                .run(cmd=["ffmpeg", "-nostdin"],
-                     capture_stdout=True, capture_stderr=True)
-            )
-        except ffmpeg.Error as e:
+            out = run(cmd, capture_output=True, check=True).stdout
+        except CalledProcessError as e:
             raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
 
         out = np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0