removed dependency on ffmpeg-python

will be dropped in future whisper releases
2023-06-19 12:01:18 +02:00
parent 7bfd294bbd
commit d3606a2dab
1 changed files with 18 additions and 12 deletions
@@ -1,6 +1,6 @@
 import numpy as np
 import torch
-import ffmpeg
+from subprocess import CalledProcessError, run
 SAMPLE_RATE = 16000
@@ -91,18 +91,24 @@ class AudioProcessor:
        -------
        A NumPy array containing the audio waveform, in float32 dtype.
        """
        try:
        # This launches a subprocess to decode audio while down-mixing
-            # and resampling as necessary.
+        # and resampling as necessary.  Requires the ffmpeg CLI in PATH.
-            # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
+        # fmt: off
-            out, _ = (
+        cmd = [
-                ffmpeg.input(file, threads=0)
+            "ffmpeg",
-                .output("-", format="s16le", acodec="pcm_s16le",
+            "-nostdin",
-                        ac=1, ar=sr)
+            "-threads", "0",
-                .run(cmd=["ffmpeg", "-nostdin"],
+            "-i", file,
-                     capture_stdout=True, capture_stderr=True)
+            "-f", "s16le",
-            )
+            "-ac", "1",
-        except ffmpeg.Error as e:
+            "-acodec", "pcm_s16le",
            "-ar", str(sr),
            "-"
        ]
        # fmt: on
        try:
            out = run(cmd, capture_output=True, check=True).stdout
        except CalledProcessError as e:
            raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
        out = np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0