removed dependency on ffmpeg-python

will be dropped in future whisper releases
2023-06-19 12:01:18 +02:00
parent 7bfd294bbd
commit d3606a2dab
1 changed files with 18 additions and 12 deletions
@@ -1,6 +1,6 @@
 import numpy as np
 import torch
-import ffmpeg
+from subprocess import CalledProcessError, run
 SAMPLE_RATE = 16000
@@ -91,18 +91,24 @@ class AudioProcessor:
        -------
        A NumPy array containing the audio waveform, in float32 dtype.
        """
        # This launches a subprocess to decode audio while down-mixing
        # and resampling as necessary.  Requires the ffmpeg CLI in PATH.
        # fmt: off
        cmd = [
            "ffmpeg",
            "-nostdin",
            "-threads", "0",
            "-i", file,
            "-f", "s16le",
            "-ac", "1",
            "-acodec", "pcm_s16le",
            "-ar", str(sr),
            "-"
        ]
        # fmt: on
        try:
-            # This launches a subprocess to decode audio while down-mixing 
+            out = run(cmd, capture_output=True, check=True).stdout
-            # and resampling as necessary.
+        except CalledProcessError as e:
            # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
            out, _ = (
                ffmpeg.input(file, threads=0)
                .output("-", format="s16le", acodec="pcm_s16le",
                        ac=1, ar=sr)
                .run(cmd=["ffmpeg", "-nostdin"],
                     capture_stdout=True, capture_stderr=True)
            )
        except ffmpeg.Error as e:
            raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
        out = np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0