From d3606a2dab5c2e8ad6dd001000eb203bf681a1c5 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Mon, 19 Jun 2023 12:01:18 +0200 Subject: [PATCH] removed dependency on ffmpeg-python, which will be dropped in future whisper releases --- autotranscript/audio.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/autotranscript/audio.py b/autotranscript/audio.py index ea11fe8..4e7ee60 100644 --- a/autotranscript/audio.py +++ b/autotranscript/audio.py @@ -1,6 +1,6 @@ import numpy as np import torch -import ffmpeg +from subprocess import CalledProcessError, run SAMPLE_RATE = 16000 @@ -91,18 +91,24 @@ class AudioProcessor: ------- A NumPy array containing the audio waveform, in float32 dtype. """ + # This launches a subprocess to decode audio while down-mixing + # and resampling as necessary. Requires the ffmpeg CLI in PATH. + # fmt: off + cmd = [ + "ffmpeg", + "-nostdin", + "-threads", "0", + "-i", file, + "-f", "s16le", + "-ac", "1", + "-acodec", "pcm_s16le", + "-ar", str(sr), + "-" + ] + # fmt: on try: - # This launches a subprocess to decode audio while down-mixing - # and resampling as necessary. - # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed. - out, _ = ( - ffmpeg.input(file, threads=0) - .output("-", format="s16le", acodec="pcm_s16le", - ac=1, ar=sr) - .run(cmd=["ffmpeg", "-nostdin"], - capture_stdout=True, capture_stderr=True) - ) - except ffmpeg.Error as e: + out = run(cmd, capture_output=True, check=True).stdout + except CalledProcessError as e: raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e out = np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0