Changed conversion to mp3 to use pydub

This commit is contained in:
Jaikinator
2022-12-21 17:36:48 +01:00
parent 60ef9c0db8
commit d3c93e2356
+19 -10
View File
@@ -2,8 +2,9 @@
import whisper import whisper
from time import time from time import time
import os import os
from moviepy.editor import *
from typing import Union from typing import Union
from pydub import AudioSegment
class Transcribe: class Transcribe:
def __init__(self, audiofile : Union[bool, str, list] = None, model : str = "medium", language :str = "German"): def __init__(self, audiofile : Union[bool, str, list] = None, model : str = "medium", language :str = "German"):
@@ -53,12 +54,20 @@ class Transcribe:
return currentpath, audiopath, transcriptionpath, audiofiles return currentpath, audiopath, transcriptionpath, audiofiles
def video_to_audio(self,file, remove_video=True): def to_mp3(self,file, remove_orginal=True):
clip = VideoFileClip(file) """
clip.audio.write_audiofile(os.path.join(file[:-4] + '.mp3')) Convert a video file or another audio file to an mp3 file, ensuring that the audio file is in the correct format for the
if remove_video: Whisper model.
:param file: audio or video file
:param remove_orginal: remove original file
:return: mp3 file path
"""
AudioSegment.from_file(file, format=file.split('.')[-1]).export(file[:-4] + '.mp3', format='mp3')
if remove_orginal:
os.remove(file) os.remove(file)
print(f'Video {file} removed') print(f'File {file} removed')
return os.path.join(file[:-4] + '.mp3') return os.path.join(file[:-4] + '.mp3')
@@ -70,9 +79,9 @@ class Transcribe:
else: else:
raise ValueError('Audio file not found') raise ValueError('Audio file not found')
if audiofile.endswith('.mp4'): if not audiofile.endswith('.mp3'):
print('Converting video to audio') print('Converting video to audio')
audiofile = self.video_to_audio(audiofile) audiofile = self.to_mp3(audiofile)
print(f'Start transcribing Audio file: {audiofile}') print(f'Start transcribing Audio file: {audiofile}')
_stime = time() _stime = time()
@@ -95,8 +104,8 @@ class Transcribe:
audiofile = os.path.join(self.audiopath, audiofile) audiofile = os.path.join(self.audiopath, audiofile)
if audiofile.endswith('.mp4'): if not audiofile.endswith('.mp3'):
audiofile = self.video_to_audio(audiofile) audiofile = self.to_mp3(audiofile)
print(f'Start transcribing Audio file: {audiofile}') print(f'Start transcribing Audio file: {audiofile}')
_stime = time() _stime = time()