diff --git a/autotranscript/__main__.py b/autotranscript/__main__.py
index ec06c3c..8e726f6 100644
--- a/autotranscript/__main__.py
+++ b/autotranscript/__main__.py
@@ -2,8 +2,9 @@
 import whisper
 from time import time
 import os
-from moviepy.editor import *
+
 from typing import Union
+from pydub import AudioSegment
 
 class Transcribe:
     def __init__(self, audiofile : Union[bool, str, list] = None, model : str = "medium", language :str = "German"):
@@ -53,12 +54,29 @@ class Transcribe:
 
         return currentpath, audiopath, transcriptionpath, audiofiles
 
-    def video_to_audio(self,file, remove_video=True):
-        clip = VideoFileClip(file)
-        clip.audio.write_audiofile(os.path.join(file[:-4] + '.mp3'))
-        if remove_video:
+    def to_mp3(self,file, remove_orginal=True):
+        """
+        Convert a video file or another audio file to mp3, so that the audio is in the correct format for the
+        Whisper model.
+
+        The mp3 is written next to the input, with the input's extension replaced by '.mp3'.
+
+        :param file: audio or video file
+        :param remove_orginal: remove original file after the conversion
+        :return: mp3 file path
+        """
+        # splitext copes with extensions of any length ('.flac', '.webm'); cutting a fixed four
+        # characters off the end only works for three-letter extensions
+        stem, extension = os.path.splitext(file)
+        mp3_path = stem + '.mp3'
+
+        # no extension -> format=None, which leaves the format detection to pydub/ffmpeg
+        AudioSegment.from_file(file, format=extension[1:] or None).export(mp3_path, format='mp3')
+
+        # 'a.MP3' and 'a.mp3' are one file on case-insensitive file systems; never delete the output
+        if remove_orginal and not os.path.samefile(file, mp3_path):
             os.remove(file)
-            print(f'Video {file} removed')
+            print(f'File {file} removed')
 
-        return os.path.join(file[:-4] + '.mp3')
+        return mp3_path
 
@@ -70,9 +88,9 @@ class Transcribe:
         else:
             raise ValueError('Audio file not found')
 
-        if audiofile.endswith('.mp4'):
+        if not audiofile.endswith('.mp3'):
             print('Converting video to audio')
-            audiofile = self.video_to_audio(audiofile)
+            audiofile = self.to_mp3(audiofile)
 
         print(f'Start transcribing Audio file: {audiofile}')
         _stime = time()
@@ -95,8 +113,8 @@ class Transcribe:
 
             audiofile = os.path.join(self.audiopath, audiofile)
 
-            if audiofile.endswith('.mp4'):
-                audiofile = self.video_to_audio(audiofile)
+            if not audiofile.endswith('.mp3'):
+                audiofile = self.to_mp3(audiofile)
 
             print(f'Start transcribing Audio file: {audiofile}')
             _stime = time()