Changed mp3 conversion to use pydub

2022-12-21 17:36:48 +01:00
parent 60ef9c0db8
commit d3c93e2356
1 changed files with 19 additions and 10 deletions
@@ -2,8 +2,9 @@
 import whisper
 from time import time
 import os
-from moviepy.editor import *
+
 from typing import Union
 from pydub import AudioSegment
 class Transcribe:
    def __init__(self, audiofile : Union[bool, str, list] = None, model : str =  "medium", language :str =  "German"):
@@ -53,12 +54,20 @@ class Transcribe:
        return currentpath, audiopath, transcriptionpath, audiofiles
-    def video_to_audio(self,file,  remove_video=True):
+    def to_mp3(self,file,  remove_orginal=True):
-        clip = VideoFileClip(file)
+        """
-        clip.audio.write_audiofile(os.path.join(file[:-4] + '.mp3'))
+        Convert video file or other audio files to mp3 file, ensures that the audio file is in the correct format for the
-        if remove_video:
+        Whisper model
        :param file:  audio or video file
        :param remove_orginal: remove original file
        :return: mp3 file path
        """
        AudioSegment.from_file(file, format=file.split('.')[-1]).export(file[:-4] + '.mp3', format='mp3')
        if remove_orginal:
            os.remove(file)
-            print(f'Video {file} removed')
+            print(f'File {file} removed')
        return os.path.join(file[:-4] + '.mp3')
@@ -70,9 +79,9 @@ class Transcribe:
            else:
                raise ValueError('Audio file not found')
-            if audiofile.endswith('.mp4'):
+            if not audiofile.endswith('.mp3'):
                print('Converting video to audio')
-                audiofile = self.video_to_audio(audiofile)
+                audiofile = self.to_mp3(audiofile)
            print(f'Start transcribing Audio file: {audiofile}')
            _stime = time()
@@ -95,8 +104,8 @@ class Transcribe:
                audiofile = os.path.join(self.audiopath, audiofile)
-                if audiofile.endswith('.mp4'):
+                if not audiofile.endswith('.mp3'):
-                    audiofile = self.video_to_audio(audiofile)
+                    audiofile = self.to_mp3(audiofile)
                print(f'Start transcribing Audio file: {audiofile}')
                _stime = time()