# scribe/autotranscript/__main__.py


import whisper
from time import time
import os
from moviepy.editor import *
from typing import Union

class Transcribe:
    """
    Transcribe audio and video files with a Whisper model.

    Input files are read from an ``audiofiles`` folder and transcripts are
    written as ``.txt`` files into a ``transcription`` folder; both folders
    live in the current working directory and are created on demand.
    """

    def __init__(self, audiofile: Union[bool, str, list] = None, model: str = "medium", language: str = "German"):
        """
        Prepare the folder structure and load the Whisper model.

        :param audiofile: name of a file inside the ``audiofiles`` folder, a
            list of such names, or ``None`` to transcribe every file found in
            that folder
        :param model: name of the Whisper model passed to ``whisper.load_model``
        :param language: language of the audio, passed to the model's
            ``transcribe`` call
        """
        self.audiofile = audiofile
        self.language = language
        # Keep the model *name* around so __repr__ does not have to print the
        # whole loaded model object.
        self.model_name = model

        # Create the folder structure and take a snapshot of the input files.
        self.currentpath,\
            self.audiopath,\
            self.transcriptionpath,\
            self.audiofiles = self.create_folder_structure()

        print("loading model")
        self.model = whisper.load_model(model)
        print("model loaded")

    def create_folder_structure(self):
        """
        Create the ``audiofiles`` and ``transcription`` folders if missing.

        :return: tuple ``(currentpath, audiopath, transcriptionpath, audiofiles)``
            where ``audiofiles`` is the sorted list of entry names currently
            present in the ``audiofiles`` folder
        """
        currentpath = os.getcwd()
        audiopath = os.path.join(currentpath, 'audiofiles')
        transcriptionpath = os.path.join(currentpath, 'transcription')

        if not os.path.exists(audiopath):
            print('Creating audiofiles folder')
            os.makedirs(audiopath, exist_ok=True)
        if not os.path.exists(transcriptionpath):
            print('Creating transcription folder')
            os.makedirs(transcriptionpath, exist_ok=True)

        # Sorted so that batch runs process files in a deterministic order.
        audiofiles = sorted(os.listdir(audiopath))

        return currentpath, audiopath, transcriptionpath, audiofiles

    def video_to_audio(self, file, remove_video=True):
        """
        Extract the audio track of a video file into an ``.mp3`` next to it.

        :param file: path of the video file
        :param remove_video: delete the video file after a successful export
        :return: path of the written ``.mp3`` file
        :raises ValueError: if the video has no audio track
        """
        # splitext handles extensions of any length (".mp4", ".webm", ...).
        audio_target = os.path.splitext(file)[0] + '.mp3'

        clip = VideoFileClip(file)
        try:
            if clip.audio is None:
                raise ValueError(f'Video {file} has no audio track')
            clip.audio.write_audiofile(audio_target)
        finally:
            # Release the underlying reader before the file is deleted;
            # an open handle would make os.remove fail on Windows.
            clip.close()

        if remove_video:
            os.remove(file)
            print(f'Video {file} removed')
        return audio_target

    def _resolve_targets(self):
        """
        Work out which files of the ``audiofiles`` folder must be transcribed.

        :return: list of file names relative to ``self.audiopath``
        :raises ValueError: if ``self.audiofile`` has an unsupported type or
            names a file that is not in the folder snapshot
        """
        requested = self.audiofile
        if requested is None:
            return list(self.audiofiles)

        if isinstance(requested, str):
            names = [requested]
        elif isinstance(requested, (list, tuple)):
            names = list(requested)
        else:
            raise ValueError('Audio file not found')

        # Validate everything up front so a typo in the last entry does not
        # surface only after the earlier files have been transcribed.
        if any(name not in self.audiofiles for name in names):
            raise ValueError('Audio file not found')
        return names

    def _transcribe_file(self, filename):
        """
        Transcribe one file from the ``audiofiles`` folder and save the text.

        ``.mp4`` files are converted to ``.mp3`` first. The transcript is
        written UTF-8 encoded to ``<transcriptionpath>/<stem>.txt``; two inputs
        sharing the same stem therefore write to the same transcript file.

        :param filename: file name relative to ``self.audiopath``
        :return: path of the written transcript
        """
        audiofile = os.path.join(self.audiopath, filename)

        if audiofile.lower().endswith('.mp4'):
            print('Converting video to audio')
            audiofile = self.video_to_audio(audiofile)

        print(f'Start transcribing Audio file: {audiofile}')
        _stime = time()
        result = self.model.transcribe(audiofile, verbose=True, language=self.language)
        print(f'Transcription finished in {time() - _stime} seconds')

        # basename/splitext instead of split('/') and [:-4]: works with any
        # path separator and any extension length.
        stem = os.path.splitext(os.path.basename(audiofile))[0]
        savepath = os.path.join(self.transcriptionpath, stem + '.txt')

        with open(savepath, 'w', encoding='utf-8') as f:
            f.write(result["text"])
        return savepath

    def transcribe(self):
        """
        Transcribe the configured file(s).

        * ``audiofile`` is a string: transcribe that single file.
        * ``audiofile`` is a list: transcribe exactly the listed files.
        * ``audiofile`` is ``None``: transcribe every file in the folder.

        :raises ValueError: if a requested file is not in the ``audiofiles``
            folder or ``audiofile`` has an unsupported type
        """
        targets = self._resolve_targets()

        if isinstance(self.audiofile, str):
            self._transcribe_file(targets[0])
        else:
            if self.audiofile is None:
                print('No audio file specified or list of audio files')
                print(f"{len(self.audiofiles)} audio files found in {self.audiopath}")
                print("Start transcribing all audio files")
            else:
                print(f"{len(targets)} audio files selected from {self.audiopath}")
                print("Start transcribing selected audio files")

            total = len(targets)
            for done, name in enumerate(targets, start=1):
                self._transcribe_file(name)
                print(f'{done} of {total} files transcribed')

        print('Transcription finished')

    def __call__(self):
        """Run :meth:`transcribe`."""
        return self.transcribe()

    def __repr__(self):
        """Return a short description showing the configuration values."""
        return (
            f"Transcribe(audiofile={self.audiofile!r}, "
            f"model={self.model_name!r}, language={self.language!r})"
        )

    def __str__(self):
        """Return the same text as :meth:`__repr__`."""
        return repr(self)