import os
from time import time
from typing import Union


class Transcribe:
    """
    Transcribe audio and video files with a Whisper model.

    Input files are looked up in ``<cwd>/audiofiles`` and every transcript is
    written to ``<cwd>/transcription/<name without extension>.txt``. Both
    folders are created when the object is constructed.

    :param audiofile: name of one file inside the ``audiofiles`` folder, a list
        of such names, or ``None`` to transcribe every file in that folder
    :param model: model name handed to ``whisper.load_model``
    :param language: language handed to the model's ``transcribe`` call
    """

    def __init__(self, audiofile : Union[bool, str, list] = None, model : str = "medium", language :str = "German"):
        self.audiofile = audiofile
        self.language = language
        # Keep the name around so __repr__ does not have to print the loaded model.
        self.model_name = model

        self.currentpath,\
        self.audiopath,\
        self.transcriptionpath,\
        self.audiofiles = self.create_folder_structure()

        # Imported here rather than at module level so that importing this
        # module does not itself require (or load) whisper.
        import whisper

        print("loading model")
        self.model = whisper.load_model(model)
        print("model loaded")

    def create_folder_structure(self):
        """
        Create the ``audiofiles`` and ``transcription`` folders in the current
        working directory (if missing) and list the available input files.

        :return: currentpath, audiopath, transcriptionpath, audiofiles, where
            audiofiles is the sorted list of regular files in ``audiopath``
        """
        currentpath = os.getcwd()
        audiopath = os.path.join(currentpath, 'audiofiles')
        transcriptionpath = os.path.join(currentpath, 'transcription')

        for label, path in (('audiofiles', audiopath),
                            ('transcription', transcriptionpath)):
            if not os.path.isdir(path):
                print(f'Creating {label} folder')
                os.makedirs(path, exist_ok=True)

        # Sub-directories cannot be transcribed, so only regular files are
        # listed; sorting makes the processing order reproducible.
        audiofiles = sorted(
            name for name in os.listdir(audiopath)
            if os.path.isfile(os.path.join(audiopath, name))
        )

        return currentpath, audiopath, transcriptionpath, audiofiles

    def video_to_audio(self, file, remove_video=True):
        """
        Extract the audio track of a video into an ``.mp3`` next to it.

        :param file: path of the video file
        :param remove_video: delete the video after a successful conversion
            (this is the default, so the source video is NOT kept)
        :return: path of the written ``.mp3`` file
        :raises ValueError: if the video has no audio track
        """
        # Imported lazily for the same reason as whisper in __init__.
        from moviepy.editor import VideoFileClip

        # splitext copes with extensions of any length, unlike file[:-4].
        audio_path = os.path.splitext(file)[0] + '.mp3'

        clip = VideoFileClip(file)
        try:
            if clip.audio is None:
                raise ValueError(f'Video {file} has no audio track')
            clip.audio.write_audiofile(audio_path)
        finally:
            # Release the reader before the video is removed below.
            clip.close()

        if remove_video:
            os.remove(file)
            print(f'Video {file} removed')
        return audio_path

    def _transcribe_file(self, name):
        """Transcribe one file from the audio folder; return the transcript path."""
        audiofile = os.path.join(self.audiopath, name)

        if audiofile.lower().endswith('.mp4'):
            print('Converting video to audio')
            audiofile = self.video_to_audio(audiofile)

        print(f'Start transcribing Audio file: {audiofile}')
        _stime = time()
        result = self.model.transcribe(audiofile, verbose=True, language=self.language)
        print(f'Transcription finished in {time() - _stime} seconds')

        # basename/splitext instead of split('/')[-1][:-4]: works with any
        # path separator and any extension length.
        stem = os.path.splitext(os.path.basename(audiofile))[0]
        savepath = os.path.join(self.transcriptionpath, stem + '.txt')

        # Explicit UTF-8 so non-ASCII text does not depend on the platform's
        # default encoding.
        with open(savepath, 'w', encoding='utf-8') as f:
            f.write(result["text"])
        return savepath

    def transcribe(self):
        """
        Transcribe the configured file(s) and write one ``.txt`` per input.

        ``self.audiofile`` selects the work: ``None`` means every file found in
        the audio folder, a string means that single file, and a list/tuple
        means exactly the listed files.

        :raises ValueError: if ``self.audiofile`` has an unsupported type or
            names a file that is not in the audio folder
        """
        requested = self.audiofile

        if requested is None:
            print('No audio file specified or list of audio files')
            print(f"{len(self.audiofiles)} audio files found in {self.audiopath}")
            print("Start transcribing all audio files")
            names = list(self.audiofiles)
        elif isinstance(requested, str):
            names = [requested]
        elif isinstance(requested, (list, tuple)):
            names = list(requested)
        else:
            raise ValueError('Audio file not found')

        # Validate everything up front so a typo in the last name does not
        # surface only after the earlier files have been processed.
        missing = [name for name in names if name not in self.audiofiles]
        if missing:
            raise ValueError(f"Audio file not found: {', '.join(map(str, missing))}")

        for count, name in enumerate(names, start=1):
            self._transcribe_file(name)
            print(f'{count} of {len(names)} files transcribed')

        print('Transcription finished')

    def __call__(self):
        """Shortcut for :meth:`transcribe`."""
        return self.transcribe()

    def __repr__(self):
        # Shows the model *name*; str() falls back to this as well.
        model_name = getattr(self, "model_name", None)
        return (f"Transcribe(audiofile={self.audiofile!r}, "
                f"model={model_name!r}, language={self.language!r})")
import os
import subprocess as sp

MAJOR = 1
MINOR = 0
MICRO = 0
MICRO_POST = 0
ISRELEASED = False
VERSION = '%d.%d.%d.%d' % (MAJOR, MINOR, MICRO, MICRO_POST)


# Return the git revision as a string
# taken from numpy/numpy
def git_version(cwd=None):
    """
    Return the commit hash of ``HEAD`` as reported by ``git rev-parse HEAD``.

    :param cwd: directory in which git is run; ``None`` uses the current
        working directory
    :return: the revision string, or ``"Unknown"`` when git cannot be started,
        exits with an error, or prints nothing
    """
    # construct minimal environment
    env = {k: os.environ[k] for k in ('SYSTEMROOT', 'PATH', 'HOME') if k in os.environ}

    # LANGUAGE is used on win32
    env['LANGUAGE'] = 'C'
    env['LANG'] = 'C'
    env['LC_ALL'] = 'C'

    try:
        proc = sp.run(['git', 'rev-parse', 'HEAD'],
                      stdout=sp.PIPE, stderr=sp.PIPE, env=env, cwd=cwd)
    except OSError:
        # git is not installed, or cwd does not exist
        return "Unknown"

    if proc.returncode != 0:
        # e.g. not inside a git work tree
        return "Unknown"

    revision = proc.stdout.strip().decode('ascii', 'replace')
    # An empty revision would produce a version ending in a bare "+".
    return revision or "Unknown"


def _get_git_version():
    """Return the git revision of the directory one level above this file."""
    fdir = os.path.dirname(os.path.abspath(__file__))
    maindir = os.path.abspath(os.path.join(fdir, ".."))
    # Run git *in* maindir instead of chdir-ing the whole process there, so
    # the caller's working directory is never touched.
    return git_version(cwd=maindir)


def get_version(build_version=False):
    """
    Build the package version string.

    :param build_version: when true, an unreleased version is suffixed with a
        ``.dev<YYYYmmddHHMMSS>`` timestamp instead of the git revision
    :return: ``VERSION`` if ``ISRELEASED`` is set; otherwise
        ``VERSION + ".dev" + timestamp`` for build versions, or
        ``VERSION + ".dev0+" + first seven characters of the git revision``
    """
    if ISRELEASED:
        return VERSION

    # unreleased version
    if build_version:
        # The git revision is not part of this format, so git is not invoked.
        import datetime as dt
        date = dt.datetime.now().strftime("%Y%m%d%H%M%S")
        return VERSION + ".dev" + date

    GIT_REVISION = _get_git_version()
    return VERSION + ".dev0+" + GIT_REVISION[:7]
"""Packaging script for the ``autotranscript`` package."""
import os

import pkg_resources
from setuptools import setup, find_packages

module_name = "autotranscript"
github_url = "https://github.com/Jaikinator/transcriptor"

file_dir = os.path.dirname(os.path.realpath(__file__))


def absdir(p):
    """Return *p* resolved against the directory that contains this script."""
    return os.path.join(file_dir, p)


############### versioning ###############
# Resolved against the script directory so the build also works when it is
# started from another working directory.
verfile = absdir(os.path.join(module_name, "version.py"))
# version.py reads __file__, so it has to be present in the exec namespace.
version = {"__file__": verfile}

with open(verfile, "r") as fp:
    exec(fp.read(), version)

############### requirements ###############
# NOTE(review): requirements.txt pins `whisper~=1.0`; on PyPI the OpenAI model
# is presumably published as `openai-whisper`, while `whisper` is a different
# project - confirm which distribution is meant before publishing.
with open(absdir("requirements.txt")) as req_file:
    install_requires = [str(r) for r in pkg_resources.parse_requirements(req_file)]

############### long description ###############
# `readme` is a pyproject.toml key, not a setup() keyword; the README text is
# passed through long_description instead (skipped if the file is absent).
readme_path = absdir("README.md")
long_description = ""
if os.path.exists(readme_path):
    with open(readme_path, encoding="utf-8") as readme_file:
        long_description = readme_file.read()

############### setup ###############

build_version = "OPTB_BUILD" in os.environ

setup(
    name=module_name,
    version=version["get_version"](build_version),
    packages=find_packages(),
    python_requires="~=3.9",
    long_description=long_description,
    long_description_content_type="text/markdown",
    install_requires=install_requires,
    url=github_url,
    license='',
    author='Jacob Schmieder',
    author_email='',
    description='Transcription tool for audio files based on Whisper',
    #entry_points={'console_scripts': ['autotranscript = autotranscript.__main__:main']}
)