Add files via upload
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
from autotranscript.__main__ import *
|
||||
from autotranscript.version import get_version as _get_version
|
||||
|
||||
# Package version string, computed once at import time by calling
# autotranscript.version.get_version (imported above as _get_version).
__version__ = _get_version()
|
||||
@@ -0,0 +1,126 @@
|
||||
|
||||
import whisper
|
||||
from time import time
|
||||
import os
|
||||
from moviepy.editor import *
|
||||
from typing import Union
|
||||
|
||||
class Transcribe:
    """
    Transcribe audio and video files with a Whisper model.

    Creating an instance makes sure an ``audiofiles`` folder (input) and a
    ``transcription`` folder (output) exist in the current working directory
    and loads the Whisper model. Calling the instance, or :meth:`transcribe`,
    writes one ``.txt`` file per processed input into ``transcription``.
    """

    def __init__(self, audiofile: Union[bool, str, list] = None, model: str = "medium", language: str = "German"):
        """
        Set up the folder structure and load the Whisper model.

        :param audiofile: name of one file inside the ``audiofiles`` folder,
            a list of such names, or ``None`` to process every file found there
        :param model: name passed to ``whisper.load_model``
        :param language: language passed to the model's ``transcribe`` call
        """
        self.audiofile = audiofile
        self.language = language
        # Keep the plain name so __repr__ does not have to print the model object.
        self.model_name = model

        (
            self.currentpath,
            self.audiopath,
            self.transcriptionpath,
            self.audiofiles,
        ) = self.create_folder_structure()

        print("loading model")
        self.model = whisper.load_model(model)
        print("model loaded")

    def create_folder_structure(self):
        """
        Create the folders for audio and transcription files if missing.

        :return: currentpath, audiopath, transcriptionpath, audiofiles, where
            ``audiofiles`` is the sorted list of regular files in ``audiopath``
        """
        currentpath = os.getcwd()
        audiopath = os.path.join(currentpath, 'audiofiles')
        transcriptionpath = os.path.join(currentpath, 'transcription')

        if not os.path.exists(audiopath):
            print('Creating audiofiles folder')
            os.makedirs(audiopath, exist_ok=True)
        if not os.path.exists(transcriptionpath):
            print('Creating transcription folder')
            os.makedirs(transcriptionpath, exist_ok=True)

        # Sub-directories cannot be transcribed, so only regular files are
        # listed; sorting makes the processing order deterministic.
        audiofiles = sorted(
            name for name in os.listdir(audiopath)
            if os.path.isfile(os.path.join(audiopath, name))
        )

        return currentpath, audiopath, transcriptionpath, audiofiles

    def video_to_audio(self, file, remove_video=True):
        """
        Extract the audio track of a video into an ``.mp3`` next to it.

        :param file: path of the video file
        :param remove_video: delete the video once the audio has been written
        :return: path of the written ``.mp3`` file
        """
        audio_path = os.path.splitext(file)[0] + '.mp3'
        clip = VideoFileClip(file)
        try:
            clip.audio.write_audiofile(audio_path)
        finally:
            # Release the reader before the video is possibly deleted below.
            clip.close()
        if remove_video:
            os.remove(file)
            print(f'Video {file} removed')
        return audio_path

    def _transcribe_file(self, filename):
        """Transcribe one file from the audio folder and return the path of its ``.txt``."""
        audiofile = os.path.join(self.audiopath, filename)

        if audiofile.lower().endswith('.mp4'):
            print('Converting video to audio')
            audiofile = self.video_to_audio(audiofile)

        print(f'Start transcribing Audio file: {audiofile}')
        _stime = time()
        result = self.model.transcribe(audiofile, verbose=True, language=self.language)
        print(f'Transcription finished in {time() - _stime} seconds')

        # basename/splitext handle any path separator and extension length.
        stem = os.path.splitext(os.path.basename(audiofile))[0]
        savepath = os.path.join(self.transcriptionpath, stem + '.txt')

        # Explicit UTF-8 so non-ASCII text does not depend on the platform default.
        with open(savepath, 'w', encoding='utf-8') as f:
            f.write(result["text"])
        return savepath

    def transcribe(self):
        """
        Transcribe the configured file(s) into the ``transcription`` folder.

        ``self.audiofile`` selects the input: ``None`` processes every file in
        ``self.audiofiles``, a string processes that single file, and a list
        or tuple processes exactly the listed files.

        :raises ValueError: if a requested file is not in ``self.audiofiles``
            or ``self.audiofile`` has an unsupported type
        """
        if self.audiofile is None:
            print('No audio file specified or list of audio files')
            print(f"{len(self.audiofiles)} audio files found in {self.audiopath}")
            print("Start transcribing all audio files")
            selected = list(self.audiofiles)
            report_progress = True
        elif isinstance(self.audiofile, str):
            selected = [self.audiofile]
            report_progress = False
        elif isinstance(self.audiofile, (list, tuple)):
            selected = list(self.audiofile)
            print(f"Start transcribing {len(selected)} selected audio files")
            report_progress = True
        else:
            raise ValueError('Audio file not found')

        # Check every request up front so a bad name fails before any
        # (potentially long) transcription has started.
        if any(name not in self.audiofiles for name in selected):
            raise ValueError('Audio file not found')

        for done, name in enumerate(selected, start=1):
            self._transcribe_file(name)
            if report_progress:
                print(f'{done} of {len(selected)} files transcribed')

        print('Transcription finished')

    def __call__(self):
        """Run :meth:`transcribe`."""
        return self.transcribe()

    def __repr__(self):
        """Return a short description using the model name, not the model object."""
        return (
            f"Transcribe(audiofile={self.audiofile!r}, "
            f"model={self.model_name!r}, language={self.language!r})"
        )

    def __str__(self):
        """Return the same text as :meth:`__repr__`."""
        return repr(self)
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,69 @@
|
||||
import os
|
||||
import subprocess as sp
|
||||
|
||||
# Numeric components of the package version.
MAJOR, MINOR, MICRO, MICRO_POST = 1, 0, 0, 0

# Release flag for this version (False here).
ISRELEASED = False

# Dotted version string assembled from the four components above.
VERSION = ".".join(str(part) for part in (MAJOR, MINOR, MICRO, MICRO_POST))
|
||||
|
||||
# Return the git revision as a string
|
||||
# taken from numpy/numpy
|
||||
def git_version():
    """
    Return the git revision of the current working directory as a string.

    Runs ``git rev-parse HEAD`` with a minimal environment.

    :return: the revision printed by git, or ``"Unknown"`` when git cannot
        be started, exits with an error, or prints nothing
    """
    def _minimal_ext_cmd(cmd):
        # construct minimal environment
        env = {
            k: os.environ[k]
            for k in ('SYSTEMROOT', 'PATH', 'HOME')
            if k in os.environ
        }
        # LANGUAGE is used on win32
        env.update(LANGUAGE='C', LANG='C', LC_ALL='C')

        proc = sp.run(cmd, stdout=sp.PIPE, stderr=sp.PIPE, env=env)
        # A non-zero exit (e.g. not inside a repository) has no usable
        # revision on stdout, so it is reported as empty output.
        return proc.stdout if proc.returncode == 0 else b''

    try:
        out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
        GIT_REVISION = out.strip().decode('ascii')
    except OSError:
        GIT_REVISION = "Unknown"

    if not GIT_REVISION:
        # git ran but produced no revision
        GIT_REVISION = "Unknown"

    return GIT_REVISION
|
||||
|
||||
def _get_git_version():
    """
    Return ``git_version()`` evaluated from the parent directory of this file.

    The process working directory is switched temporarily and is restored
    afterwards, even if ``git_version()`` raises.

    :return: the string returned by ``git_version()``
    """
    cwd = os.getcwd()

    # go to the main directory (one level above this file's directory)
    fdir = os.path.dirname(os.path.abspath(__file__))
    maindir = os.path.abspath(os.path.join(fdir, ".."))
    os.chdir(maindir)

    try:
        # get git version
        return git_version()
    finally:
        # restore the cwd
        os.chdir(cwd)
|
||||
|
||||
def get_version(build_version=False):
    """
    Return the version string of the package.

    :param build_version: for unreleased versions, append a
        ``.dev<YYYYmmddHHMMSS>`` timestamp instead of the git revision
    :return: ``VERSION`` when ``ISRELEASED`` is true; otherwise ``VERSION``
        followed by ``.dev<timestamp>`` or by ``.dev0+`` and the first seven
        characters of the git revision
    """
    if ISRELEASED:
        return VERSION

    # unreleased version
    if build_version:
        import datetime as dt
        date = dt.datetime.now().strftime("%Y%m%d%H%M%S")
        return VERSION + ".dev" + date

    # Git is only queried on the path that uses the revision; an empty answer
    # falls back to "Unknown" so the suffix is never a dangling "+".
    GIT_REVISION = _get_git_version() or "Unknown"
    return VERSION + ".dev0+" + GIT_REVISION[:7]
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user