Implemented faster-whisper, removed WhisperX
This commit is contained in:
+19
-20
@@ -26,8 +26,7 @@ Usage:
|
||||
|
||||
from whisper import Whisper
|
||||
from whisper import load_model as whisper_load_model
|
||||
from whisperx.asr import WhisperModel
|
||||
from whisperx import load_model as whisperx_load_model
|
||||
from faster_whisper import WhisperModel as FasterWhisperModel
|
||||
from typing import TypeVar, Union, Optional
|
||||
from torch import Tensor, device
|
||||
from torch.cuda import is_available as cuda_is_available
|
||||
@@ -145,7 +144,7 @@ class Transcriber:
|
||||
- 'large-v3'
|
||||
- 'large'
|
||||
whisper_type (str):
|
||||
Type of whisper model to load. "whisper" or "whisperx".
|
||||
Type of whisper model to load. "whisper" or "faster-whisper".
|
||||
download_root (str, optional): Path to download the model.
|
||||
Defaults to WHISPER_DEFAULT_PATH.
|
||||
device (Optional[Union[str, torch.device]], optional):
|
||||
@@ -272,7 +271,7 @@ class WhisperTranscriber(Transcriber):
|
||||
return f"WhisperTranscriber(model_name={self.model_name}, model={self.model})"
|
||||
|
||||
|
||||
class WhisperXTranscriber(Transcriber):
|
||||
class FasterWhisperTranscriber(Transcriber):
|
||||
def __init__(self, model: whisper, model_name: str) -> None:
|
||||
super().__init__(model, model_name)
|
||||
|
||||
@@ -294,10 +293,10 @@ class WhisperXTranscriber(Transcriber):
|
||||
|
||||
if isinstance(audio, Tensor):
|
||||
audio = audio.cpu().numpy()
|
||||
result = self.model.transcribe(audio, *args, **kwargs)
|
||||
result, _ = self.model.transcribe(audio, *args, **kwargs)
|
||||
text = ""
|
||||
for seg in result['segments']:
|
||||
text += seg['text']
|
||||
for seg in result:
|
||||
text += seg.text
|
||||
return text
|
||||
|
||||
@classmethod
|
||||
@@ -306,7 +305,7 @@ class WhisperXTranscriber(Transcriber):
|
||||
download_root: str = WHISPER_DEFAULT_PATH,
|
||||
device: Optional[Union[str, device]] = None,
|
||||
*args, **kwargs
|
||||
) -> 'WhisperXTranscriber':
|
||||
) -> 'FasterWhisperModel':
|
||||
"""
|
||||
Load whisper model.
|
||||
|
||||
@@ -347,8 +346,8 @@ class WhisperXTranscriber(Transcriber):
|
||||
warnings.warn(f'Compute type {compute_type} not compatible with '
|
||||
f'device {device}! Changing compute type to int8.')
|
||||
compute_type = 'int8'
|
||||
_model = whisperx_load_model(model, download_root=download_root,
|
||||
device=device, compute_type=compute_type)
|
||||
_model = FasterWhisperModel(model, download_root=download_root,
|
||||
device=device, compute_type=compute_type)
|
||||
|
||||
return cls(_model, model_name=model)
|
||||
|
||||
@@ -361,7 +360,7 @@ class WhisperXTranscriber(Transcriber):
|
||||
dict: Keyword arguments for whisper model.
|
||||
"""
|
||||
# _possible_kwargs = WhisperModel.transcribe.__code__.co_varnames
|
||||
_possible_kwargs = signature(WhisperModel.transcribe).parameters.keys()
|
||||
_possible_kwargs = signature(FasterWhisperModel.transcribe).parameters.keys()
|
||||
|
||||
whisper_kwargs = {k: v for k,
|
||||
v in kwargs.items() if k in _possible_kwargs}
|
||||
@@ -375,7 +374,7 @@ class WhisperXTranscriber(Transcriber):
|
||||
return whisper_kwargs
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"WhisperXTranscriber(model_name={self.model_name}, model={self.model})"
|
||||
return f"FasterWhisperTranscriber(model_name={self.model_name}, model={self.model})"
|
||||
|
||||
|
||||
def load_transcriber(model: str = "medium",
|
||||
@@ -384,7 +383,7 @@ def load_transcriber(model: str = "medium",
|
||||
device: Optional[Union[str, device]] = None,
|
||||
in_memory: bool = False,
|
||||
*args, **kwargs
|
||||
) -> Union[WhisperTranscriber, WhisperXTranscriber]:
|
||||
) -> Union[WhisperTranscriber, FasterWhisperTranscriber]:
|
||||
"""
|
||||
Load whisper model.
|
||||
|
||||
@@ -403,28 +402,28 @@ def load_transcriber(model: str = "medium",
|
||||
- 'large-v3'
|
||||
- 'large'
|
||||
whisper_type (str):
|
||||
Type of whisper model to load. "whisper" or "whisperx".
|
||||
Type of whisper model to load. "whisper" or "faster-whisper".
|
||||
download_root (str, optional): Path to download the model.
|
||||
Defaults to WHISPER_DEFAULT_PATH.
|
||||
device (Optional[Union[str, torch.device]], optional):
|
||||
device (Optional[Union[str, torch.device]], optional):
|
||||
Device to load model on. Defaults to None.
|
||||
in_memory (bool, optional): Whether to load model in memory.
|
||||
in_memory (bool, optional): Whether to load model in memory.
|
||||
Defaults to False.
|
||||
args: Additional arguments only to avoid errors.
|
||||
kwargs: Additional keyword arguments only to avoid errors.
|
||||
|
||||
Returns:
|
||||
Union[WhisperTranscriber, WhisperXTranscriber]:
|
||||
Union[WhisperTranscriber, FasterWhisperTranscriber]:
|
||||
One of the Whisper variants as a Transcriber object initialized with the specified model.
|
||||
"""
|
||||
if whisper_type.lower() == 'whisper':
|
||||
_model = WhisperTranscriber.load_model(
|
||||
model, download_root, device, in_memory, *args, **kwargs)
|
||||
return _model
|
||||
elif whisper_type.lower() == 'whisperx':
|
||||
_model = WhisperXTranscriber.load_model(
|
||||
elif whisper_type.lower() == 'faster-whisper':
|
||||
_model = FasterWhisperTranscriber.load_model(
|
||||
model, download_root, device, *args, **kwargs)
|
||||
return _model
|
||||
else:
|
||||
raise ValueError(f'Model type not recognized, exptected "whisper" '
|
||||
f'or "whisperx", got {whisper_type}.')
|
||||
f'or "faster-whisper", got {whisper_type}.')
|
||||
|
||||
Reference in New Issue
Block a user