Use verbose_json diarization, add JSON+TXT email feature
This commit is contained in:
+16
-16
@@ -39,7 +39,7 @@ class LocalAIClient:
|
||||
Responsibilities:
|
||||
- Read configuration from environment.
|
||||
- Upload audio file as multipart/form-data.
|
||||
- Parse diarization + transcription response.
|
||||
- Parse diarization + transcription response (verbose_json).
|
||||
- Map response into the same structure expected by Scraibe's Transcript.
|
||||
"""
|
||||
|
||||
@@ -106,20 +106,13 @@ class LocalAIClient:
|
||||
response_format: Optional[str] = None,
|
||||
include_text: Optional[bool] = None,
|
||||
verbose: bool = False,
|
||||
return_raw: bool = False,
|
||||
**_ignored,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Send audio to LocalAI /v1/audio/diarization and return a dict
|
||||
in the same style as the previous internal diarization output:
|
||||
|
||||
{
|
||||
"segments": [ [start, end], ... ],
|
||||
"speakers": [ "SPEAKER_00", ... ],
|
||||
"transcripts": [ "text for segment", ... ]
|
||||
}
|
||||
|
||||
Extra kwargs that the old UI used (e.g., whisper-specific) are
|
||||
accepted but ignored.
|
||||
Send audio to LocalAI /v1/audio/diarization and return:
|
||||
- A normalized dict with segments, speakers, transcripts.
|
||||
- Optionally, the raw verbose_json response (for JSON export).
|
||||
|
||||
Args:
|
||||
audio_path: Path to the audio file.
|
||||
@@ -131,16 +124,18 @@ class LocalAIClient:
|
||||
min_duration_on: Optional min segment duration.
|
||||
min_duration_off: Optional min gap duration.
|
||||
response_format: "json", "verbose_json", or "rttm".
|
||||
Defaults to "verbose_json" if not set.
|
||||
Defaults to "verbose_json".
|
||||
include_text: Whether to request per-segment text.
|
||||
Defaults to True.
|
||||
verbose: If True, prints progress messages.
|
||||
return_raw: If True, also return the raw API response in 'raw_result'.
|
||||
"""
|
||||
if verbose:
|
||||
print("Starting diarization and transcription via LocalAI.")
|
||||
|
||||
logger.info("diarize_and_transcribe requested for: %s", audio_path)
|
||||
|
||||
# Default to verbose_json (diarization + speaker info) unless the caller specifies a format
|
||||
if response_format is None:
|
||||
response_format = "verbose_json"
|
||||
if include_text is None:
|
||||
@@ -202,7 +197,7 @@ class LocalAIClient:
|
||||
)
|
||||
|
||||
try:
|
||||
result = resp.json()
|
||||
raw_result = resp.json()
|
||||
except json.JSONDecodeError:
|
||||
logger.error("Failed to parse LocalAI response as JSON.")
|
||||
raise LocalAIError(
|
||||
@@ -212,11 +207,16 @@ class LocalAIClient:
|
||||
if verbose:
|
||||
print("Diarization and transcription finished. Starting post-processing.")
|
||||
|
||||
return self._parse_diarization_response(result)
|
||||
parsed = self._parse_diarization_response(raw_result)
|
||||
|
||||
if return_raw:
|
||||
parsed["raw_result"] = raw_result
|
||||
|
||||
return parsed
|
||||
|
||||
def _parse_diarization_response(self, result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Convert LocalAI response into the internal format used by Scraibe:
|
||||
Convert LocalAI verbose_json response into the internal format used by Scraibe:
|
||||
{
|
||||
"segments": [ [start, end], ... ],
|
||||
"speakers": [ "SPEAKER_00", ... ],
|
||||
|
||||
Reference in New Issue
Block a user