Use verbose_json diarization, add JSON+TXT email feature
Mirror and run GitLab CI / build (push) Has been cancelled
Ruff / ruff (push) Has been cancelled

This commit is contained in:
admin
2026-06-14 05:36:45 +00:00
parent f6db48b1d0
commit b9d25a39dd
4 changed files with 421 additions and 54 deletions
+16 -16
View File
@@ -39,7 +39,7 @@ class LocalAIClient:
Responsibilities:
- Read configuration from environment.
- Upload audio file as multipart/form-data.
- Parse diarization + transcription response.
- Parse diarization + transcription response (verbose_json).
- Map response into the same structure expected by Scraibe's Transcript.
"""
@@ -106,20 +106,13 @@ class LocalAIClient:
response_format: Optional[str] = None,
include_text: Optional[bool] = None,
verbose: bool = False,
return_raw: bool = False,
**_ignored,
) -> Dict[str, Any]:
"""
Send audio to LocalAI /v1/audio/diarization and return a dict
in the same style as the previous internal diarization output:
{
"segments": [ [start, end], ... ],
"speakers": [ "SPEAKER_00", ... ],
"transcripts": [ "text for segment", ... ]
}
Extra kwargs that the old UI used (e.g., whisper-specific) are
accepted but ignored.
Send audio to LocalAI /v1/audio/diarization and return:
- A normalized dict with segments, speakers, transcripts.
- If return_raw is True, the raw API response is added to that same dict under the 'raw_result' key (for JSON export).
Args:
audio_path: Path to the audio file.
@@ -131,16 +124,18 @@ class LocalAIClient:
min_duration_on: Optional min segment duration.
min_duration_off: Optional min gap duration.
response_format: "json", "verbose_json", or "rttm".
Defaults to "verbose_json" if not set.
Defaults to "verbose_json".
include_text: Whether to request per-segment text.
Defaults to True.
verbose: If True, prints progress messages.
return_raw: If True, also return the raw API response in 'raw_result'.
"""
if verbose:
print("Starting diarization and transcription via LocalAI.")
logger.info("diarize_and_transcribe requested for: %s", audio_path)
# Default to verbose_json (diarization + speaker info) when the caller did not specify a format
if response_format is None:
response_format = "verbose_json"
if include_text is None:
@@ -202,7 +197,7 @@ class LocalAIClient:
)
try:
result = resp.json()
raw_result = resp.json()
except json.JSONDecodeError:
logger.error("Failed to parse LocalAI response as JSON.")
raise LocalAIError(
@@ -212,11 +207,16 @@ class LocalAIClient:
if verbose:
print("Diarization and transcription finished. Starting post-processing.")
return self._parse_diarization_response(result)
parsed = self._parse_diarization_response(raw_result)
if return_raw:
parsed["raw_result"] = raw_result
return parsed
def _parse_diarization_response(self, result: Dict[str, Any]) -> Dict[str, Any]:
"""
Convert LocalAI response into the internal format used by Scraibe:
Convert LocalAI verbose_json response into the internal format used by Scraibe:
{
"segments": [ [start, end], ... ],
"speakers": [ "SPEAKER_00", ... ],