Source code for whisper_smith.exporters

import json
from dataclasses import asdict

from whisper_smith.models import (
    DiarizationResult,
    TranscriptResult,
    TranscriptSegment,
)

# Transcript output formats listed in export_transcript's error message.
SUPPORTED_FORMATS = (
    "txt",
    "md",
    "json",
    "srt",
    "vtt",
)
# Timestamp renderings that export_json accepts (compared case-insensitively).
SUPPORTED_TIMESTAMP_FORMATS = (
    "seconds",
    "hms",
)



[docs]
def export_txt(transcript: TranscriptResult) -> str:
    """Render the transcript as plain text, one segment per line.

    Each line is produced by ``_format_speaker_line``. The returned string
    always ends with a newline, even when there are no segments.
    """
    rendered = "\n".join(map(_format_speaker_line, transcript.segments))
    return f"{rendered}\n"




[docs]
def export_json(transcript: TranscriptResult, timestamp_format: str = "seconds") -> str:
    """Serialize the transcript to indented JSON text.

    Args:
        transcript: The transcript to serialize; converted with
            ``dataclasses.asdict``.
        timestamp_format: One of ``SUPPORTED_TIMESTAMP_FORMATS``
            (case-insensitive). With ``"hms"`` each segment's ``start`` and
            ``end`` are replaced by ``HH:MM:SS`` strings; otherwise the values
            are emitted unchanged.

    Returns:
        The JSON document (2-space indent, non-ASCII kept as-is) followed by
        a trailing newline.

    Raises:
        ValueError: If ``timestamp_format`` is not supported.
    """
    mode = timestamp_format.lower()
    if mode not in SUPPORTED_TIMESTAMP_FORMATS:
        allowed = ", ".join(SUPPORTED_TIMESTAMP_FORMATS)
        raise ValueError(
            f"Unsupported timestamp format: {timestamp_format!r}. "
            f"Supported formats: {allowed}"
        )

    data = asdict(transcript)
    if mode == "hms":
        # Rewrite the numeric offsets in the dict copy, not on the transcript itself.
        for entry in data.get("segments", []):
            for key in ("start", "end"):
                entry[key] = _format_hms_time(float(entry[key]))

    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    return serialized + "\n"




[docs]
def export_diarization_json(diarization: DiarizationResult) -> str:
    """Serialize a diarization result to indented JSON text.

    The result is converted with ``dataclasses.asdict`` and dumped with a
    2-space indent, keeping non-ASCII characters as-is. The returned string
    ends with a newline.
    """
    payload = asdict(diarization)
    body = json.dumps(payload, indent=2, ensure_ascii=False)
    return f"{body}\n"




[docs]
def export_diarization(
    diarization: DiarizationResult,
    output_format: str,
) -> str:
    """Export a diarization result in the requested format.

    Args:
        diarization: The diarization result to export.
        output_format: Format name; case-insensitive, leading dots are
            ignored (so ``".JSON"`` is accepted). Only ``json`` is supported.

    Returns:
        The serialized diarization as text.

    Raises:
        ValueError: If ``output_format`` is anything other than JSON.
    """
    requested = output_format.lower().lstrip(".")

    if requested != "json":
        raise ValueError(
            f"Unsupported diarization output format: {output_format!r}. "
            "Supported formats: json"
        )

    return export_diarization_json(diarization)




[docs]
def export_srt(transcript: TranscriptResult) -> str:
    """Render the transcript as SubRip (SRT) text.

    Every segment becomes one cue made of a 1-based counter, a
    ``start --> end`` timing line and the speaker-prefixed text. Cues are
    separated by a blank line and the output ends with a newline.
    """
    cues = [
        f"{number}\n"
        f"{_format_srt_time(segment.start)} --> {_format_srt_time(segment.end)}\n"
        f"{_format_speaker_line(segment)}"
        for number, segment in enumerate(transcript.segments, start=1)
    ]
    return "\n\n".join(cues) + "\n"




[docs]
def export_vtt(transcript: TranscriptResult) -> str:
    """Render the transcript as WebVTT text.

    The output starts with the ``WEBVTT`` header, followed by one cue per
    segment (a ``start --> end`` timing line and the speaker-prefixed text).
    Header and cues are separated by blank lines and the output ends with a
    newline.
    """
    cues = [
        f"{_format_vtt_time(segment.start)} --> {_format_vtt_time(segment.end)}\n"
        f"{_format_speaker_line(segment)}"
        for segment in transcript.segments
    ]
    return "\n\n".join(["WEBVTT", *cues]) + "\n"




[docs]
def export_md(transcript: TranscriptResult) -> str:
    """Render the transcript as a Markdown document.

    The document starts with a ``# Transcript`` heading. Each segment adds a
    bold speaker name (``UNKNOWN`` when the segment has no speaker) and the
    segment text, with blank lines between all parts. The output ends with a
    single newline.
    """
    parts = ["# Transcript"]

    for segment in transcript.segments:
        speaker_label = segment.speaker or "UNKNOWN"
        # Put the blank separator before each piece so no trailing blank
        # line has to be stripped afterwards.
        parts += ["", f"**{speaker_label}**", "", segment.text]

    return "\n".join(parts) + "\n"




[docs]
def export_transcript(
    transcript: TranscriptResult,
    output_format: str,
    timestamp_format: str = "seconds",
) -> str:
    """Export a transcript in the requested output format.

    Args:
        transcript: The transcript to export.
        output_format: Format name; case-insensitive, leading dots are
            ignored (so ``".SRT"`` is accepted).
        timestamp_format: Forwarded to ``export_json`` only; the other
            exporters do not use it.

    Returns:
        The exported transcript as text.

    Raises:
        ValueError: If ``output_format`` is not one of the supported formats.
    """
    requested = output_format.lower().lstrip(".")

    if requested == "json":
        # JSON is the only exporter that takes a timestamp format.
        return export_json(transcript, timestamp_format=timestamp_format)

    single_argument_exporters = {
        "txt": export_txt,
        "md": export_md,
        "srt": export_srt,
        "vtt": export_vtt,
    }
    exporter = single_argument_exporters.get(requested)
    if exporter is not None:
        return exporter(transcript)

    supported = ", ".join(SUPPORTED_FORMATS)
    raise ValueError(
        f"Unsupported output format: {output_format!r}. "
        f"Supported formats: {supported}"
    )



def _format_speaker_line(segment: TranscriptSegment) -> str:
    """Return the segment text, prefixed with ``"<speaker>: "`` when a speaker is set."""
    return f"{segment.speaker}: {segment.text}" if segment.speaker else segment.text


def _format_srt_time(seconds: float) -> str:
    """Format an offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a fraction that rounds up to a full second carries into the
    seconds/minutes/hours fields instead of producing a four-digit
    millisecond field (``1.9996`` -> ``00:00:02,000``).

    Args:
        seconds: Offset in seconds. Negative values are clamped to zero
            rather than rendered as a malformed timestamp.

    Returns:
        The timestamp, with a comma separating seconds and milliseconds.
    """
    # Round once, on the total, so every field is derived from the same integer.
    total_milliseconds = max(0, int(round(seconds * 1000)))
    total_seconds, milliseconds = divmod(total_milliseconds, 1000)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, whole_seconds = divmod(remainder, 60)

    return f"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}"


def _format_vtt_time(seconds: float) -> str:
    """Format an offset in seconds as a WebVTT timestamp (``HH:MM:SS.mmm``).

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a fraction that rounds up to a full second carries into the
    seconds/minutes/hours fields instead of producing a four-digit
    millisecond field (``1.9996`` -> ``00:00:02.000``).

    Args:
        seconds: Offset in seconds. Negative values are clamped to zero
            rather than rendered as a malformed timestamp.

    Returns:
        The timestamp, with a period separating seconds and milliseconds.
    """
    # Round once, on the total, so every field is derived from the same integer.
    total_milliseconds = max(0, int(round(seconds * 1000)))
    total_seconds, milliseconds = divmod(total_milliseconds, 1000)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, whole_seconds = divmod(remainder, 60)

    return f"{hours:02}:{minutes:02}:{whole_seconds:02}.{milliseconds:03}"


def _format_hms_time(seconds: float) -> str:
    """Format an offset in seconds as ``HH:MM:SS``, rounded to whole seconds.

    Rounding uses the built-in ``round``, so exact halves go to the nearest
    even second. Hours are not wrapped at 24.
    """
    rounded = int(round(seconds))
    hours, leftover = divmod(rounded, 3600)
    minutes, secs = divmod(leftover, 60)
    return f"{hours:02}:{minutes:02}:{secs:02}"