mutterkey/transcriptiontypes_8h_source.html

#pragma once


#include <QMetaType>

#include <QString>

#include <QStringList>


#include <cstdint>

#include <vector>


/// Stable categories for runtime-layer failures.
///
/// The numeric value of every enumerator is written out explicitly; the
/// values are the same ones the implicit zero-based numbering produces.
enum class RuntimeErrorCode : std::uint8_t {
    None = 0,  ///< Success marker; RuntimeError::isOk() compares against this value.
    Cancelled = 1,
    InvalidConfig = 2,
    ModelNotFound = 3,
    InvalidModelPackage = 4,
    UnsupportedModelPackageVersion = 5,
    ModelIntegrityFailed = 6,
    IncompatibleModelPackage = 7,
    ModelTooLarge = 8,
    ModelLoadFailed = 9,
    AudioNormalizationFailed = 10,
    UnsupportedLanguage = 11,
    DecodeFailed = 12,
    InternalRuntimeError = 13,
};


/// Structured runtime-layer failure with user-facing and diagnostic text.
struct RuntimeError {
    /// Stable error category for programmatic handling and tests.
    RuntimeErrorCode code{RuntimeErrorCode::None};
    /// Human-readable summary safe to surface in logs or UI.
    QString message;
    /// Optional extra context for diagnostics.
    QString detail;

    /// Reports whether this value represents success, i.e. the code is
    /// RuntimeErrorCode::None. The text fields are not consulted.
    [[nodiscard]] bool isOk() const
    {
        return code == RuntimeErrorCode::None;
    }
};


/// Product-owned backend/runtime metadata surfaced to app code.
///
/// Every capability flag starts out as false.
struct BackendCapabilities {
    /// Stable backend identifier used in diagnostics.
    QString backendName;
    /// Supported language codes accepted by this backend.
    QStringList supportedLanguages;
    /// true when the backend can auto-detect the spoken language.
    bool supportsAutoLanguage{false};
    /// true when the backend supports translation mode.
    bool supportsTranslation{false};
    /// true when warmup is a supported preflight operation.
    bool supportsWarmup{false};
};


/// Runtime inspection data kept separate from static backend capabilities.
struct RuntimeDiagnostics {

    /// Stable backend identifier used in diagnostics.
    QString backendName;

    /// Human-readable explanation for why this runtime was selected.
    QString selectionReason;

    /// Human-readable runtime and device summary.
    QString runtimeDescription;

    /// Loaded-model description when a model is available.
    QString loadedModelDescription;

};


/// Product-owned immutable metadata about a validated model artifact.
///
/// Numeric fields default to 0 and string fields to an empty QString.
/// Member order is part of the aggregate-initialization contract and must
/// not be rearranged.
struct ModelMetadata {
    /// Stable product-owned package identifier.
    QString packageId;
    /// Human-readable package/model name.
    QString displayName;
    /// Optional package version string.
    QString packageVersion;
    /// Runtime family this artifact belongs to.
    QString runtimeFamily;
    /// Source format imported or packaged by Mutterkey.
    QString sourceFormat;
    /// Backend-facing model format marker such as ggml.
    QString modelFormat;
    /// Model family or architecture string when known.
    QString architecture;
    /// Language profile such as en or multilingual.
    QString languageProfile;
    /// Quantization metadata when known.
    QString quantization;
    /// Tokenizer metadata when known.
    QString tokenizer;
    /// Raw-path compatibility marker for migration diagnostics.
    bool legacyCompatibility{false};
    /// Vocabulary size when known.
    int vocabularySize{0};
    /// Audio context size when known.
    int audioContext{0};
    /// Audio state size when known.
    int audioState{0};
    /// Audio attention head count when known.
    int audioHeadCount{0};
    /// Audio layer count when known.
    int audioLayerCount{0};
    /// Text context size when known.
    int textContext{0};
    /// Text state size when known.
    int textState{0};
    /// Text attention head count when known.
    int textHeadCount{0};
    /// Text layer count when known.
    int textLayerCount{0};
    /// Mel filter count when known.
    int melCount{0};
    /// Backend-specific format type value when known.
    int formatType{0};
};


/// Normalized runtime audio payload.
struct NormalizedAudio {
    /// Mono float32 samples ready for runtime ingestion.
    std::vector<float> samples;
    /// Sample rate of the normalized audio. Kept at 16 kHz.
    int sampleRate{16000};
    /// Channel count of the normalized audio. Kept at one channel.
    int channels{1};

    /// Reports whether the normalized payload contains any samples.
    /// Only the sample buffer is inspected; sampleRate and channels are not.
    [[nodiscard]] bool isValid() const
    {
        return !samples.empty();
    }
};


/// One normalized streaming audio unit passed into a transcription session.
struct AudioChunk {
    /// Mono float32 samples for this chunk.
    std::vector<float> samples;
    /// Sample rate of the chunk payload.
    int sampleRate{16000};
    /// Channel count of the chunk payload.
    int channels{1};
    /// Start frame offset of this chunk within the utterance stream.
    std::int64_t streamOffsetFrames{0};

    /// Reports whether the chunk contains usable audio samples.
    /// Only the sample buffer is inspected: a non-empty buffer is valid.
    [[nodiscard]] bool isValid() const
    {
        return !samples.empty();
    }
};


/// Stable transcript event categories emitted by streaming sessions.
///
/// Values are written out explicitly and equal the implicit zero-based
/// numbering, so Partial remains the value-initialized default.
enum class TranscriptEventKind : std::uint8_t {
    Partial = 0,
    Final = 1,
};


/// One transcript event produced by a backend session.
struct TranscriptEvent {
    /// Whether this event is partial or final.
    TranscriptEventKind kind{TranscriptEventKind::Partial};
    /// Transcript text payload for this event.
    QString text;
    /// Optional inclusive event start timestamp in milliseconds.
    /// Defaults to -1 (presumably meaning "not set" -- confirm with producers).
    std::int64_t startMs{-1};
    /// Optional exclusive event end timestamp in milliseconds.
    /// Defaults to -1 (presumably meaning "not set" -- confirm with producers).
    std::int64_t endMs{-1};
};


struct TranscriptUpdate {

    std::vector<TranscriptEvent> events;

    RuntimeError error;


    [[nodiscard]] bool isOk() const { return error.isOk(); }

};


/// Result of a single transcription attempt.
struct TranscriptionResult {
    /// true when transcription completed successfully.
    bool success{false};
    /// Final recognized text when success is true.
    QString text;
    /// Structured runtime failure when success is false.
    RuntimeError error;
};


// Declare these value types to Qt's meta-type system (QMetaType) so they can
// be carried in a QVariant.
// NOTE(review): using them in queued signal/slot connections may additionally
// require a qRegisterMetaType<T>() call depending on the Qt version -- confirm.
Q_DECLARE_METATYPE(RuntimeErrorCode)

Q_DECLARE_METATYPE(RuntimeError)

Q_DECLARE_METATYPE(BackendCapabilities)

Q_DECLARE_METATYPE(ModelMetadata)

AudioChunk
One normalized streaming audio unit passed into a transcription session.
Definition transcriptiontypes.h:154

AudioChunk::sampleRate
int sampleRate
Sample rate of the chunk payload.
Definition transcriptiontypes.h:158

AudioChunk::samples
std::vector< float > samples
Mono float32 samples for this chunk.
Definition transcriptiontypes.h:156

AudioChunk::channels
int channels
Channel count of the chunk payload.
Definition transcriptiontypes.h:160

AudioChunk::streamOffsetFrames
std::int64_t streamOffsetFrames
Start frame offset of this chunk within the utterance stream.
Definition transcriptiontypes.h:162

AudioChunk::isValid
bool isValid() const
Reports whether the chunk contains usable audio samples.
Definition transcriptiontypes.h:168

BackendCapabilities
Product-owned backend/runtime metadata surfaced to app code.
Definition transcriptiontypes.h:56

BackendCapabilities::supportsAutoLanguage
bool supportsAutoLanguage
true when the backend can auto-detect the spoken language.
Definition transcriptiontypes.h:62

BackendCapabilities::supportedLanguages
QStringList supportedLanguages
Supported language codes accepted by this backend.
Definition transcriptiontypes.h:60

BackendCapabilities::supportsTranslation
bool supportsTranslation
true when the backend supports translation mode.
Definition transcriptiontypes.h:64

BackendCapabilities::backendName
QString backendName
Stable backend identifier used in diagnostics.
Definition transcriptiontypes.h:58

BackendCapabilities::supportsWarmup
bool supportsWarmup
true when warmup is a supported preflight operation.
Definition transcriptiontypes.h:66

ModelMetadata
Product-owned immutable metadata about a validated model artifact.
Definition transcriptiontypes.h:86

ModelMetadata::legacyCompatibility
bool legacyCompatibility
Raw-path compatibility marker for migration diagnostics.
Definition transcriptiontypes.h:108

ModelMetadata::quantization
QString quantization
Quantization metadata when known.
Definition transcriptiontypes.h:104

ModelMetadata::sourceFormat
QString sourceFormat
Source format imported or packaged by Mutterkey.
Definition transcriptiontypes.h:96

ModelMetadata::textContext
int textContext
Text context size when known.
Definition transcriptiontypes.h:120

ModelMetadata::packageId
QString packageId
Stable product-owned package identifier.
Definition transcriptiontypes.h:88

ModelMetadata::displayName
QString displayName
Human-readable package/model name.
Definition transcriptiontypes.h:90

ModelMetadata::formatType
int formatType
Backend-specific format type value when known.
Definition transcriptiontypes.h:130

ModelMetadata::textLayerCount
int textLayerCount
Text layer count when known.
Definition transcriptiontypes.h:126

ModelMetadata::languageProfile
QString languageProfile
Language profile such as en or multilingual.
Definition transcriptiontypes.h:102

ModelMetadata::melCount
int melCount
Mel filter count when known.
Definition transcriptiontypes.h:128

ModelMetadata::textState
int textState
Text state size when known.
Definition transcriptiontypes.h:122

ModelMetadata::packageVersion
QString packageVersion
Optional package version string.
Definition transcriptiontypes.h:92

ModelMetadata::tokenizer
QString tokenizer
Tokenizer metadata when known.
Definition transcriptiontypes.h:106

ModelMetadata::audioHeadCount
int audioHeadCount
Audio attention head count when known.
Definition transcriptiontypes.h:116

ModelMetadata::textHeadCount
int textHeadCount
Text attention head count when known.
Definition transcriptiontypes.h:124

ModelMetadata::audioLayerCount
int audioLayerCount
Audio layer count when known.
Definition transcriptiontypes.h:118

ModelMetadata::audioState
int audioState
Audio state size when known.
Definition transcriptiontypes.h:114

ModelMetadata::runtimeFamily
QString runtimeFamily
Runtime family this artifact belongs to.
Definition transcriptiontypes.h:94

ModelMetadata::vocabularySize
int vocabularySize
Vocabulary size when known.
Definition transcriptiontypes.h:110

ModelMetadata::audioContext
int audioContext
Audio context size when known.
Definition transcriptiontypes.h:112

ModelMetadata::modelFormat
QString modelFormat
Backend-facing model format marker such as ggml.
Definition transcriptiontypes.h:98

ModelMetadata::architecture
QString architecture
Model family or architecture string when known.
Definition transcriptiontypes.h:100

NormalizedAudio
Normalized runtime audio payload.
Definition transcriptiontypes.h:136

NormalizedAudio::samples
std::vector< float > samples
Mono float32 samples ready for runtime ingestion.
Definition transcriptiontypes.h:138

NormalizedAudio::isValid
bool isValid() const
Reports whether the normalized payload contains any samples.
Definition transcriptiontypes.h:148

NormalizedAudio::sampleRate
int sampleRate
Sample rate of the normalized audio. Kept at 16 kHz.
Definition transcriptiontypes.h:140

NormalizedAudio::channels
int channels
Channel count of the normalized audio. Kept at one channel.
Definition transcriptiontypes.h:142

RuntimeDiagnostics
Runtime inspection data kept separate from static backend capabilities.
Definition transcriptiontypes.h:72

RuntimeDiagnostics::runtimeDescription
QString runtimeDescription
Human-readable runtime and device summary.
Definition transcriptiontypes.h:78

RuntimeDiagnostics::selectionReason
QString selectionReason
Human-readable explanation for why this runtime was selected.
Definition transcriptiontypes.h:76

RuntimeDiagnostics::loadedModelDescription
QString loadedModelDescription
Loaded-model description when a model is available.
Definition transcriptiontypes.h:80

RuntimeDiagnostics::backendName
QString backendName
Stable backend identifier used in diagnostics.
Definition transcriptiontypes.h:74

RuntimeError
Structured runtime-layer failure with user-facing and diagnostic text.
Definition transcriptiontypes.h:38

RuntimeError::isOk
bool isOk() const
Reports whether this value represents success.
Definition transcriptiontypes.h:50

RuntimeError::detail
QString detail
Optional extra context for diagnostics.
Definition transcriptiontypes.h:44

RuntimeError::message
QString message
Human-readable summary safe to surface in logs or UI.
Definition transcriptiontypes.h:42

RuntimeError::code
RuntimeErrorCode code
Stable error category for programmatic handling and tests.
Definition transcriptiontypes.h:40

TranscriptEvent
One transcript event produced by a backend session.
Definition transcriptiontypes.h:182

TranscriptEvent::startMs
std::int64_t startMs
Optional inclusive event start timestamp in milliseconds.
Definition transcriptiontypes.h:188

TranscriptEvent::kind
TranscriptEventKind kind
Whether this event is partial or final.
Definition transcriptiontypes.h:184

TranscriptEvent::endMs
std::int64_t endMs
Optional exclusive event end timestamp in milliseconds.
Definition transcriptiontypes.h:190

TranscriptEvent::text
QString text
Transcript text payload for this event.
Definition transcriptiontypes.h:186

TranscriptUpdate
Result of one streaming session operation.
Definition transcriptiontypes.h:196

TranscriptUpdate::isOk
bool isOk() const
Reports whether this update completed without a runtime error.
Definition transcriptiontypes.h:206

TranscriptUpdate::error
RuntimeError error
Structured runtime failure when the operation did not succeed.
Definition transcriptiontypes.h:200

TranscriptUpdate::events
std::vector< TranscriptEvent > events
Zero or more transcript events emitted by the operation.
Definition transcriptiontypes.h:198

TranscriptionResult
Result of a single transcription attempt.
Definition transcriptiontypes.h:212

TranscriptionResult::error
RuntimeError error
Structured runtime failure when success is false.
Definition transcriptiontypes.h:218

TranscriptionResult::success
bool success
true when transcription completed successfully.
Definition transcriptiontypes.h:214

TranscriptionResult::text
QString text
Final recognized text when success is true.
Definition transcriptiontypes.h:216

TranscriptEventKind
TranscriptEventKind
Stable transcript event categories emitted by streaming sessions.
Definition transcriptiontypes.h:174

RuntimeErrorCode
RuntimeErrorCode
Stable categories for runtime-layer failures.
Definition transcriptiontypes.h:18