mirror of https://github.com/usememos/memos
feat(ai): add BYOK audio transcription (#5832)
parent
0ad0fec8d4
commit
101704c8ea
@ -0,0 +1,22 @@
|
||||
package ai
|
||||
|
||||
import "github.com/pkg/errors"
|
||||
|
||||
// DefaultOpenAITranscriptionModel names the built-in transcription model
// for OpenAI providers.
const DefaultOpenAITranscriptionModel = "gpt-4o-transcribe"

// DefaultGeminiTranscriptionModel names the built-in transcription model
// for Gemini providers.
const DefaultGeminiTranscriptionModel = "gemini-2.5-flash"
|
||||
|
||||
// DefaultTranscriptionModel returns the built-in transcription model for a provider.
|
||||
func DefaultTranscriptionModel(providerType ProviderType) (string, error) {
|
||||
switch providerType {
|
||||
case ProviderOpenAI:
|
||||
return DefaultOpenAITranscriptionModel, nil
|
||||
case ProviderGemini:
|
||||
return DefaultGeminiTranscriptionModel, nil
|
||||
default:
|
||||
return "", errors.Wrapf(ErrCapabilityUnsupported, "provider type %q", providerType)
|
||||
}
|
||||
}
|
||||
@ -1,5 +1,6 @@
|
||||
export * from "./cacheService";
|
||||
export * from "./errorService";
|
||||
export * from "./memoService";
|
||||
export * from "./transcriptionService";
|
||||
export * from "./uploadService";
|
||||
export * from "./validationService";
|
||||
|
||||
@ -0,0 +1,26 @@
|
||||
import { create } from "@bufbuild/protobuf";
|
||||
import { aiServiceClient } from "@/connect";
|
||||
import { TranscribeRequestSchema, TranscriptionAudioSchema, TranscriptionConfigSchema } from "@/types/proto/api/v1/ai_service_pb";
|
||||
import type { InstanceSetting_AIProviderConfig } from "@/types/proto/api/v1/instance_service_pb";
|
||||
|
||||
/**
 * Client-side wrapper around the AI service's transcribe RPC.
 */
export const transcriptionService = {
  /**
   * Transcribes an audio file through the given AI provider.
   *
   * The file's bytes are sent inline as the audio content, together with the
   * file's name and MIME type, and an empty transcription config.
   *
   * @param file - The audio file to transcribe.
   * @param provider - The AI provider config whose `id` selects the provider.
   * @returns The `text` field of the transcribe response.
   */
  async transcribeFile(file: File, provider: InstanceSetting_AIProviderConfig): Promise<string> {
    const bytes = new Uint8Array(await file.arrayBuffer());

    const config = create(TranscriptionConfigSchema, {});
    const audio = create(TranscriptionAudioSchema, {
      source: { case: "content", value: bytes },
      filename: file.name,
      contentType: file.type,
    });
    const request = create(TranscribeRequestSchema, {
      providerId: provider.id,
      config,
      audio,
    });

    const { text } = await aiServiceClient.transcribe(request);
    return text;
  },
};
|
||||
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue