mirror of https://github.com/usememos/memos
feat(ai): add instance AI providers and transcription (#5829)
Co-authored-by: memoclaw <265580040+memoclaw@users.noreply.github.com>
pull/5830/head
parent
40fd700fb8
commit
83ed32f119
@ -0,0 +1,26 @@
|
||||
package ai
|
||||
|
||||
// ProviderType names the backend implementation behind an AI provider entry.
type ProviderType string

// Known ProviderType values.
const (
	// ProviderOpenAI selects OpenAI's hosted API.
	ProviderOpenAI ProviderType = "OPENAI"

	// ProviderOpenAICompatible selects an endpoint that speaks the OpenAI API.
	ProviderOpenAICompatible ProviderType = "OPENAI_COMPATIBLE"

	// ProviderAnthropic selects Anthropic's API.
	ProviderAnthropic ProviderType = "ANTHROPIC"

	// ProviderGemini selects Google's Gemini API.
	ProviderGemini ProviderType = "GEMINI"
)
|
||||
|
||||
// ProviderConfig configures a callable AI provider connection.
|
||||
type ProviderConfig struct {
|
||||
ID string
|
||||
Title string
|
||||
Type ProviderType
|
||||
Endpoint string
|
||||
APIKey string
|
||||
Models []string
|
||||
DefaultModel string
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
package ai
|
||||
|
||||
import "github.com/pkg/errors"
|
||||
|
||||
var (
|
||||
// ErrProviderNotFound indicates that a requested provider ID does not exist.
|
||||
ErrProviderNotFound = errors.New("AI provider not found")
|
||||
// ErrCapabilityUnsupported indicates that the provider does not support the requested capability.
|
||||
ErrCapabilityUnsupported = errors.New("AI provider capability unsupported")
|
||||
)
|
||||
@ -0,0 +1,59 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/usememos/memos/internal/ai"
|
||||
)
|
||||
|
||||
// defaultEndpoint is the base URL of OpenAI's hosted API.
const defaultEndpoint = "https://api.openai.com/v1"
|
||||
|
||||
// Transcriber sends audio to an OpenAI-compatible transcription API.
type Transcriber struct {
	// endpoint is the API base URL.
	endpoint string

	// apiKey is the credential used to authenticate requests.
	apiKey string

	// httpClient performs the outgoing HTTP requests.
	httpClient *http.Client
}
|
||||
|
||||
// NewTranscriber creates a new OpenAI-compatible transcriber.
|
||||
func NewTranscriber(config ai.ProviderConfig, options ...Option) (*Transcriber, error) {
|
||||
endpoint := strings.TrimSpace(config.Endpoint)
|
||||
if endpoint == "" {
|
||||
endpoint = defaultEndpoint
|
||||
}
|
||||
if _, err := url.ParseRequestURI(endpoint); err != nil {
|
||||
return nil, errors.Wrap(err, "invalid OpenAI endpoint")
|
||||
}
|
||||
if config.APIKey == "" {
|
||||
return nil, errors.New("OpenAI API key is required")
|
||||
}
|
||||
|
||||
transcriber := &Transcriber{
|
||||
endpoint: endpoint,
|
||||
apiKey: config.APIKey,
|
||||
httpClient: &http.Client{
|
||||
Timeout: 2 * time.Minute,
|
||||
},
|
||||
}
|
||||
for _, option := range options {
|
||||
option(transcriber)
|
||||
}
|
||||
return transcriber, nil
|
||||
}
|
||||
|
||||
// Option configures a Transcriber.
|
||||
type Option func(*Transcriber)
|
||||
|
||||
// WithHTTPClient sets the HTTP client used by the transcriber.
|
||||
func WithHTTPClient(client *http.Client) Option {
|
||||
return func(t *Transcriber) {
|
||||
if client != nil {
|
||||
t.httpClient = client
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,145 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"mime"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/textproto"
|
||||
"strings"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/usememos/memos/internal/ai"
|
||||
)
|
||||
|
||||
// transcriptionResponse is the JSON body decoded from a successful
// transcription call.
type transcriptionResponse struct {
	// Text is decoded from the "text" key.
	Text string `json:"text"`

	// Language is decoded from the "language" key.
	Language string `json:"language"`

	// Duration is decoded from the "duration" key.
	Duration float64 `json:"duration"`
}
|
||||
|
||||
// errorResponse is the JSON error envelope decoded from a failed API call.
type errorResponse struct {
	Error struct {
		// Message is the human-readable error description.
		Message string `json:"message"`

		// Type is decoded from the "type" key.
		Type string `json:"type"`

		// Code is kept as raw JSON because OpenAI-compatible servers do not
		// agree on its type (string, number or null). Decoding it into a
		// string would make the whole unmarshal fail on a numeric code and
		// lose Message along with it.
		Code json.RawMessage `json:"code"`
	} `json:"error"`
}
|
||||
|
||||
// Transcribe transcribes audio with the /audio/transcriptions endpoint.
|
||||
func (t *Transcriber) Transcribe(ctx context.Context, request ai.TranscribeRequest) (*ai.TranscribeResponse, error) {
|
||||
if strings.TrimSpace(request.Model) == "" {
|
||||
return nil, errors.New("model is required")
|
||||
}
|
||||
if request.Audio == nil {
|
||||
return nil, errors.New("audio is required")
|
||||
}
|
||||
|
||||
body := &bytes.Buffer{}
|
||||
writer := multipart.NewWriter(body)
|
||||
if err := writeAudioFilePart(writer, request); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := writer.WriteField("model", request.Model); err != nil {
|
||||
return nil, errors.Wrap(err, "failed to write model field")
|
||||
}
|
||||
if err := writer.WriteField("response_format", "json"); err != nil {
|
||||
return nil, errors.Wrap(err, "failed to write response format field")
|
||||
}
|
||||
if request.Prompt != "" {
|
||||
if err := writer.WriteField("prompt", request.Prompt); err != nil {
|
||||
return nil, errors.Wrap(err, "failed to write prompt field")
|
||||
}
|
||||
}
|
||||
if request.Language != "" {
|
||||
if err := writer.WriteField("language", request.Language); err != nil {
|
||||
return nil, errors.Wrap(err, "failed to write language field")
|
||||
}
|
||||
}
|
||||
if err := writer.Close(); err != nil {
|
||||
return nil, errors.Wrap(err, "failed to close multipart writer")
|
||||
}
|
||||
|
||||
httpRequest, err := http.NewRequestWithContext(ctx, http.MethodPost, strings.TrimRight(t.endpoint, "/")+"/audio/transcriptions", body)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to create transcription request")
|
||||
}
|
||||
httpRequest.Header.Set("Authorization", "Bearer "+t.apiKey)
|
||||
httpRequest.Header.Set("Content-Type", writer.FormDataContentType())
|
||||
|
||||
httpResponse, err := t.httpClient.Do(httpRequest)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to send transcription request")
|
||||
}
|
||||
defer httpResponse.Body.Close()
|
||||
|
||||
responseBody, err := io.ReadAll(httpResponse.Body)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to read transcription response")
|
||||
}
|
||||
if httpResponse.StatusCode < http.StatusOK || httpResponse.StatusCode >= http.StatusMultipleChoices {
|
||||
return nil, errors.Errorf("transcription request failed with status %d: %s", httpResponse.StatusCode, extractErrorMessage(responseBody))
|
||||
}
|
||||
|
||||
var response transcriptionResponse
|
||||
if err := json.Unmarshal(responseBody, &response); err != nil {
|
||||
return nil, errors.Wrap(err, "failed to unmarshal transcription response")
|
||||
}
|
||||
return &ai.TranscribeResponse{
|
||||
Text: response.Text,
|
||||
Language: response.Language,
|
||||
Duration: response.Duration,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func writeAudioFilePart(writer *multipart.Writer, request ai.TranscribeRequest) error {
|
||||
filename := strings.TrimSpace(request.Filename)
|
||||
if filename == "" {
|
||||
filename = "audio"
|
||||
}
|
||||
contentType := strings.TrimSpace(request.ContentType)
|
||||
if contentType == "" {
|
||||
contentType = "application/octet-stream"
|
||||
} else {
|
||||
mediaType, _, err := mime.ParseMediaType(contentType)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "invalid audio content type")
|
||||
}
|
||||
contentType = mediaType
|
||||
}
|
||||
|
||||
header := make(textproto.MIMEHeader)
|
||||
header.Set("Content-Disposition", mime.FormatMediaType("form-data", map[string]string{
|
||||
"name": "file",
|
||||
"filename": sanitizeFilename(filename),
|
||||
}))
|
||||
header.Set("Content-Type", contentType)
|
||||
part, err := writer.CreatePart(header)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to create audio file part")
|
||||
}
|
||||
if _, err := io.Copy(part, request.Audio); err != nil {
|
||||
return errors.Wrap(err, "failed to write audio file part")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func extractErrorMessage(responseBody []byte) string {
|
||||
var response errorResponse
|
||||
if err := json.Unmarshal(responseBody, &response); err == nil && response.Error.Message != "" {
|
||||
return response.Error.Message
|
||||
}
|
||||
return string(responseBody)
|
||||
}
|
||||
|
||||
// sanitizeFilename replaces every carriage return and line feed in filename
// with an underscore. If the result is empty or whitespace only, "audio" is
// returned instead.
func sanitizeFilename(filename string) string {
	cleaned := strings.ReplaceAll(filename, "\r", "_")
	cleaned = strings.ReplaceAll(cleaned, "\n", "_")
	if strings.TrimSpace(cleaned) != "" {
		return cleaned
	}
	return "audio"
}
|
||||
@ -0,0 +1,65 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/usememos/memos/internal/ai"
|
||||
)
|
||||
|
||||
func TestTranscribe(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
require.Equal(t, http.MethodPost, r.Method)
|
||||
require.Equal(t, "/audio/transcriptions", r.URL.Path)
|
||||
require.Equal(t, "Bearer test-key", r.Header.Get("Authorization"))
|
||||
require.NoError(t, r.ParseMultipartForm(10<<20))
|
||||
require.Equal(t, "gpt-4o-transcribe", r.FormValue("model"))
|
||||
require.Equal(t, "json", r.FormValue("response_format"))
|
||||
require.Equal(t, "domain words", r.FormValue("prompt"))
|
||||
require.Equal(t, "en", r.FormValue("language"))
|
||||
|
||||
file, header, err := r.FormFile("file")
|
||||
require.NoError(t, err)
|
||||
defer file.Close()
|
||||
require.Equal(t, "voice.wav", header.Filename)
|
||||
require.Equal(t, "audio/wav", header.Header.Get("Content-Type"))
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
require.NoError(t, json.NewEncoder(w).Encode(map[string]any{
|
||||
"text": "hello world",
|
||||
"language": "en",
|
||||
"duration": 1.5,
|
||||
}))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
transcriber, err := NewTranscriber(ai.ProviderConfig{
|
||||
Endpoint: server.URL,
|
||||
APIKey: "test-key",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
response, err := transcriber.Transcribe(ctx, ai.TranscribeRequest{
|
||||
Model: "gpt-4o-transcribe",
|
||||
Filename: "voice.wav",
|
||||
ContentType: "audio/wav",
|
||||
Audio: strings.NewReader("RIFF"),
|
||||
Prompt: "domain words",
|
||||
Language: "en",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "hello world", response.Text)
|
||||
require.Equal(t, "en", response.Language)
|
||||
require.Equal(t, 1.5, response.Duration)
|
||||
}
|
||||
@ -0,0 +1,16 @@
|
||||
package ai
|
||||
|
||||
import "github.com/pkg/errors"
|
||||
|
||||
// FindProvider returns the provider with the given ID.
|
||||
func FindProvider(providers []ProviderConfig, providerID string) (*ProviderConfig, error) {
|
||||
if providerID == "" {
|
||||
return nil, errors.Wrap(ErrProviderNotFound, "provider ID is required")
|
||||
}
|
||||
for _, provider := range providers {
|
||||
if provider.ID == providerID {
|
||||
return &provider, nil
|
||||
}
|
||||
}
|
||||
return nil, errors.Wrapf(ErrProviderNotFound, "provider ID %q", providerID)
|
||||
}
|
||||
@ -0,0 +1,29 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
)
|
||||
|
||||
// Transcriber transcribes audio into text.
|
||||
type Transcriber interface {
|
||||
Transcribe(ctx context.Context, request TranscribeRequest) (*TranscribeResponse, error)
|
||||
}
|
||||
|
||||
// TranscribeRequest describes one audio transcription job.
type TranscribeRequest struct {
	// Model is the name of the model to transcribe with.
	Model string

	// Filename is the name of the audio file.
	Filename string

	// ContentType is the MIME type of the audio.
	ContentType string

	// Audio supplies the audio bytes.
	Audio io.Reader

	// Size is presumably the audio length in bytes.
	// NOTE(review): confirm against the code that sets it.
	Size int64

	// Prompt is optional text passed along with the audio.
	Prompt string

	// Language is the language of the audio.
	Language string
}
|
||||
|
||||
// TranscribeResponse is the result of an audio transcription.
type TranscribeResponse struct {
	// Text is the transcribed text.
	Text string

	// Language is the language reported for the audio.
	Language string

	// Duration is the duration reported for the audio.
	// NOTE(review): unit is presumably seconds; confirm with the provider API.
	Duration float64
}
|
||||
@ -0,0 +1,63 @@
|
||||
syntax = "proto3";
|
||||
|
||||
package memos.api.v1;
|
||||
|
||||
import "google/api/annotations.proto";
|
||||
import "google/api/client.proto";
|
||||
import "google/api/field_behavior.proto";
|
||||
|
||||
option go_package = "gen/api/v1";
|
||||
|
||||
// AIService exposes AI features backed by instance AI providers.
service AIService {
  // Transcribe transcribes an audio file using an instance AI provider.
  rpc Transcribe(TranscribeRequest) returns (TranscribeResponse) {
    option (google.api.http) = {
      post: "/api/v1/ai:transcribe"
      body: "*"
    };
    option (google.api.method_signature) = "provider_id,config,audio";
  }
}
|
||||
|
||||
// TranscribeRequest is the request message for AIService.Transcribe.
message TranscribeRequest {
  // Required. The instance AI provider ID to use.
  string provider_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Transcription options.
  TranscriptionConfig config = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Audio input.
  TranscriptionAudio audio = 3 [(google.api.field_behavior) = REQUIRED];
}
|
||||
|
||||
// TranscriptionConfig carries the per-request transcription options.
message TranscriptionConfig {
  // Optional. The model to use. If empty, the provider's default model is used.
  string model = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A prompt to improve transcription quality.
  string prompt = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The language of the input audio.
  string language = 3 [(google.api.field_behavior) = OPTIONAL];
}
|
||||
|
||||
// TranscriptionAudio describes the audio to transcribe and where it comes from.
message TranscriptionAudio {
  // The audio source. At most one of the fields below may be set.
  oneof source {
    // Inline audio bytes.
    bytes content = 1 [(google.api.field_behavior) = INPUT_ONLY];

    // URI for audio content. Reserved for future use.
    string uri = 2;
  }

  // Optional. The uploaded filename.
  string filename = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The MIME type of the input audio.
  string content_type = 4 [(google.api.field_behavior) = OPTIONAL];
}
|
||||
|
||||
// TranscribeResponse is the response message for AIService.Transcribe.
message TranscribeResponse {
  // The transcribed text.
  string text = 1;
}
|
||||
@ -0,0 +1,382 @@
|
||||
// Code generated by protoc-gen-go. DO NOT EDIT.
|
||||
// versions:
|
||||
// protoc-gen-go v1.36.11
|
||||
// protoc (unknown)
|
||||
// source: api/v1/ai_service.proto
|
||||
|
||||
package apiv1
|
||||
|
||||
import (
|
||||
_ "google.golang.org/genproto/googleapis/api/annotations"
|
||||
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
|
||||
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
|
||||
reflect "reflect"
|
||||
sync "sync"
|
||||
unsafe "unsafe"
|
||||
)
|
||||
|
||||
const (
|
||||
// Verify that this generated code is sufficiently up-to-date.
|
||||
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
|
||||
// Verify that runtime/protoimpl is sufficiently up-to-date.
|
||||
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
|
||||
)
|
||||
|
||||
type TranscribeRequest struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
// Required. The instance AI provider ID to use.
|
||||
ProviderId string `protobuf:"bytes,1,opt,name=provider_id,json=providerId,proto3" json:"provider_id,omitempty"`
|
||||
// Required. Transcription options.
|
||||
Config *TranscriptionConfig `protobuf:"bytes,2,opt,name=config,proto3" json:"config,omitempty"`
|
||||
// Required. Audio input.
|
||||
Audio *TranscriptionAudio `protobuf:"bytes,3,opt,name=audio,proto3" json:"audio,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
|
||||
func (x *TranscribeRequest) Reset() {
|
||||
*x = TranscribeRequest{}
|
||||
mi := &file_api_v1_ai_service_proto_msgTypes[0]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
|
||||
func (x *TranscribeRequest) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*TranscribeRequest) ProtoMessage() {}
|
||||
|
||||
func (x *TranscribeRequest) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_api_v1_ai_service_proto_msgTypes[0]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use TranscribeRequest.ProtoReflect.Descriptor instead.
|
||||
func (*TranscribeRequest) Descriptor() ([]byte, []int) {
|
||||
return file_api_v1_ai_service_proto_rawDescGZIP(), []int{0}
|
||||
}
|
||||
|
||||
func (x *TranscribeRequest) GetProviderId() string {
|
||||
if x != nil {
|
||||
return x.ProviderId
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *TranscribeRequest) GetConfig() *TranscriptionConfig {
|
||||
if x != nil {
|
||||
return x.Config
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (x *TranscribeRequest) GetAudio() *TranscriptionAudio {
|
||||
if x != nil {
|
||||
return x.Audio
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type TranscriptionConfig struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
// Optional. The model to use. If empty, the provider's default model is used.
|
||||
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
|
||||
// Optional. A prompt to improve transcription quality.
|
||||
Prompt string `protobuf:"bytes,2,opt,name=prompt,proto3" json:"prompt,omitempty"`
|
||||
// Optional. The language of the input audio.
|
||||
Language string `protobuf:"bytes,3,opt,name=language,proto3" json:"language,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
|
||||
func (x *TranscriptionConfig) Reset() {
|
||||
*x = TranscriptionConfig{}
|
||||
mi := &file_api_v1_ai_service_proto_msgTypes[1]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
|
||||
func (x *TranscriptionConfig) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*TranscriptionConfig) ProtoMessage() {}
|
||||
|
||||
func (x *TranscriptionConfig) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_api_v1_ai_service_proto_msgTypes[1]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use TranscriptionConfig.ProtoReflect.Descriptor instead.
|
||||
func (*TranscriptionConfig) Descriptor() ([]byte, []int) {
|
||||
return file_api_v1_ai_service_proto_rawDescGZIP(), []int{1}
|
||||
}
|
||||
|
||||
func (x *TranscriptionConfig) GetModel() string {
|
||||
if x != nil {
|
||||
return x.Model
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *TranscriptionConfig) GetPrompt() string {
|
||||
if x != nil {
|
||||
return x.Prompt
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *TranscriptionConfig) GetLanguage() string {
|
||||
if x != nil {
|
||||
return x.Language
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type TranscriptionAudio struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
// Types that are valid to be assigned to Source:
|
||||
//
|
||||
// *TranscriptionAudio_Content
|
||||
// *TranscriptionAudio_Uri
|
||||
Source isTranscriptionAudio_Source `protobuf_oneof:"source"`
|
||||
// Optional. The uploaded filename.
|
||||
Filename string `protobuf:"bytes,3,opt,name=filename,proto3" json:"filename,omitempty"`
|
||||
// Optional. The MIME type of the input audio.
|
||||
ContentType string `protobuf:"bytes,4,opt,name=content_type,json=contentType,proto3" json:"content_type,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
|
||||
func (x *TranscriptionAudio) Reset() {
|
||||
*x = TranscriptionAudio{}
|
||||
mi := &file_api_v1_ai_service_proto_msgTypes[2]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
|
||||
func (x *TranscriptionAudio) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*TranscriptionAudio) ProtoMessage() {}
|
||||
|
||||
func (x *TranscriptionAudio) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_api_v1_ai_service_proto_msgTypes[2]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use TranscriptionAudio.ProtoReflect.Descriptor instead.
|
||||
func (*TranscriptionAudio) Descriptor() ([]byte, []int) {
|
||||
return file_api_v1_ai_service_proto_rawDescGZIP(), []int{2}
|
||||
}
|
||||
|
||||
func (x *TranscriptionAudio) GetSource() isTranscriptionAudio_Source {
|
||||
if x != nil {
|
||||
return x.Source
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (x *TranscriptionAudio) GetContent() []byte {
|
||||
if x != nil {
|
||||
if x, ok := x.Source.(*TranscriptionAudio_Content); ok {
|
||||
return x.Content
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (x *TranscriptionAudio) GetUri() string {
|
||||
if x != nil {
|
||||
if x, ok := x.Source.(*TranscriptionAudio_Uri); ok {
|
||||
return x.Uri
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *TranscriptionAudio) GetFilename() string {
|
||||
if x != nil {
|
||||
return x.Filename
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *TranscriptionAudio) GetContentType() string {
|
||||
if x != nil {
|
||||
return x.ContentType
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type isTranscriptionAudio_Source interface {
|
||||
isTranscriptionAudio_Source()
|
||||
}
|
||||
|
||||
type TranscriptionAudio_Content struct {
|
||||
// Inline audio bytes.
|
||||
Content []byte `protobuf:"bytes,1,opt,name=content,proto3,oneof"`
|
||||
}
|
||||
|
||||
type TranscriptionAudio_Uri struct {
|
||||
// URI for audio content. Reserved for future use.
|
||||
Uri string `protobuf:"bytes,2,opt,name=uri,proto3,oneof"`
|
||||
}
|
||||
|
||||
func (*TranscriptionAudio_Content) isTranscriptionAudio_Source() {}
|
||||
|
||||
func (*TranscriptionAudio_Uri) isTranscriptionAudio_Source() {}
|
||||
|
||||
type TranscribeResponse struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
// The transcribed text.
|
||||
Text string `protobuf:"bytes,1,opt,name=text,proto3" json:"text,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
|
||||
func (x *TranscribeResponse) Reset() {
|
||||
*x = TranscribeResponse{}
|
||||
mi := &file_api_v1_ai_service_proto_msgTypes[3]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
|
||||
func (x *TranscribeResponse) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*TranscribeResponse) ProtoMessage() {}
|
||||
|
||||
func (x *TranscribeResponse) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_api_v1_ai_service_proto_msgTypes[3]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use TranscribeResponse.ProtoReflect.Descriptor instead.
|
||||
func (*TranscribeResponse) Descriptor() ([]byte, []int) {
|
||||
return file_api_v1_ai_service_proto_rawDescGZIP(), []int{3}
|
||||
}
|
||||
|
||||
func (x *TranscribeResponse) GetText() string {
|
||||
if x != nil {
|
||||
return x.Text
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
var File_api_v1_ai_service_proto protoreflect.FileDescriptor
|
||||
|
||||
const file_api_v1_ai_service_proto_rawDesc = "" +
|
||||
"\n" +
|
||||
"\x17api/v1/ai_service.proto\x12\fmemos.api.v1\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\"\xb6\x01\n" +
|
||||
"\x11TranscribeRequest\x12$\n" +
|
||||
"\vprovider_id\x18\x01 \x01(\tB\x03\xe0A\x02R\n" +
|
||||
"providerId\x12>\n" +
|
||||
"\x06config\x18\x02 \x01(\v2!.memos.api.v1.TranscriptionConfigB\x03\xe0A\x02R\x06config\x12;\n" +
|
||||
"\x05audio\x18\x03 \x01(\v2 .memos.api.v1.TranscriptionAudioB\x03\xe0A\x02R\x05audio\"n\n" +
|
||||
"\x13TranscriptionConfig\x12\x19\n" +
|
||||
"\x05model\x18\x01 \x01(\tB\x03\xe0A\x01R\x05model\x12\x1b\n" +
|
||||
"\x06prompt\x18\x02 \x01(\tB\x03\xe0A\x01R\x06prompt\x12\x1f\n" +
|
||||
"\blanguage\x18\x03 \x01(\tB\x03\xe0A\x01R\blanguage\"\x9c\x01\n" +
|
||||
"\x12TranscriptionAudio\x12\x1f\n" +
|
||||
"\acontent\x18\x01 \x01(\fB\x03\xe0A\x04H\x00R\acontent\x12\x12\n" +
|
||||
"\x03uri\x18\x02 \x01(\tH\x00R\x03uri\x12\x1f\n" +
|
||||
"\bfilename\x18\x03 \x01(\tB\x03\xe0A\x01R\bfilename\x12&\n" +
|
||||
"\fcontent_type\x18\x04 \x01(\tB\x03\xe0A\x01R\vcontentTypeB\b\n" +
|
||||
"\x06source\"(\n" +
|
||||
"\x12TranscribeResponse\x12\x12\n" +
|
||||
"\x04text\x18\x01 \x01(\tR\x04text2\x9a\x01\n" +
|
||||
"\tAIService\x12\x8c\x01\n" +
|
||||
"\n" +
|
||||
"Transcribe\x12\x1f.memos.api.v1.TranscribeRequest\x1a .memos.api.v1.TranscribeResponse\";\xdaA\x18provider_id,config,audio\x82\xd3\xe4\x93\x02\x1a:\x01*\"\x15/api/v1/ai:transcribeB\xa6\x01\n" +
|
||||
"\x10com.memos.api.v1B\x0eAiServiceProtoP\x01Z0github.com/usememos/memos/proto/gen/api/v1;apiv1\xa2\x02\x03MAX\xaa\x02\fMemos.Api.V1\xca\x02\fMemos\\Api\\V1\xe2\x02\x18Memos\\Api\\V1\\GPBMetadata\xea\x02\x0eMemos::Api::V1b\x06proto3"
|
||||
|
||||
var (
|
||||
file_api_v1_ai_service_proto_rawDescOnce sync.Once
|
||||
file_api_v1_ai_service_proto_rawDescData []byte
|
||||
)
|
||||
|
||||
func file_api_v1_ai_service_proto_rawDescGZIP() []byte {
|
||||
file_api_v1_ai_service_proto_rawDescOnce.Do(func() {
|
||||
file_api_v1_ai_service_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_api_v1_ai_service_proto_rawDesc), len(file_api_v1_ai_service_proto_rawDesc)))
|
||||
})
|
||||
return file_api_v1_ai_service_proto_rawDescData
|
||||
}
|
||||
|
||||
var file_api_v1_ai_service_proto_msgTypes = make([]protoimpl.MessageInfo, 4)
|
||||
var file_api_v1_ai_service_proto_goTypes = []any{
|
||||
(*TranscribeRequest)(nil), // 0: memos.api.v1.TranscribeRequest
|
||||
(*TranscriptionConfig)(nil), // 1: memos.api.v1.TranscriptionConfig
|
||||
(*TranscriptionAudio)(nil), // 2: memos.api.v1.TranscriptionAudio
|
||||
(*TranscribeResponse)(nil), // 3: memos.api.v1.TranscribeResponse
|
||||
}
|
||||
var file_api_v1_ai_service_proto_depIdxs = []int32{
|
||||
1, // 0: memos.api.v1.TranscribeRequest.config:type_name -> memos.api.v1.TranscriptionConfig
|
||||
2, // 1: memos.api.v1.TranscribeRequest.audio:type_name -> memos.api.v1.TranscriptionAudio
|
||||
0, // 2: memos.api.v1.AIService.Transcribe:input_type -> memos.api.v1.TranscribeRequest
|
||||
3, // 3: memos.api.v1.AIService.Transcribe:output_type -> memos.api.v1.TranscribeResponse
|
||||
3, // [3:4] is the sub-list for method output_type
|
||||
2, // [2:3] is the sub-list for method input_type
|
||||
2, // [2:2] is the sub-list for extension type_name
|
||||
2, // [2:2] is the sub-list for extension extendee
|
||||
0, // [0:2] is the sub-list for field type_name
|
||||
}
|
||||
|
||||
func init() { file_api_v1_ai_service_proto_init() }
|
||||
func file_api_v1_ai_service_proto_init() {
|
||||
if File_api_v1_ai_service_proto != nil {
|
||||
return
|
||||
}
|
||||
file_api_v1_ai_service_proto_msgTypes[2].OneofWrappers = []any{
|
||||
(*TranscriptionAudio_Content)(nil),
|
||||
(*TranscriptionAudio_Uri)(nil),
|
||||
}
|
||||
type x struct{}
|
||||
out := protoimpl.TypeBuilder{
|
||||
File: protoimpl.DescBuilder{
|
||||
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
|
||||
RawDescriptor: unsafe.Slice(unsafe.StringData(file_api_v1_ai_service_proto_rawDesc), len(file_api_v1_ai_service_proto_rawDesc)),
|
||||
NumEnums: 0,
|
||||
NumMessages: 4,
|
||||
NumExtensions: 0,
|
||||
NumServices: 1,
|
||||
},
|
||||
GoTypes: file_api_v1_ai_service_proto_goTypes,
|
||||
DependencyIndexes: file_api_v1_ai_service_proto_depIdxs,
|
||||
MessageInfos: file_api_v1_ai_service_proto_msgTypes,
|
||||
}.Build()
|
||||
File_api_v1_ai_service_proto = out.File
|
||||
file_api_v1_ai_service_proto_goTypes = nil
|
||||
file_api_v1_ai_service_proto_depIdxs = nil
|
||||
}
|
||||
@ -0,0 +1,157 @@
|
||||
// Code generated by protoc-gen-grpc-gateway. DO NOT EDIT.
|
||||
// source: api/v1/ai_service.proto
|
||||
|
||||
/*
|
||||
Package apiv1 is a reverse proxy.
|
||||
|
||||
It translates gRPC into RESTful JSON APIs.
|
||||
*/
|
||||
package apiv1
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
|
||||
"github.com/grpc-ecosystem/grpc-gateway/v2/runtime"
|
||||
"github.com/grpc-ecosystem/grpc-gateway/v2/utilities"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/grpclog"
|
||||
"google.golang.org/grpc/metadata"
|
||||
"google.golang.org/grpc/status"
|
||||
"google.golang.org/protobuf/proto"
|
||||
)
|
||||
|
||||
// Suppress "imported and not used" errors
|
||||
var (
|
||||
_ codes.Code
|
||||
_ io.Reader
|
||||
_ status.Status
|
||||
_ = errors.New
|
||||
_ = runtime.String
|
||||
_ = utilities.NewDoubleArray
|
||||
_ = metadata.Join
|
||||
)
|
||||
|
||||
func request_AIService_Transcribe_0(ctx context.Context, marshaler runtime.Marshaler, client AIServiceClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) {
|
||||
var (
|
||||
protoReq TranscribeRequest
|
||||
metadata runtime.ServerMetadata
|
||||
)
|
||||
if err := marshaler.NewDecoder(req.Body).Decode(&protoReq); err != nil && !errors.Is(err, io.EOF) {
|
||||
return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
|
||||
}
|
||||
if req.Body != nil {
|
||||
_, _ = io.Copy(io.Discard, req.Body)
|
||||
}
|
||||
msg, err := client.Transcribe(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD))
|
||||
return msg, metadata, err
|
||||
}
|
||||
|
||||
func local_request_AIService_Transcribe_0(ctx context.Context, marshaler runtime.Marshaler, server AIServiceServer, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) {
|
||||
var (
|
||||
protoReq TranscribeRequest
|
||||
metadata runtime.ServerMetadata
|
||||
)
|
||||
if err := marshaler.NewDecoder(req.Body).Decode(&protoReq); err != nil && !errors.Is(err, io.EOF) {
|
||||
return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err)
|
||||
}
|
||||
msg, err := server.Transcribe(ctx, &protoReq)
|
||||
return msg, metadata, err
|
||||
}
|
||||
|
||||
// RegisterAIServiceHandlerServer registers the http handlers for service AIService to "mux".
// UnaryRPC     :call AIServiceServer directly.
// StreamingRPC :currently unsupported pending https://github.com/grpc/grpc-go/issues/906.
// Note that using this registration option will cause many gRPC library features to stop working. Consider using RegisterAIServiceHandlerFromEndpoint instead.
// GRPC interceptors will not work for this type of registration. To use interceptors, you must use the "runtime.WithMiddlewares" option in the "runtime.NewServeMux" call.
func RegisterAIServiceHandlerServer(ctx context.Context, mux *runtime.ServeMux, server AIServiceServer) error {
	mux.Handle(http.MethodPost, pattern_AIService_Transcribe_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
		// Per-request context, cancelled when this handler returns.
		ctx, cancel := context.WithCancel(req.Context())
		defer cancel()
		// The transport stream is placed on the context so that headers and
		// trailers recorded on it can be merged into the response metadata below.
		var stream runtime.ServerTransportStream
		ctx = grpc.NewContextWithServerTransportStream(ctx, &stream)
		inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
		annotatedContext, err := runtime.AnnotateIncomingContext(ctx, mux, req, "/memos.api.v1.AIService/Transcribe", runtime.WithHTTPPathPattern("/api/v1/ai:transcribe"))
		if err != nil {
			runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
			return
		}
		resp, md, err := local_request_AIService_Transcribe_0(annotatedContext, inboundMarshaler, server, req, pathParams)
		// Merge stream headers/trailers before the error check so they are
		// attached to the context used for both error and success responses.
		md.HeaderMD, md.TrailerMD = metadata.Join(md.HeaderMD, stream.Header()), metadata.Join(md.TrailerMD, stream.Trailer())
		annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
		if err != nil {
			runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
			return
		}
		forward_AIService_Transcribe_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...)
	})

	return nil
}
|
||||
|
||||
// RegisterAIServiceHandlerFromEndpoint is same as RegisterAIServiceHandler but
// automatically dials to "endpoint" and closes the connection when "ctx" gets done.
func RegisterAIServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) {
	conn, err := grpc.NewClient(endpoint, opts...)
	if err != nil {
		return err
	}
	// The deferred function inspects the named result err: if registration
	// failed the connection is closed immediately, otherwise a goroutine is
	// started that closes it once ctx is done.
	defer func() {
		if err != nil {
			if cerr := conn.Close(); cerr != nil {
				grpclog.Errorf("Failed to close conn to %s: %v", endpoint, cerr)
			}
			return
		}
		go func() {
			<-ctx.Done()
			if cerr := conn.Close(); cerr != nil {
				grpclog.Errorf("Failed to close conn to %s: %v", endpoint, cerr)
			}
		}()
	}()
	return RegisterAIServiceHandler(ctx, mux, conn)
}
|
||||
|
||||
// RegisterAIServiceHandler registers the http handlers for service AIService to "mux".
// The handlers forward requests to the grpc endpoint over "conn".
// It wraps conn with NewAIServiceClient and delegates to
// RegisterAIServiceHandlerClient.
func RegisterAIServiceHandler(ctx context.Context, mux *runtime.ServeMux, conn *grpc.ClientConn) error {
	return RegisterAIServiceHandlerClient(ctx, mux, NewAIServiceClient(conn))
}
|
||||
|
||||
// RegisterAIServiceHandlerClient registers the http handlers for service AIService
// to "mux". The handlers forward requests to the grpc endpoint over the given implementation of "AIServiceClient".
// Note: the gRPC framework executes interceptors within the gRPC handler. If the passed in "AIServiceClient"
// doesn't go through the normal gRPC flow (creating a gRPC client etc.) then it will be up to the passed in
// "AIServiceClient" to call the correct interceptors. This client ignores the HTTP middlewares.
func RegisterAIServiceHandlerClient(ctx context.Context, mux *runtime.ServeMux, client AIServiceClient) error {
	mux.Handle(http.MethodPost, pattern_AIService_Transcribe_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) {
		// Per-request context, cancelled when this handler returns.
		ctx, cancel := context.WithCancel(req.Context())
		defer cancel()
		inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req)
		annotatedContext, err := runtime.AnnotateContext(ctx, mux, req, "/memos.api.v1.AIService/Transcribe", runtime.WithHTTPPathPattern("/api/v1/ai:transcribe"))
		if err != nil {
			runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err)
			return
		}
		resp, md, err := request_AIService_Transcribe_0(annotatedContext, inboundMarshaler, client, req, pathParams)
		// The server metadata is attached before the error check so it is
		// available to both the error and the success response paths.
		annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md)
		if err != nil {
			runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err)
			return
		}
		forward_AIService_Transcribe_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...)
	})
	return nil
}
|
||||
|
||||
var (
	// pattern_AIService_Transcribe_0 is the compiled route pattern used when
	// registering the Transcribe handlers; the handlers annotate requests with
	// the HTTP path pattern "/api/v1/ai:transcribe".
	pattern_AIService_Transcribe_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2}, []string{"api", "v1", "ai"}, "transcribe"))
)
|
||||
|
||||
var (
	// forward_AIService_Transcribe_0 writes the Transcribe response message
	// back to the HTTP client; it is the stock runtime.ForwardResponseMessage.
	forward_AIService_Transcribe_0 = runtime.ForwardResponseMessage
)
|
||||
@ -0,0 +1,123 @@
|
||||
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
|
||||
// versions:
|
||||
// - protoc-gen-go-grpc v1.6.1
|
||||
// - protoc (unknown)
|
||||
// source: api/v1/ai_service.proto
|
||||
|
||||
package apiv1
|
||||
|
||||
import (
|
||||
context "context"
|
||||
grpc "google.golang.org/grpc"
|
||||
codes "google.golang.org/grpc/codes"
|
||||
status "google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
// This is a compile-time assertion to ensure that this generated file
|
||||
// is compatible with the grpc package it is being compiled against.
|
||||
// Requires gRPC-Go v1.64.0 or later.
|
||||
const _ = grpc.SupportPackageIsVersion9
|
||||
|
||||
const (
	// AIService_Transcribe_FullMethodName is the full gRPC method name of the
	// Transcribe RPC, used by both the client stub and the server handler.
	AIService_Transcribe_FullMethodName = "/memos.api.v1.AIService/Transcribe"
)
|
||||
|
||||
// AIServiceClient is the client API for AIService service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type AIServiceClient interface {
	// Transcribe transcribes an audio file using an instance AI provider.
	// opts are per-call gRPC options forwarded to the underlying connection.
	Transcribe(ctx context.Context, in *TranscribeRequest, opts ...grpc.CallOption) (*TranscribeResponse, error)
}
|
||||
|
||||
// aIServiceClient is the AIServiceClient implementation returned by
// NewAIServiceClient.
type aIServiceClient struct {
	// cc is the connection on which RPCs are invoked.
	cc grpc.ClientConnInterface
}

// NewAIServiceClient returns an AIServiceClient that invokes RPCs on cc.
func NewAIServiceClient(cc grpc.ClientConnInterface) AIServiceClient {
	return &aIServiceClient{cc}
}
|
||||
|
||||
// Transcribe invokes the Transcribe RPC on the client's connection and returns
// the decoded response, or nil and the invocation error.
func (c *aIServiceClient) Transcribe(ctx context.Context, in *TranscribeRequest, opts ...grpc.CallOption) (*TranscribeResponse, error) {
	// grpc.StaticMethod() is placed first, followed by the caller's options.
	cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	out := new(TranscribeResponse)
	err := c.cc.Invoke(ctx, AIService_Transcribe_FullMethodName, in, out, cOpts...)
	if err != nil {
		return nil, err
	}
	return out, nil
}
|
||||
|
||||
// AIServiceServer is the server API for AIService service.
// All implementations must embed UnimplementedAIServiceServer
// for forward compatibility.
type AIServiceServer interface {
	// Transcribe transcribes an audio file using an instance AI provider.
	Transcribe(context.Context, *TranscribeRequest) (*TranscribeResponse, error)
	// mustEmbedUnimplementedAIServiceServer is unexported, so it can only be
	// satisfied by embedding UnimplementedAIServiceServer.
	mustEmbedUnimplementedAIServiceServer()
}
|
||||
|
||||
// UnimplementedAIServiceServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedAIServiceServer struct{}

// Transcribe always returns a codes.Unimplemented status error.
func (UnimplementedAIServiceServer) Transcribe(context.Context, *TranscribeRequest) (*TranscribeResponse, error) {
	return nil, status.Error(codes.Unimplemented, "method Transcribe not implemented")
}

// mustEmbedUnimplementedAIServiceServer satisfies the unexported marker method
// of AIServiceServer.
func (UnimplementedAIServiceServer) mustEmbedUnimplementedAIServiceServer() {}

// testEmbeddedByValue is a no-op that RegisterAIServiceServer calls to detect a
// nil embedded pointer at registration time.
func (UnimplementedAIServiceServer) testEmbeddedByValue() {}
|
||||
|
||||
// UnsafeAIServiceServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to AIServiceServer will
// result in compilation errors.
type UnsafeAIServiceServer interface {
	// mustEmbedUnimplementedAIServiceServer is the same marker method that
	// AIServiceServer requires.
	mustEmbedUnimplementedAIServiceServer()
}
|
||||
|
||||
// RegisterAIServiceServer registers srv with s under AIService_ServiceDesc.
func RegisterAIServiceServer(s grpc.ServiceRegistrar, srv AIServiceServer) {
	// If the following call panics, it indicates UnimplementedAIServiceServer was
	// embedded by pointer and is nil.  This will cause panics if an
	// unimplemented method is ever invoked, so we test this at initialization
	// time to prevent it from happening at runtime later due to I/O.
	if t, ok := srv.(interface{ testEmbeddedByValue() }); ok {
		t.testEmbeddedByValue()
	}
	s.RegisterService(&AIService_ServiceDesc, srv)
}
|
||||
|
||||
// _AIService_Transcribe_Handler is the gRPC method handler for Transcribe. It
// decodes the request with dec and calls srv.Transcribe, routing the call
// through interceptor when one is supplied.
func _AIService_Transcribe_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	in := new(TranscribeRequest)
	if err := dec(in); err != nil {
		return nil, err
	}
	// Without an interceptor the implementation is called directly.
	if interceptor == nil {
		return srv.(AIServiceServer).Transcribe(ctx, in)
	}
	info := &grpc.UnaryServerInfo{
		Server:     srv,
		FullMethod: AIService_Transcribe_FullMethodName,
	}
	// handler is what the interceptor invokes to reach the implementation.
	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
		return srv.(AIServiceServer).Transcribe(ctx, req.(*TranscribeRequest))
	}
	return interceptor(ctx, in, info, handler)
}
|
||||
|
||||
// AIService_ServiceDesc is the grpc.ServiceDesc for AIService service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var AIService_ServiceDesc = grpc.ServiceDesc{
	ServiceName: "memos.api.v1.AIService",
	HandlerType: (*AIServiceServer)(nil),
	// The service exposes a single unary method and no streams.
	Methods: []grpc.MethodDesc{
		{
			MethodName: "Transcribe",
			Handler:    _AIService_Transcribe_Handler,
		},
	},
	Streams:  []grpc.StreamDesc{},
	Metadata: "api/v1/ai_service.proto",
}
|
||||
@ -0,0 +1,110 @@
|
||||
// Code generated by protoc-gen-connect-go. DO NOT EDIT.
|
||||
//
|
||||
// Source: api/v1/ai_service.proto
|
||||
|
||||
package apiv1connect
|
||||
|
||||
import (
|
||||
connect "connectrpc.com/connect"
|
||||
context "context"
|
||||
errors "errors"
|
||||
v1 "github.com/usememos/memos/proto/gen/api/v1"
|
||||
http "net/http"
|
||||
strings "strings"
|
||||
)
|
||||
|
||||
// This is a compile-time assertion to ensure that this generated file and the connect package are
|
||||
// compatible. If you get a compiler error that this constant is not defined, this code was
|
||||
// generated with a version of connect newer than the one compiled into your binary. You can fix the
|
||||
// problem by either regenerating this code with an older version of connect or updating the connect
|
||||
// version compiled into your binary.
|
||||
const _ = connect.IsAtLeastVersion1_13_0
|
||||
|
||||
const (
|
||||
// AIServiceName is the fully-qualified name of the AIService service.
|
||||
AIServiceName = "memos.api.v1.AIService"
|
||||
)
|
||||
|
||||
// These constants are the fully-qualified names of the RPCs defined in this package. They're
|
||||
// exposed at runtime as Spec.Procedure and as the final two segments of the HTTP route.
|
||||
//
|
||||
// Note that these are different from the fully-qualified method names used by
|
||||
// google.golang.org/protobuf/reflect/protoreflect. To convert from these constants to
|
||||
// reflection-formatted method names, remove the leading slash and convert the remaining slash to a
|
||||
// period.
|
||||
const (
|
||||
// AIServiceTranscribeProcedure is the fully-qualified name of the AIService's Transcribe RPC.
|
||||
AIServiceTranscribeProcedure = "/memos.api.v1.AIService/Transcribe"
|
||||
)
|
||||
|
||||
// AIServiceClient is a client for the memos.api.v1.AIService service.
type AIServiceClient interface {
	// Transcribe transcribes an audio file using an instance AI provider.
	// The request and response messages are wrapped in connect.Request and
	// connect.Response.
	Transcribe(context.Context, *connect.Request[v1.TranscribeRequest]) (*connect.Response[v1.TranscribeResponse], error)
}
|
||||
|
||||
// NewAIServiceClient constructs a client for the memos.api.v1.AIService service. By default, it
// uses the Connect protocol with the binary Protobuf Codec, asks for gzipped responses, and sends
// uncompressed requests. To use the gRPC or gRPC-Web protocols, supply the connect.WithGRPC() or
// connect.WithGRPCWeb() options.
//
// The URL supplied here should be the base URL for the Connect or gRPC server (for example,
// http://api.acme.com or https://acme.com/grpc).
func NewAIServiceClient(httpClient connect.HTTPClient, baseURL string, opts ...connect.ClientOption) AIServiceClient {
	// Trailing slashes are removed because the procedure constant appended
	// below already begins with "/".
	baseURL = strings.TrimRight(baseURL, "/")
	aIServiceMethods := v1.File_api_v1_ai_service_proto.Services().ByName("AIService").Methods()
	return &aIServiceClient{
		transcribe: connect.NewClient[v1.TranscribeRequest, v1.TranscribeResponse](
			httpClient,
			baseURL+AIServiceTranscribeProcedure,
			connect.WithSchema(aIServiceMethods.ByName("Transcribe")),
			connect.WithClientOptions(opts...),
		),
	}
}
|
||||
|
||||
// aIServiceClient implements AIServiceClient.
type aIServiceClient struct {
	// transcribe is the typed connect client for the Transcribe procedure.
	transcribe *connect.Client[v1.TranscribeRequest, v1.TranscribeResponse]
}

// Transcribe calls memos.api.v1.AIService.Transcribe.
func (c *aIServiceClient) Transcribe(ctx context.Context, req *connect.Request[v1.TranscribeRequest]) (*connect.Response[v1.TranscribeResponse], error) {
	return c.transcribe.CallUnary(ctx, req)
}
|
||||
|
||||
// AIServiceHandler is an implementation of the memos.api.v1.AIService service.
type AIServiceHandler interface {
	// Transcribe transcribes an audio file using an instance AI provider.
	// NewAIServiceHandler serves this method at AIServiceTranscribeProcedure.
	Transcribe(context.Context, *connect.Request[v1.TranscribeRequest]) (*connect.Response[v1.TranscribeResponse], error)
}
|
||||
|
||||
// NewAIServiceHandler builds an HTTP handler from the service implementation. It returns the path
// on which to mount the handler and the handler itself.
//
// By default, handlers support the Connect, gRPC, and gRPC-Web protocols with the binary Protobuf
// and JSON codecs. They also support gzip compression.
func NewAIServiceHandler(svc AIServiceHandler, opts ...connect.HandlerOption) (string, http.Handler) {
	aIServiceMethods := v1.File_api_v1_ai_service_proto.Services().ByName("AIService").Methods()
	aIServiceTranscribeHandler := connect.NewUnaryHandler(
		AIServiceTranscribeProcedure,
		svc.Transcribe,
		connect.WithSchema(aIServiceMethods.ByName("Transcribe")),
		connect.WithHandlerOptions(opts...),
	)
	// Requests are dispatched on the exact URL path; any path other than the
	// Transcribe procedure receives a 404.
	return "/memos.api.v1.AIService/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case AIServiceTranscribeProcedure:
			aIServiceTranscribeHandler.ServeHTTP(w, r)
		default:
			http.NotFound(w, r)
		}
	})
}
|
||||
|
||||
// UnimplementedAIServiceHandler returns CodeUnimplemented from all methods.
type UnimplementedAIServiceHandler struct{}

// Transcribe always returns a connect error with code CodeUnimplemented.
func (UnimplementedAIServiceHandler) Transcribe(context.Context, *connect.Request[v1.TranscribeRequest]) (*connect.Response[v1.TranscribeResponse], error) {
	return nil, connect.NewError(connect.CodeUnimplemented, errors.New("memos.api.v1.AIService.Transcribe is not implemented"))
}
|
||||
@ -0,0 +1,198 @@
|
||||
package v1
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"mime"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
|
||||
"github.com/usememos/memos/internal/ai"
|
||||
"github.com/usememos/memos/internal/ai/openai"
|
||||
v1pb "github.com/usememos/memos/proto/gen/api/v1"
|
||||
storepb "github.com/usememos/memos/proto/gen/store"
|
||||
)
|
||||
|
||||
// Request limits enforced by Transcribe before any provider is contacted.
const (
	// maxTranscriptionAudioSizeBytes bounds the inline audio content size.
	// MebiByte is defined elsewhere in this package.
	maxTranscriptionAudioSizeBytes = 25 * MebiByte
	// maxTranscriptionPromptLength bounds the whitespace-trimmed prompt.
	maxTranscriptionPromptLength = 4096
	// maxTranscriptionLanguageLength bounds the whitespace-trimmed language.
	maxTranscriptionLanguageLength = 32
	// maxTranscriptionFilenameLength bounds the whitespace-trimmed filename.
	maxTranscriptionFilenameLength = 255
)
|
||||
|
||||
// supportedTranscriptionContentTypes is the allow-list of lowercase media
// types accepted for transcription audio.
var supportedTranscriptionContentTypes = map[string]bool{
	"audio/mpeg": true,
	"audio/mp4":  true,
	"audio/mpga": true,
	"audio/wav":  true,
	// http.DetectContentType reports RIFF/WAVE data as "audio/wave"; without
	// this entry a WAV upload that omits its content type is rejected.
	"audio/wave":  true,
	"audio/x-wav": true,
	"audio/webm":  true,
	"audio/x-m4a": true,
	"video/mp4":   true,
	"video/mpeg":  true,
	"video/webm":  true,
}
|
||||
|
||||
// Transcribe transcribes an audio file using an instance AI provider.
|
||||
func (s *APIV1Service) Transcribe(ctx context.Context, request *v1pb.TranscribeRequest) (*v1pb.TranscribeResponse, error) {
|
||||
user, err := s.fetchCurrentUser(ctx)
|
||||
if err != nil {
|
||||
return nil, status.Errorf(codes.Internal, "failed to get current user: %v", err)
|
||||
}
|
||||
if user == nil {
|
||||
return nil, status.Errorf(codes.Unauthenticated, "user not authenticated")
|
||||
}
|
||||
|
||||
if strings.TrimSpace(request.ProviderId) == "" {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "provider_id is required")
|
||||
}
|
||||
if request.Config == nil {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "config is required")
|
||||
}
|
||||
prompt := strings.TrimSpace(request.Config.GetPrompt())
|
||||
if len(prompt) > maxTranscriptionPromptLength {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "prompt is too long; maximum length is %d characters", maxTranscriptionPromptLength)
|
||||
}
|
||||
language := strings.TrimSpace(request.Config.GetLanguage())
|
||||
if len(language) > maxTranscriptionLanguageLength {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "language is too long; maximum length is %d characters", maxTranscriptionLanguageLength)
|
||||
}
|
||||
if request.Audio == nil {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "audio is required")
|
||||
}
|
||||
if request.Audio.GetUri() != "" {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "audio uri is not supported")
|
||||
}
|
||||
content := request.Audio.GetContent()
|
||||
if len(content) == 0 {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "audio content is required")
|
||||
}
|
||||
if len(content) > maxTranscriptionAudioSizeBytes {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "audio file is too large; maximum size is 25 MiB")
|
||||
}
|
||||
filename := strings.TrimSpace(request.Audio.GetFilename())
|
||||
if len(filename) > maxTranscriptionFilenameLength {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "filename is too long; maximum length is %d characters", maxTranscriptionFilenameLength)
|
||||
}
|
||||
contentType := strings.TrimSpace(request.Audio.GetContentType())
|
||||
if contentType == "" {
|
||||
contentType = http.DetectContentType(content)
|
||||
}
|
||||
if !isSupportedTranscriptionContentType(contentType) {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "audio content type %q is not supported", contentType)
|
||||
}
|
||||
|
||||
provider, model, err := s.resolveAIProviderForTranscription(ctx, request.ProviderId, request.Config.GetModel())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
transcriber, err := newAITranscriber(provider)
|
||||
if err != nil {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "failed to create AI transcriber: %v", err)
|
||||
}
|
||||
|
||||
transcription, err := transcriber.Transcribe(ctx, ai.TranscribeRequest{
|
||||
Model: model,
|
||||
Filename: filename,
|
||||
ContentType: contentType,
|
||||
Audio: bytes.NewReader(content),
|
||||
Size: int64(len(content)),
|
||||
Prompt: prompt,
|
||||
Language: language,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, status.Errorf(codes.Internal, "failed to transcribe audio: %v", err)
|
||||
}
|
||||
return &v1pb.TranscribeResponse{
|
||||
Text: transcription.Text,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *APIV1Service) resolveAIProviderForTranscription(ctx context.Context, providerID string, model string) (ai.ProviderConfig, string, error) {
|
||||
setting, err := s.Store.GetInstanceAISetting(ctx)
|
||||
if err != nil {
|
||||
return ai.ProviderConfig{}, "", status.Errorf(codes.Internal, "failed to get AI setting: %v", err)
|
||||
}
|
||||
|
||||
providers := make([]ai.ProviderConfig, 0, len(setting.GetProviders()))
|
||||
for _, provider := range setting.GetProviders() {
|
||||
if provider == nil {
|
||||
continue
|
||||
}
|
||||
providers = append(providers, convertAIProviderConfigFromStore(provider))
|
||||
}
|
||||
|
||||
provider, err := ai.FindProvider(providers, providerID)
|
||||
if err != nil {
|
||||
return ai.ProviderConfig{}, "", status.Errorf(codes.NotFound, "AI provider not found")
|
||||
}
|
||||
selectedModel := strings.TrimSpace(model)
|
||||
if selectedModel == "" {
|
||||
selectedModel = provider.DefaultModel
|
||||
}
|
||||
if selectedModel == "" {
|
||||
return ai.ProviderConfig{}, "", status.Errorf(codes.InvalidArgument, "model is required")
|
||||
}
|
||||
if !containsString(provider.Models, selectedModel) {
|
||||
return ai.ProviderConfig{}, "", status.Errorf(codes.InvalidArgument, "model %q is not configured for provider %q", selectedModel, provider.ID)
|
||||
}
|
||||
return *provider, selectedModel, nil
|
||||
}
|
||||
|
||||
func convertAIProviderConfigFromStore(provider *storepb.AIProviderConfig) ai.ProviderConfig {
|
||||
return ai.ProviderConfig{
|
||||
ID: provider.GetId(),
|
||||
Title: provider.GetTitle(),
|
||||
Type: convertAIProviderTypeFromStore(provider.GetType()),
|
||||
Endpoint: provider.GetEndpoint(),
|
||||
APIKey: provider.GetApiKey(),
|
||||
Models: provider.GetModels(),
|
||||
DefaultModel: provider.GetDefaultModel(),
|
||||
}
|
||||
}
|
||||
|
||||
func convertAIProviderTypeFromStore(providerType storepb.AIProviderType) ai.ProviderType {
|
||||
switch providerType {
|
||||
case storepb.AIProviderType_OPENAI:
|
||||
return ai.ProviderOpenAI
|
||||
case storepb.AIProviderType_OPENAI_COMPATIBLE:
|
||||
return ai.ProviderOpenAICompatible
|
||||
case storepb.AIProviderType_ANTHROPIC:
|
||||
return ai.ProviderAnthropic
|
||||
case storepb.AIProviderType_GEMINI:
|
||||
return ai.ProviderGemini
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
func newAITranscriber(provider ai.ProviderConfig) (ai.Transcriber, error) {
|
||||
switch provider.Type {
|
||||
case ai.ProviderOpenAI, ai.ProviderOpenAICompatible:
|
||||
return openai.NewTranscriber(provider)
|
||||
default:
|
||||
return nil, errors.Wrapf(ai.ErrCapabilityUnsupported, "provider type %q", provider.Type)
|
||||
}
|
||||
}
|
||||
|
||||
// containsString reports whether target is an element of values. A nil or
// empty slice contains nothing.
func containsString(values []string, target string) bool {
	for i := 0; i < len(values); i++ {
		if values[i] == target {
			return true
		}
	}
	return false
}
|
||||
|
||||
func isSupportedTranscriptionContentType(contentType string) bool {
|
||||
mediaType, _, err := mime.ParseMediaType(strings.TrimSpace(contentType))
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
mediaType = strings.ToLower(mediaType)
|
||||
return supportedTranscriptionContentTypes[mediaType]
|
||||
}
|
||||
@ -0,0 +1,185 @@
|
||||
package test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
v1pb "github.com/usememos/memos/proto/gen/api/v1"
|
||||
storepb "github.com/usememos/memos/proto/gen/store"
|
||||
)
|
||||
|
||||
func TestTranscribe(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
t.Run("requires authentication", func(t *testing.T) {
|
||||
ts := NewTestService(t)
|
||||
defer ts.Cleanup()
|
||||
|
||||
_, err := ts.Service.Transcribe(ctx, &v1pb.TranscribeRequest{
|
||||
ProviderId: "openai-main",
|
||||
Config: &v1pb.TranscriptionConfig{
|
||||
Model: "gpt-4o-transcribe",
|
||||
},
|
||||
Audio: &v1pb.TranscriptionAudio{
|
||||
Source: &v1pb.TranscriptionAudio_Content{Content: []byte("RIFF")},
|
||||
Filename: "voice.wav",
|
||||
ContentType: "audio/wav",
|
||||
},
|
||||
})
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "user not authenticated")
|
||||
})
|
||||
|
||||
t.Run("transcribes audio file with configured provider", func(t *testing.T) {
|
||||
ts := NewTestService(t)
|
||||
defer ts.Cleanup()
|
||||
|
||||
user, err := ts.CreateRegularUser(ctx, "alice")
|
||||
require.NoError(t, err)
|
||||
userCtx := ts.CreateUserContext(ctx, user.ID)
|
||||
|
||||
openAIServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
require.Equal(t, "/audio/transcriptions", r.URL.Path)
|
||||
require.Equal(t, "Bearer sk-test", r.Header.Get("Authorization"))
|
||||
require.NoError(t, r.ParseMultipartForm(10<<20))
|
||||
require.Equal(t, "gpt-4o-transcribe", r.FormValue("model"))
|
||||
require.Equal(t, "names: Alice", r.FormValue("prompt"))
|
||||
|
||||
file, header, err := r.FormFile("file")
|
||||
require.NoError(t, err)
|
||||
defer file.Close()
|
||||
require.Equal(t, "voice.wav", header.Filename)
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
require.NoError(t, json.NewEncoder(w).Encode(map[string]string{
|
||||
"text": "transcribed text",
|
||||
}))
|
||||
}))
|
||||
defer openAIServer.Close()
|
||||
|
||||
_, err = ts.Store.UpsertInstanceSetting(ctx, &storepb.InstanceSetting{
|
||||
Key: storepb.InstanceSettingKey_AI,
|
||||
Value: &storepb.InstanceSetting_AiSetting{
|
||||
AiSetting: &storepb.InstanceAISetting{
|
||||
Providers: []*storepb.AIProviderConfig{
|
||||
{
|
||||
Id: "openai-main",
|
||||
Title: "OpenAI",
|
||||
Type: storepb.AIProviderType_OPENAI_COMPATIBLE,
|
||||
Endpoint: openAIServer.URL,
|
||||
ApiKey: "sk-test",
|
||||
Models: []string{"gpt-4o-transcribe"},
|
||||
DefaultModel: "gpt-4o-transcribe",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
resp, err := ts.Service.Transcribe(userCtx, &v1pb.TranscribeRequest{
|
||||
ProviderId: "openai-main",
|
||||
Config: &v1pb.TranscriptionConfig{
|
||||
Prompt: "names: Alice",
|
||||
},
|
||||
Audio: &v1pb.TranscriptionAudio{
|
||||
Source: &v1pb.TranscriptionAudio_Content{Content: []byte("RIFF")},
|
||||
Filename: "voice.wav",
|
||||
ContentType: "audio/wav",
|
||||
},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "transcribed text", resp.Text)
|
||||
})
|
||||
|
||||
t.Run("rejects unconfigured model", func(t *testing.T) {
|
||||
ts := NewTestService(t)
|
||||
defer ts.Cleanup()
|
||||
|
||||
user, err := ts.CreateRegularUser(ctx, "bob")
|
||||
require.NoError(t, err)
|
||||
userCtx := ts.CreateUserContext(ctx, user.ID)
|
||||
|
||||
_, err = ts.Store.UpsertInstanceSetting(ctx, &storepb.InstanceSetting{
|
||||
Key: storepb.InstanceSettingKey_AI,
|
||||
Value: &storepb.InstanceSetting_AiSetting{
|
||||
AiSetting: &storepb.InstanceAISetting{
|
||||
Providers: []*storepb.AIProviderConfig{
|
||||
{
|
||||
Id: "openai-main",
|
||||
Title: "OpenAI",
|
||||
Type: storepb.AIProviderType_OPENAI_COMPATIBLE,
|
||||
Endpoint: "https://example.com/v1",
|
||||
ApiKey: "sk-test",
|
||||
Models: []string{"gpt-4o-transcribe"},
|
||||
DefaultModel: "gpt-4o-transcribe",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = ts.Service.Transcribe(userCtx, &v1pb.TranscribeRequest{
|
||||
ProviderId: "openai-main",
|
||||
Config: &v1pb.TranscriptionConfig{
|
||||
Model: "other-model",
|
||||
},
|
||||
Audio: &v1pb.TranscriptionAudio{
|
||||
Source: &v1pb.TranscriptionAudio_Content{Content: []byte("RIFF")},
|
||||
Filename: "voice.wav",
|
||||
ContentType: "audio/wav",
|
||||
},
|
||||
})
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "not configured")
|
||||
})
|
||||
|
||||
t.Run("rejects non-audio content before provider call", func(t *testing.T) {
|
||||
ts := NewTestService(t)
|
||||
defer ts.Cleanup()
|
||||
|
||||
user, err := ts.CreateRegularUser(ctx, "charlie")
|
||||
require.NoError(t, err)
|
||||
userCtx := ts.CreateUserContext(ctx, user.ID)
|
||||
|
||||
_, err = ts.Store.UpsertInstanceSetting(ctx, &storepb.InstanceSetting{
|
||||
Key: storepb.InstanceSettingKey_AI,
|
||||
Value: &storepb.InstanceSetting_AiSetting{
|
||||
AiSetting: &storepb.InstanceAISetting{
|
||||
Providers: []*storepb.AIProviderConfig{
|
||||
{
|
||||
Id: "openai-main",
|
||||
Title: "OpenAI",
|
||||
Type: storepb.AIProviderType_OPENAI_COMPATIBLE,
|
||||
Endpoint: "https://example.com/v1",
|
||||
ApiKey: "sk-test",
|
||||
Models: []string{"gpt-4o-transcribe"},
|
||||
DefaultModel: "gpt-4o-transcribe",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = ts.Service.Transcribe(userCtx, &v1pb.TranscribeRequest{
|
||||
ProviderId: "openai-main",
|
||||
Config: &v1pb.TranscriptionConfig{
|
||||
Model: "gpt-4o-transcribe",
|
||||
},
|
||||
Audio: &v1pb.TranscriptionAudio{
|
||||
Source: &v1pb.TranscriptionAudio_Content{Content: []byte("not audio")},
|
||||
Filename: "notes.txt",
|
||||
ContentType: "text/plain",
|
||||
},
|
||||
})
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "not supported")
|
||||
})
|
||||
}
|
||||
@ -0,0 +1,408 @@
|
||||
import { create } from "@bufbuild/protobuf";
|
||||
import { isEqual } from "lodash-es";
|
||||
import { MoreVerticalIcon, PlusIcon } from "lucide-react";
|
||||
import { useEffect, useMemo, useState } from "react";
|
||||
import { toast } from "react-hot-toast";
|
||||
import ConfirmDialog from "@/components/ConfirmDialog";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle } from "@/components/ui/dialog";
|
||||
import { DropdownMenu, DropdownMenuContent, DropdownMenuItem, DropdownMenuTrigger } from "@/components/ui/dropdown-menu";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
|
||||
import { Textarea } from "@/components/ui/textarea";
|
||||
import { useInstance } from "@/contexts/InstanceContext";
|
||||
import { handleError } from "@/lib/error";
|
||||
import {
|
||||
InstanceSetting_AIProviderConfig,
|
||||
InstanceSetting_AIProviderConfigSchema,
|
||||
InstanceSetting_AIProviderType,
|
||||
InstanceSetting_AISettingSchema,
|
||||
InstanceSetting_Key,
|
||||
InstanceSettingSchema,
|
||||
} from "@/types/proto/api/v1/instance_service_pb";
|
||||
import { useTranslate } from "@/utils/i18n";
|
||||
import SettingGroup from "./SettingGroup";
|
||||
import SettingSection from "./SettingSection";
|
||||
import SettingTable from "./SettingTable";
|
||||
|
||||
// LocalAIProvider is the component-local, editable form of an AI provider
// configuration.
type LocalAIProvider = {
  id: string;
  title: string;
  type: InstanceSetting_AIProviderType;
  endpoint: string;
  // API key text held locally; it is set to "" whenever a provider is loaded
  // from the server config or opened for editing.
  apiKey: string;
  // Copied from the server config's apiKeySet / apiKeyHint fields.
  apiKeySet: boolean;
  apiKeyHint: string;
  models: string[];
  defaultModel: string;
};
|
||||
|
||||
// Provider types in a fixed display order.
// NOTE(review): presumably the choices offered by the provider type select;
// the usage is outside this view — confirm.
const providerTypeOptions = [
  InstanceSetting_AIProviderType.OPENAI,
  InstanceSetting_AIProviderType.OPENAI_COMPATIBLE,
  InstanceSetting_AIProviderType.ANTHROPIC,
  InstanceSetting_AIProviderType.GEMINI,
];
|
||||
|
||||
// createProviderID returns a fresh provider identifier: a random UUID when
// crypto.randomUUID is available, otherwise an "ai-<timestamp>-<random>" string.
const createProviderID = () => {
  const canUseRandomUUID = typeof crypto !== "undefined" && "randomUUID" in crypto;
  if (!canUseRandomUUID) {
    const timestamp = Date.now();
    const suffix = Math.random().toString(36).slice(2, 8);
    return `ai-${timestamp}-${suffix}`;
  }
  return crypto.randomUUID();
};
|
||||
|
||||
// getProviderTypeLabel returns the enum member name for type, or "UNKNOWN"
// when the enum has no name for that value.
const getProviderTypeLabel = (type: InstanceSetting_AIProviderType) => {
  const label = InstanceSetting_AIProviderType[type];
  return label ?? "UNKNOWN";
};
|
||||
|
||||
// toLocalProvider converts a server provider config into its editable local
// form. The API key always starts empty and the models array is copied.
const toLocalProvider = (provider: InstanceSetting_AIProviderConfig): LocalAIProvider => {
  const { id, title, type, endpoint, apiKeySet, apiKeyHint, models, defaultModel } = provider;
  return {
    id,
    title,
    type,
    endpoint,
    apiKey: "",
    apiKeySet,
    apiKeyHint,
    models: [...models],
    defaultModel,
  };
};
|
||||
|
||||
/**
 * Parses newline-separated model names: trims each line, drops blank lines and
 * removes duplicates while keeping first-seen order.
 */
const normalizeModels = (value: string) => {
  const seen = new Set<string>();
  for (const line of value.split(/\r?\n/)) {
    const name = line.trim();
    if (name) {
      seen.add(name);
    }
  }
  return Array.from(seen);
};
|
||||
|
||||
/** Builds a blank provider draft with a fresh ID and the OPENAI type preselected. */
const newProvider = (): LocalAIProvider => {
  const blank: LocalAIProvider = {
    id: createProviderID(),
    title: "",
    type: InstanceSetting_AIProviderType.OPENAI,
    endpoint: "",
    apiKey: "",
    apiKeySet: false,
    apiKeyHint: "",
    models: [],
    defaultModel: "",
  };
  return blank;
};
|
||||
|
||||
/**
 * Converts a local provider draft into the protobuf config message sent to the server.
 *
 * All free-text fields are trimmed, including `apiKey`, so that a padded or
 * whitespace-only key is never submitted as a replacement key.
 */
const toProviderConfig = (provider: LocalAIProvider) =>
  create(InstanceSetting_AIProviderConfigSchema, {
    id: provider.id,
    title: provider.title.trim(),
    type: provider.type,
    endpoint: provider.endpoint.trim(),
    apiKey: provider.apiKey.trim(),
    models: provider.models,
    defaultModel: provider.defaultModel.trim(),
  });
|
||||
|
||||
/**
 * Settings section that lists the instance's AI providers and lets the user add,
 * edit and delete them.
 *
 * Changes are staged in local state; nothing is sent until the Save button calls
 * `updateSetting`, after which the AI setting is re-fetched.
 */
const AISection = () => {
  const t = useTranslate();
  const { aiSetting: originalSetting, updateSetting, fetchSetting } = useInstance();
  const [providers, setProviders] = useState<LocalAIProvider[]>(() => originalSetting.providers.map(toLocalProvider));
  const [editingProvider, setEditingProvider] = useState<LocalAIProvider | undefined>();
  const [deleteTarget, setDeleteTarget] = useState<LocalAIProvider | undefined>();

  // Reset the staged list whenever the providers in the instance setting change.
  useEffect(() => {
    setProviders(originalSetting.providers.map(toLocalProvider));
  }, [originalSetting.providers]);

  const originalProviders = useMemo(() => originalSetting.providers.map(toLocalProvider), [originalSetting.providers]);
  const hasChanges = !isEqual(providers, originalProviders);

  const handleCreateProvider = () => {
    setEditingProvider(newProvider());
  };

  const handleEditProvider = (provider: LocalAIProvider) => {
    // The key field always opens blank; a key typed in an earlier, still-unsaved edit is discarded.
    setEditingProvider({ ...provider, apiKey: "" });
  };

  // Validates the dialog's draft and stages it in the local list (appending or replacing by id).
  const handleSaveProvider = (provider: LocalAIProvider) => {
    const title = provider.title.trim();
    const endpoint = provider.endpoint.trim();
    // Trim the key once and use the trimmed value for both validation and storage, so a
    // whitespace-only key cannot mark the form dirty or be sent as a replacement key.
    const apiKey = provider.apiKey.trim();
    const models = provider.models.map((model) => model.trim()).filter(Boolean);
    // An empty default model falls back to the first listed model.
    const defaultModel = provider.defaultModel.trim() || models[0] || "";

    if (!title) {
      toast.error(t("setting.ai.provider-title-required"));
      return;
    }
    if (provider.type === InstanceSetting_AIProviderType.OPENAI_COMPATIBLE && !endpoint) {
      toast.error(t("setting.ai.endpoint-required"));
      return;
    }
    if (!provider.apiKeySet && !apiKey) {
      toast.error(t("setting.ai.api-key-required"));
      return;
    }
    if (models.length === 0) {
      toast.error(t("setting.ai.models-required"));
      return;
    }
    if (defaultModel && !models.includes(defaultModel)) {
      toast.error(t("setting.ai.default-model-required"));
      return;
    }

    const normalizedProvider: LocalAIProvider = {
      ...provider,
      title,
      endpoint,
      apiKey,
      models,
      defaultModel,
    };
    setProviders((prev) => {
      const exists = prev.some((item) => item.id === normalizedProvider.id);
      if (!exists) {
        return [...prev, normalizedProvider];
      }
      return prev.map((item) => (item.id === normalizedProvider.id ? normalizedProvider : item));
    });
    setEditingProvider(undefined);
  };

  const handleDeleteProvider = () => {
    if (!deleteTarget) return;
    setProviders((prev) => prev.filter((provider) => provider.id !== deleteTarget.id));
    setDeleteTarget(undefined);
  };

  // Sends the staged provider list as the AI instance setting, then re-fetches it.
  const handleSaveSetting = async () => {
    try {
      await updateSetting(
        create(InstanceSettingSchema, {
          name: `instance/settings/${InstanceSetting_Key[InstanceSetting_Key.AI]}`,
          value: {
            case: "aiSetting",
            value: create(InstanceSetting_AISettingSchema, {
              providers: providers.map(toProviderConfig),
            }),
          },
        }),
      );
      await fetchSetting(InstanceSetting_Key.AI);
      toast.success(t("message.update-succeed"));
    } catch (error: unknown) {
      handleError(error, toast.error, {
        context: "Update AI providers",
      });
    }
  };

  return (
    <SettingSection
      title={t("setting.ai.label")}
      actions={
        <Button onClick={handleCreateProvider}>
          <PlusIcon className="w-4 h-4 mr-2" />
          {t("setting.ai.add-provider")}
        </Button>
      }
    >
      <SettingGroup title={t("setting.ai.providers")} description={t("setting.ai.description")}>
        <SettingTable
          columns={[
            {
              key: "title",
              header: t("common.name"),
              render: (_, provider: LocalAIProvider) => (
                <div className="flex flex-col gap-0.5">
                  <span className="text-foreground">{provider.title}</span>
                  <span className="font-mono text-xs text-muted-foreground">{provider.id}</span>
                </div>
              ),
            },
            {
              key: "type",
              header: t("setting.ai.provider-type"),
              render: (_, provider: LocalAIProvider) => <span>{getProviderTypeLabel(provider.type)}</span>,
            },
            {
              key: "models",
              header: t("setting.ai.models"),
              render: (_, provider: LocalAIProvider) => (
                <div className="flex flex-col gap-0.5">
                  <span className="text-foreground">{provider.defaultModel || provider.models[0] || "-"}</span>
                  <span className="text-xs text-muted-foreground">{t("setting.ai.model-count", { count: provider.models.length })}</span>
                </div>
              ),
            },
            {
              key: "apiKeySet",
              header: t("setting.ai.api-key"),
              render: (_, provider: LocalAIProvider) => (
                <span className="font-mono text-xs">{provider.apiKeySet ? provider.apiKeyHint || t("setting.ai.configured") : "-"}</span>
              ),
            },
            {
              key: "actions",
              header: "",
              className: "text-right",
              render: (_, provider: LocalAIProvider) => (
                <DropdownMenu>
                  <DropdownMenuTrigger asChild>
                    <Button variant="outline" size="sm">
                      <MoreVerticalIcon className="w-4 h-auto" />
                    </Button>
                  </DropdownMenuTrigger>
                  <DropdownMenuContent align="end" sideOffset={2}>
                    <DropdownMenuItem onClick={() => handleEditProvider(provider)}>{t("common.edit")}</DropdownMenuItem>
                    <DropdownMenuItem onClick={() => setDeleteTarget(provider)} className="text-destructive focus:text-destructive">
                      {t("common.delete")}
                    </DropdownMenuItem>
                  </DropdownMenuContent>
                </DropdownMenu>
              ),
            },
          ]}
          data={providers}
          emptyMessage={t("setting.ai.no-providers")}
          getRowKey={(provider) => provider.id}
        />
      </SettingGroup>

      <div className="w-full flex justify-end">
        <Button disabled={!hasChanges} onClick={handleSaveSetting}>
          {t("common.save")}
        </Button>
      </div>

      <AIProviderDialog
        provider={editingProvider}
        onOpenChange={(open) => !open && setEditingProvider(undefined)}
        onSave={handleSaveProvider}
      />

      <ConfirmDialog
        open={!!deleteTarget}
        onOpenChange={(open) => !open && setDeleteTarget(undefined)}
        title={deleteTarget ? t("setting.ai.delete-provider", { title: deleteTarget.title }) : ""}
        confirmLabel={t("common.delete")}
        cancelLabel={t("common.cancel")}
        onConfirm={handleDeleteProvider}
        confirmVariant="destructive"
      />
    </SettingSection>
  );
};
|
||||
|
||||
/** Props for AIProviderDialog. */
interface AIProviderDialogProps {
  // Provider being edited; the dialog is open only while this is defined.
  provider?: LocalAIProvider;
  // Forwarded to the underlying Dialog; also called with `false` by the Cancel button.
  onOpenChange: (open: boolean) => void;
  // Called with the draft (models parsed from the textarea) when Save is clicked.
  onSave: (provider: LocalAIProvider) => void;
}
|
||||
|
||||
/**
 * Modal form for creating or editing a single AI provider.
 *
 * The dialog is open while `provider` is defined. It keeps its own draft copy and a
 * raw textarea string for the model list; `onSave` receives the draft with `models`
 * parsed by `normalizeModels`. Validation is left to the `onSave` handler.
 *
 * Every label is bound to its control through `htmlFor`/`id`, so clicking a label
 * focuses the field and the field has an accessible name.
 */
const AIProviderDialog = ({ provider, onOpenChange, onSave }: AIProviderDialogProps) => {
  const t = useTranslate();
  const [draft, setDraft] = useState<LocalAIProvider>(() => provider ?? newProvider());
  const [modelsText, setModelsText] = useState("");

  // Reset the form whenever a different provider (or none) is passed in.
  useEffect(() => {
    const next = provider ?? newProvider();
    setDraft(next);
    setModelsText(next.models.join("\n"));
  }, [provider]);

  const updateDraft = (partial: Partial<LocalAIProvider>) => {
    setDraft((prev) => ({ ...prev, ...partial }));
  };

  const handleSave = () => {
    onSave({
      ...draft,
      models: normalizeModels(modelsText),
    });
  };

  return (
    <Dialog open={!!provider} onOpenChange={onOpenChange}>
      <DialogContent size="2xl">
        <DialogHeader>
          <DialogTitle>{provider?.apiKeySet ? t("setting.ai.edit-provider") : t("setting.ai.add-provider")}</DialogTitle>
          <DialogDescription>{t("setting.ai.dialog-description")}</DialogDescription>
        </DialogHeader>

        <div className="grid grid-cols-1 sm:grid-cols-2 gap-3">
          <div className="flex flex-col gap-1.5">
            <Label htmlFor="ai-provider-title">{t("setting.ai.provider-title")}</Label>
            <Input
              id="ai-provider-title"
              value={draft.title}
              onChange={(e) => updateDraft({ title: e.target.value })}
              placeholder="OpenAI"
            />
          </div>

          <div className="flex flex-col gap-1.5">
            <Label htmlFor="ai-provider-type">{t("setting.ai.provider-type")}</Label>
            <Select
              value={String(draft.type)}
              onValueChange={(value) => updateDraft({ type: Number(value) as InstanceSetting_AIProviderType })}
            >
              <SelectTrigger id="ai-provider-type" className="w-full">
                <SelectValue />
              </SelectTrigger>
              <SelectContent>
                {providerTypeOptions.map((type) => (
                  <SelectItem key={type} value={String(type)}>
                    {getProviderTypeLabel(type)}
                  </SelectItem>
                ))}
              </SelectContent>
            </Select>
          </div>

          <div className="flex flex-col gap-1.5 sm:col-span-2">
            <Label htmlFor="ai-provider-endpoint">{t("setting.ai.endpoint")}</Label>
            <Input
              id="ai-provider-endpoint"
              value={draft.endpoint}
              onChange={(e) => updateDraft({ endpoint: e.target.value })}
              placeholder={draft.type === InstanceSetting_AIProviderType.OPENAI ? "https://api.openai.com/v1" : "https://example.com/v1"}
            />
          </div>

          <div className="flex flex-col gap-1.5 sm:col-span-2">
            <Label htmlFor="ai-provider-api-key">{t("setting.ai.api-key")}</Label>
            <Input
              id="ai-provider-api-key"
              type="password"
              value={draft.apiKey}
              onChange={(e) => updateDraft({ apiKey: e.target.value })}
              placeholder={draft.apiKeySet ? t("setting.ai.keep-api-key") : ""}
            />
            {draft.apiKeySet && (
              <p className="text-xs text-muted-foreground">{t("setting.ai.current-key", { key: draft.apiKeyHint || "-" })}</p>
            )}
          </div>

          <div className="flex flex-col gap-1.5 sm:col-span-2">
            <Label htmlFor="ai-provider-models">{t("setting.ai.models")}</Label>
            <Textarea
              id="ai-provider-models"
              className="font-mono text-sm min-h-28"
              value={modelsText}
              onChange={(e) => setModelsText(e.target.value)}
              placeholder={"gpt-4o-transcribe\ngpt-4o-mini-transcribe"}
            />
            <p className="text-xs text-muted-foreground">{t("setting.ai.models-hint")}</p>
          </div>

          <div className="flex flex-col gap-1.5 sm:col-span-2">
            <Label htmlFor="ai-provider-default-model">{t("setting.ai.default-model")}</Label>
            <Input
              id="ai-provider-default-model"
              value={draft.defaultModel}
              onChange={(e) => updateDraft({ defaultModel: e.target.value })}
              // Hint at the model used when this field is left empty.
              placeholder={normalizeModels(modelsText)[0] ?? ""}
            />
          </div>
        </div>

        <DialogFooter>
          <Button variant="ghost" onClick={() => onOpenChange(false)}>
            {t("common.cancel")}
          </Button>
          <Button onClick={handleSave}>{t("common.save")}</Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
};
|
||||
|
||||
export default AISection;
|
||||
@ -0,0 +1,166 @@
|
||||
// @generated by protoc-gen-es v2.11.0 with parameter "target=ts"
|
||||
// @generated from file api/v1/ai_service.proto (package memos.api.v1, syntax proto3)
|
||||
/* eslint-disable */
|
||||
|
||||
import type { GenFile, GenMessage, GenService } from "@bufbuild/protobuf/codegenv2";
|
||||
import { fileDesc, messageDesc, serviceDesc } from "@bufbuild/protobuf/codegenv2";
|
||||
import { file_google_api_annotations } from "../../google/api/annotations_pb";
|
||||
import { file_google_api_client } from "../../google/api/client_pb";
|
||||
import { file_google_api_field_behavior } from "../../google/api/field_behavior_pb";
|
||||
import type { Message } from "@bufbuild/protobuf";
|
||||
|
||||
/**
|
||||
* Describes the file api/v1/ai_service.proto.
|
||||
*/
|
||||
export const file_api_v1_ai_service: GenFile = /*@__PURE__*/
|
||||
fileDesc("ChdhcGkvdjEvYWlfc2VydmljZS5wcm90bxIMbWVtb3MuYXBpLnYxIpsBChFUcmFuc2NyaWJlUmVxdWVzdBIYCgtwcm92aWRlcl9pZBgBIAEoCUID4EECEjYKBmNvbmZpZxgCIAEoCzIhLm1lbW9zLmFwaS52MS5UcmFuc2NyaXB0aW9uQ29uZmlnQgPgQQISNAoFYXVkaW8YAyABKAsyIC5tZW1vcy5hcGkudjEuVHJhbnNjcmlwdGlvbkF1ZGlvQgPgQQIiVQoTVHJhbnNjcmlwdGlvbkNvbmZpZxISCgVtb2RlbBgBIAEoCUID4EEBEhMKBnByb21wdBgCIAEoCUID4EEBEhUKCGxhbmd1YWdlGAMgASgJQgPgQQEidwoSVHJhbnNjcmlwdGlvbkF1ZGlvEhYKB2NvbnRlbnQYASABKAxCA+BBBEgAEg0KA3VyaRgCIAEoCUgAEhUKCGZpbGVuYW1lGAMgASgJQgPgQQESGQoMY29udGVudF90eXBlGAQgASgJQgPgQQFCCAoGc291cmNlIiIKElRyYW5zY3JpYmVSZXNwb25zZRIMCgR0ZXh0GAEgASgJMpoBCglBSVNlcnZpY2USjAEKClRyYW5zY3JpYmUSHy5tZW1vcy5hcGkudjEuVHJhbnNjcmliZVJlcXVlc3QaIC5tZW1vcy5hcGkudjEuVHJhbnNjcmliZVJlc3BvbnNlIjvaQRhwcm92aWRlcl9pZCxjb25maWcsYXVkaW+C0+STAho6ASoiFS9hcGkvdjEvYWk6dHJhbnNjcmliZUKmAQoQY29tLm1lbW9zLmFwaS52MUIOQWlTZXJ2aWNlUHJvdG9QAVowZ2l0aHViLmNvbS91c2VtZW1vcy9tZW1vcy9wcm90by9nZW4vYXBpL3YxO2FwaXYxogIDTUFYqgIMTWVtb3MuQXBpLlYxygIMTWVtb3NcQXBpXFYx4gIYTWVtb3NcQXBpXFYxXEdQQk1ldGFkYXRh6gIOTWVtb3M6OkFwaTo6VjFiBnByb3RvMw", [file_google_api_annotations, file_google_api_client, file_google_api_field_behavior]);
|
||||
|
||||
/**
|
||||
* @generated from message memos.api.v1.TranscribeRequest
|
||||
*/
|
||||
export type TranscribeRequest = Message<"memos.api.v1.TranscribeRequest"> & {
|
||||
/**
|
||||
* Required. The instance AI provider ID to use.
|
||||
*
|
||||
* @generated from field: string provider_id = 1;
|
||||
*/
|
||||
providerId: string;
|
||||
|
||||
/**
|
||||
* Required. Transcription options.
|
||||
*
|
||||
* @generated from field: memos.api.v1.TranscriptionConfig config = 2;
|
||||
*/
|
||||
config?: TranscriptionConfig;
|
||||
|
||||
/**
|
||||
* Required. Audio input.
|
||||
*
|
||||
* @generated from field: memos.api.v1.TranscriptionAudio audio = 3;
|
||||
*/
|
||||
audio?: TranscriptionAudio;
|
||||
};
|
||||
|
||||
/**
|
||||
* Describes the message memos.api.v1.TranscribeRequest.
|
||||
* Use `create(TranscribeRequestSchema)` to create a new message.
|
||||
*/
|
||||
export const TranscribeRequestSchema: GenMessage<TranscribeRequest> = /*@__PURE__*/
|
||||
messageDesc(file_api_v1_ai_service, 0);
|
||||
|
||||
/**
|
||||
* @generated from message memos.api.v1.TranscriptionConfig
|
||||
*/
|
||||
export type TranscriptionConfig = Message<"memos.api.v1.TranscriptionConfig"> & {
|
||||
/**
|
||||
* Optional. The model to use. If empty, the provider's default model is used.
|
||||
*
|
||||
* @generated from field: string model = 1;
|
||||
*/
|
||||
model: string;
|
||||
|
||||
/**
|
||||
* Optional. A prompt to improve transcription quality.
|
||||
*
|
||||
* @generated from field: string prompt = 2;
|
||||
*/
|
||||
prompt: string;
|
||||
|
||||
/**
|
||||
* Optional. The language of the input audio.
|
||||
*
|
||||
* @generated from field: string language = 3;
|
||||
*/
|
||||
language: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* Describes the message memos.api.v1.TranscriptionConfig.
|
||||
* Use `create(TranscriptionConfigSchema)` to create a new message.
|
||||
*/
|
||||
export const TranscriptionConfigSchema: GenMessage<TranscriptionConfig> = /*@__PURE__*/
|
||||
messageDesc(file_api_v1_ai_service, 1);
|
||||
|
||||
/**
|
||||
* @generated from message memos.api.v1.TranscriptionAudio
|
||||
*/
|
||||
export type TranscriptionAudio = Message<"memos.api.v1.TranscriptionAudio"> & {
|
||||
/**
|
||||
* @generated from oneof memos.api.v1.TranscriptionAudio.source
|
||||
*/
|
||||
source: {
|
||||
/**
|
||||
* Inline audio bytes.
|
||||
*
|
||||
* @generated from field: bytes content = 1;
|
||||
*/
|
||||
value: Uint8Array;
|
||||
case: "content";
|
||||
} | {
|
||||
/**
|
||||
* URI for audio content. Reserved for future use.
|
||||
*
|
||||
* @generated from field: string uri = 2;
|
||||
*/
|
||||
value: string;
|
||||
case: "uri";
|
||||
} | { case: undefined; value?: undefined };
|
||||
|
||||
/**
|
||||
* Optional. The uploaded filename.
|
||||
*
|
||||
* @generated from field: string filename = 3;
|
||||
*/
|
||||
filename: string;
|
||||
|
||||
/**
|
||||
* Optional. The MIME type of the input audio.
|
||||
*
|
||||
* @generated from field: string content_type = 4;
|
||||
*/
|
||||
contentType: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* Describes the message memos.api.v1.TranscriptionAudio.
|
||||
* Use `create(TranscriptionAudioSchema)` to create a new message.
|
||||
*/
|
||||
export const TranscriptionAudioSchema: GenMessage<TranscriptionAudio> = /*@__PURE__*/
|
||||
messageDesc(file_api_v1_ai_service, 2);
|
||||
|
||||
/**
|
||||
* @generated from message memos.api.v1.TranscribeResponse
|
||||
*/
|
||||
export type TranscribeResponse = Message<"memos.api.v1.TranscribeResponse"> & {
|
||||
/**
|
||||
* The transcribed text.
|
||||
*
|
||||
* @generated from field: string text = 1;
|
||||
*/
|
||||
text: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* Describes the message memos.api.v1.TranscribeResponse.
|
||||
* Use `create(TranscribeResponseSchema)` to create a new message.
|
||||
*/
|
||||
export const TranscribeResponseSchema: GenMessage<TranscribeResponse> = /*@__PURE__*/
|
||||
messageDesc(file_api_v1_ai_service, 3);
|
||||
|
||||
/**
|
||||
* @generated from service memos.api.v1.AIService
|
||||
*/
|
||||
export const AIService: GenService<{
|
||||
/**
|
||||
* Transcribe transcribes an audio file using an instance AI provider.
|
||||
*
|
||||
* @generated from rpc memos.api.v1.AIService.Transcribe
|
||||
*/
|
||||
transcribe: {
|
||||
methodKind: "unary";
|
||||
input: typeof TranscribeRequestSchema;
|
||||
output: typeof TranscribeResponseSchema;
|
||||
},
|
||||
}> = /*@__PURE__*/
|
||||
serviceDesc(file_api_v1_ai_service, 0);
|
||||
|
||||
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue