From 24f66c8e8116e97ef8f631190ca6a599073122a4 Mon Sep 17 00:00:00 2001 From: sion123 <450702724@qq.com> Date: Tue, 27 Jan 2026 01:39:08 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=8A=9F=E8=83=BD=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/app/web-gold/src/api/voice.js | 6 +- .../app/web-gold/src/composables/useTTS.js | 31 +- .../app/web-gold/src/config/voiceConfig.js | 61 ++++ frontend/app/web-gold/src/views/dh/Video.vue | 57 ++-- .../app/web-gold/src/views/dh/VoiceCopy.vue | 12 +- .../views/kling/hooks/useVoiceGeneration.ts | 2 + .../changes/refactor-voice-provider/design.md | 133 ++++++++ .../refactor-voice-provider/proposal.md | 35 +++ .../specs/voice-clone/spec.md | 132 ++++++++ .../changes/refactor-voice-provider/tasks.md | 53 ++++ sql/mysql/SQL建表语句.sql | 296 ++++++++++++++++++ .../tik/voice/client/CosyVoiceProvider.java | 160 ++++++++++ .../tik/voice/client/VoiceCloneProvider.java | 55 ++++ .../client/VoiceCloneProviderFactory.java | 104 ++++++ .../voice/client/dto/VoiceCloneRequest.java | 51 +++ .../voice/client/dto/VoiceCloneResult.java | 24 ++ .../tik/voice/client/dto/VoiceTtsRequest.java | 77 +++++ .../tik/voice/client/dto/VoiceTtsResult.java | 39 +++ .../voice/config/CosyVoiceProviderConfig.java | 64 ++++ .../voice/config/VoiceProviderProperties.java | 78 +++++ .../service/TikUserVoiceServiceImpl.java | 217 ++++++++----- .../voice/vo/AppTikUserVoiceCreateReqVO.java | 4 +- .../tik/voice/vo/AppTikVoicePreviewReqVO.java | 6 +- .../tik/voice/vo/AppTikVoiceTtsReqVO.java | 6 +- 24 files changed, 1570 insertions(+), 133 deletions(-) create mode 100644 frontend/app/web-gold/src/config/voiceConfig.js create mode 100644 openspec/changes/refactor-voice-provider/design.md create mode 100644 openspec/changes/refactor-voice-provider/proposal.md create mode 100644 openspec/changes/refactor-voice-provider/specs/voice-clone/spec.md create mode 100644 
openspec/changes/refactor-voice-provider/tasks.md create mode 100644 sql/mysql/SQL建表语句.sql create mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceProvider.java create mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProvider.java create mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProviderFactory.java create mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneRequest.java create mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneResult.java create mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceTtsRequest.java create mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceTtsResult.java create mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProviderConfig.java create mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/VoiceProviderProperties.java diff --git a/frontend/app/web-gold/src/api/voice.js b/frontend/app/web-gold/src/api/voice.js index 8c8908cbaf..3f0c2ea26e 100644 --- a/frontend/app/web-gold/src/api/voice.js +++ b/frontend/app/web-gold/src/api/voice.js @@ -22,6 +22,7 @@ export const VoiceService = { * @param {string} data.language - 语言(可选) * @param {string} data.gender - 音色类型(可选) * @param {string} data.note - 备注(可选) + * @param {string} data.providerType - 供应商类型(可选):cosyvoice-阿里云,siliconflow-硅基流动 * @returns {Promise} */ create(data) { @@ -88,8 +89,9 @@ export const VoiceService = { }, /** - * 文本转语音(CosyVoice) + * 文本转语音 * @param {Object} data + * @param {string} data.providerType - 供应商类型(可选):cosyvoice-阿里云,siliconflow-硅基流动 * @returns {Promise} */ synthesize(data) { @@ -99,6 +101,7 @@ export const VoiceService = { /** * 我的音色试听 * @param {Object} data + * @param {string} 
data.providerType - 供应商类型(可选):cosyvoice-阿里云,siliconflow-硅基流动 * @returns {Promise} */ preview(data) { @@ -107,4 +110,3 @@ export const VoiceService = { } export default VoiceService - diff --git a/frontend/app/web-gold/src/composables/useTTS.js b/frontend/app/web-gold/src/composables/useTTS.js index f55818e59b..48ef4f9d2f 100644 --- a/frontend/app/web-gold/src/composables/useTTS.js +++ b/frontend/app/web-gold/src/composables/useTTS.js @@ -1,21 +1,18 @@ /** * TTS (Text-to-Speech) 公共Hook - * 支持多个供应商:Qwen, Azure, AWS等 + * 支持多个供应商:CosyVoice, SiliconFlow, Azure, AWS等 */ import { ref, computed } from 'vue' import { message } from 'ant-design-vue' import { VoiceService } from '@/api/voice' +import { normalizeProviderType, VOICE_PROVIDER_TYPES } from '@/config/voiceConfig' -// 供应商配置 -const TTS_PROVIDERS = { - QWEN: 'qwen', - AZURE: 'azure', - AWS: 'aws' -} +// 兼容旧代码的导出 +const TTS_PROVIDERS = VOICE_PROVIDER_TYPES -// 默认配置 +// 供应商默认配置(使用标准化后的键名) const DEFAULT_CONFIG = { - qwen: { + cosyvoice: { apiEndpoint: '/api/tik/voice/tts', audioFormat: 'mp3', supportedFormats: ['mp3', 'wav'] @@ -32,16 +29,9 @@ const DEFAULT_CONFIG = { } } -/** - * TTS Hook主函数 - * @param {Object} options 配置选项 - * @param {string} options.provider 供应商名称,默认'qwen' - * @param {Object} options.customConfig 自定义配置 - * @returns {Object} TTS相关的方法和状态 - */ export function useTTS(options = {}) { const { - provider = TTS_PROVIDERS.QWEN, + provider = VOICE_PROVIDER_TYPES.COSYVOICE, customConfig = {} } = options @@ -59,7 +49,8 @@ export function useTTS(options = {}) { // 获取当前供应商配置 const getProviderConfig = () => { - const config = DEFAULT_CONFIG[provider] || DEFAULT_CONFIG[TTS_PROVIDERS.QWEN] + const normalizedProvider = normalizeProviderType(provider) + const config = DEFAULT_CONFIG[normalizedProvider] || DEFAULT_CONFIG.cosyvoice return { ...config, ...customConfig } } @@ -202,7 +193,7 @@ export function useTTS(options = {}) { speechRate: speechRate.value || 1.0, audioFormat: providerConfig.audioFormat, timestamp: 
Date.now(), - provider: provider + providerType: normalizeProviderType(provider) } } @@ -285,7 +276,7 @@ export function useTTS(options = {}) { voiceConfigId: params.voiceConfigId, speechRate: params.speechRate || speechRate.value, audioFormat: params.audioFormat || providerConfig.audioFormat, - provider: provider + providerType: normalizeProviderType(provider) } return await VoiceService.synthesize(ttsParams) diff --git a/frontend/app/web-gold/src/config/voiceConfig.js b/frontend/app/web-gold/src/config/voiceConfig.js new file mode 100644 index 0000000000..386b9c4c0e --- /dev/null +++ b/frontend/app/web-gold/src/config/voiceConfig.js @@ -0,0 +1,61 @@ +/** + * 语音供应商统一配置 + */ + +// 供应商类型枚举 +export const VOICE_PROVIDER_TYPES = { + COSYVOICE: 'cosyvoice', + SILICONFLOW: 'siliconflow', + QWEN: 'qwen', + AZURE: 'azure', + AWS: 'aws' +} + +// 默认供应商 +export const DEFAULT_VOICE_PROVIDER = VOICE_PROVIDER_TYPES.COSYVOICE + +// 供应商选项(用于下拉选择) +export const VOICE_PROVIDER_OPTIONS = [ + { label: '阿里云 CosyVoice', value: VOICE_PROVIDER_TYPES.COSYVOICE }, + { label: '硅基流动 SiliconFlow', value: VOICE_PROVIDER_TYPES.SILICONFLOW } +] + +// 供应商别名映射(兼容旧名称) +export const PROVIDER_ALIAS_MAP = { + [VOICE_PROVIDER_TYPES.QWEN]: VOICE_PROVIDER_TYPES.COSYVOICE +} + +/** + * 标准化供应商类型(处理别名映射) + */ +export function normalizeProviderType(providerType) { + if (!providerType) return DEFAULT_VOICE_PROVIDER + return PROVIDER_ALIAS_MAP[providerType] || providerType +} + +/** + * 获取供应商显示名称 + */ +export function getProviderLabel(providerType) { + const option = VOICE_PROVIDER_OPTIONS.find(opt => opt.value === providerType) + return option?.label || providerType +} + +/** + * 检查供应商是否支持 + */ +export function isProviderSupported(providerType) { + const normalized = normalizeProviderType(providerType) + return Object.values(VOICE_PROVIDER_TYPES).includes(normalized) +} + +// 默认导出配置对象 +export default { + VOICE_PROVIDER_TYPES, + DEFAULT_VOICE_PROVIDER, + VOICE_PROVIDER_OPTIONS, + PROVIDER_ALIAS_MAP, + 
normalizeProviderType, + getProviderLabel, + isProviderSupported +} diff --git a/frontend/app/web-gold/src/views/dh/Video.vue b/frontend/app/web-gold/src/views/dh/Video.vue index a893698598..a28335f887 100644 --- a/frontend/app/web-gold/src/views/dh/Video.vue +++ b/frontend/app/web-gold/src/views/dh/Video.vue @@ -14,6 +14,7 @@ import { MaterialService } from '@/api/material' import { createDigitalHumanTask, getDigitalHumanTask, cancelTask, retryTask } from '@/api/digitalHuman' import { extractVideoCover } from '@/utils/video-cover' import { useUpload } from '@/composables/useUpload' +import { DEFAULT_VOICE_PROVIDER } from '@/config/voiceConfig' // 导入 voiceStore 用于获取用户音色 import { useVoiceCopyStore } from '@/stores/voiceCopy' @@ -249,41 +250,38 @@ const resetPreviewState = () => { } const buildPreviewParams = (voice) => { + // 公共参数 + const baseParams = { + inputText: ttsText.value, + speechRate: speechRate.value || 1.0, + audioFormat: 'mp3', + timestamp: Date.now(), + providerType: DEFAULT_VOICE_PROVIDER + } + if (voice.source === 'user') { - // 用户音色:使用voiceConfigId,不传instruction + // 用户音色:使用voiceConfigId const configId = voice.rawId || extractIdFromString(voice.id) if (!configId) { message.error('配音配置无效') return null } - return { - voiceConfigId: configId, - inputText: ttsText.value, - speechRate: speechRate.value || 1.0, - audioFormat: 'mp3', - timestamp: Date.now() // 添加时间戳确保每次请求不同 - } - } else { - // 系统音色:根据是否选择instruction或emotion来决定传递哪个参数 - const params = { - voiceId: voice.voiceId, - inputText: ttsText.value, - speechRate: speechRate.value || 1.0, - audioFormat: 'mp3', - timestamp: Date.now() // 添加时间戳确保每次请求不同 - } - - // instruction和emotion只能选一个传递 - if (instruction.value && instruction.value !== 'neutral') { - params.instruction = instruction.value - } else if (emotion.value && emotion.value !== 'neutral') { - params.emotion = emotion.value - } else if (voice.defaultInstruction) { - params.instruction = voice.defaultInstruction - } - - return params + return { 
...baseParams, voiceConfigId: configId } } + + // 系统音色:使用voiceId,可能包含instruction/emotion + const params = { ...baseParams, voiceId: voice.voiceId } + + // instruction和emotion只能选一个传递 + if (instruction.value && instruction.value !== 'neutral') { + params.instruction = instruction.value + } else if (emotion.value && emotion.value !== 'neutral') { + params.emotion = emotion.value + } else if (voice.defaultInstruction) { + params.instruction = voice.defaultInstruction + } + + return params } const extractIdFromString = (idStr) => { @@ -303,7 +301,8 @@ const handleSynthesizeVoice = async () => { const params = { inputText: ttsText.value, speechRate: speechRate.value, - audioFormat: 'mp3' + audioFormat: 'mp3', + providerType: DEFAULT_VOICE_PROVIDER } if (voice.source === 'user') { diff --git a/frontend/app/web-gold/src/views/dh/VoiceCopy.vue b/frontend/app/web-gold/src/views/dh/VoiceCopy.vue index d6c0c150f4..06e45718ab 100644 --- a/frontend/app/web-gold/src/views/dh/VoiceCopy.vue +++ b/frontend/app/web-gold/src/views/dh/VoiceCopy.vue @@ -113,8 +113,11 @@ import { MaterialService } from '@/api/material' import { useUpload } from '@/composables/useUpload' import dayjs from 'dayjs' import BasicLayout from '@/layouts/components/BasicLayout.vue' +import { VOICE_PROVIDER_OPTIONS, DEFAULT_VOICE_PROVIDER } from '@/config/voiceConfig' // ========== 常量 ========== +const PROVIDER_OPTIONS = VOICE_PROVIDER_OPTIONS + const DEFAULT_FORM_DATA = { id: null, name: '', @@ -122,7 +125,8 @@ const DEFAULT_FORM_DATA = { autoTranscribe: true, language: 'zh-CN', gender: 'female', - note: '' + note: '', + providerType: DEFAULT_VOICE_PROVIDER } // ========== 响应式数据 ========== @@ -183,7 +187,8 @@ const fillFormData = (data) => { fileId: data.fileId || null, language: data.language || 'zh-CN', gender: data.gender || 'female', - note: data.note || '' + note: data.note || '', + providerType: data.providerType || DEFAULT_VOICE_PROVIDER }) } @@ -363,7 +368,8 @@ const handleSubmit = async () => { 
autoTranscribe: formData.autoTranscribe, language: formData.language, gender: formData.gender, - note: formData.note + note: formData.note, + providerType: formData.providerType } : { id: formData.id, diff --git a/frontend/app/web-gold/src/views/kling/hooks/useVoiceGeneration.ts b/frontend/app/web-gold/src/views/kling/hooks/useVoiceGeneration.ts index 25a28190d2..7706e89a20 100644 --- a/frontend/app/web-gold/src/views/kling/hooks/useVoiceGeneration.ts +++ b/frontend/app/web-gold/src/views/kling/hooks/useVoiceGeneration.ts @@ -13,6 +13,7 @@ import type { } from '../types/identify-face' // @ts-ignore import { VoiceService } from '@/api/voice' +import { DEFAULT_VOICE_PROVIDER } from '@/config/voiceConfig' /** * 语音生成 Hook @@ -76,6 +77,7 @@ export function useVoiceGeneration(): UseVoiceGeneration { voiceConfigId: voice.rawId || extractIdFromString(voice.id), speechRate: speechRate.value || 1.0, audioFormat: 'mp3' as const, + providerType: DEFAULT_VOICE_PROVIDER, } const res = await VoiceService.synthesize(params) diff --git a/openspec/changes/refactor-voice-provider/design.md b/openspec/changes/refactor-voice-provider/design.md new file mode 100644 index 0000000000..b0f7a487a2 --- /dev/null +++ b/openspec/changes/refactor-voice-provider/design.md @@ -0,0 +1,133 @@ +# Technical Design: Voice Clone Provider Refactoring + +## Context + +当前语音克隆功能直接依赖阿里云 CosyVoice 的 SDK 和 API。Service 层直接调用 `CosyVoiceClient`,导致: + +1. **强耦合**:无法轻松切换或添加其他供应商 +2. **测试困难**:难以 mock 外部依赖 +3. **扩展性差**:添加新供应商需要修改 Service 层 + +## Goals / Non-Goals + +### Goals +- 解耦 Service 层与具体供应商实现 +- 支持多供应商并存和动态切换 +- 保持现有功能完全兼容 +- 为添加硅基流动 IndexTTS-2 打下基础 + +### Non-Goals +- 不改变现有 API 行为 +- 不修改数据库结构 +- 不改变前端交互 + +## Decisions + +### 1. 
采用策略模式 + 工厂模式 + +**Why**: +- 策略模式:定义统一接口,各供应商独立实现 +- 工厂模式:根据配置动态获取 Provider 实例 +- 符合开闭原则,扩展时无需修改现有代码 + +**架构**: +``` +VoiceCloneProvider (interface) +├── CosyVoiceProvider (impl) - 阿里云 CosyVoice (DashScope) +├── SiliconFlowProvider (impl) - 阶段二:硅基流动 IndexTTS-2 +└── VoiceCloneProviderFactory +``` + +**说明**: +- `CosyVoiceProvider` 对应阿里云 DashScope 的语音服务 +- 默认模型:`cosyvoice-v3-flash` +- 扩展时添加新的 Provider 实现 + +### 2. 统一 DTO 设计 + +**Why**: 屏蔽不同供应商的 API 差异 + +```java +// 统一请求 +VoiceCloneRequest { + String audioUrl; // 音频 URL + String prefix; // 音色前缀 + String targetModel; // 目标模型 +} + +// 统一响应 +VoiceCloneResult { + String voiceId; // 生成的音色 ID + String requestId; // 请求 ID +} +``` + +### 3. 配置结构设计 + +**新配置结构**: +```yaml +yudao: + voice: + # 默认供应商 + default-provider: cosyvoice + + # 供应商配置 + providers: + cosyvoice: # 阿里云 CosyVoice + enabled: true + api-key: ${DASHSCOPE_API_KEY} + default-model: cosyvoice-v3-flash + # ... 其他配置 + + siliconflow: # 阶段二添加 + enabled: false + api-key: ${SILICONFLOW_API_KEY} + base-url: https://api.siliconflow.cn + default-model: indextts-2 +``` + +**向后兼容**: +- 读取旧配置 `yudao.cosyvoice.*` 并合并到新结构 +- 优先使用新配置,旧配置作为 fallback + +### 4. 错误处理策略 + +- Provider 调用失败时,记录详细日志 +- 返回统一的业务异常 `VOICE_TTS_FAILED` +- 不暴露底层供应商的技术细节 + +## Risks / Trade-offs + +| Risk | Mitigation | +|------|------------| +| 破坏现有功能 | 充分测试,保持 DTO 兼容 | +| 配置迁移复杂 | 支持旧配置自动映射 | +| 性能开销 | 工厂缓存 Provider 实例 | + +## Migration Plan + +### 阶段一:CosyVoice 重构 +1. 创建接口和工厂 +2. 重构 CosyVoice 为 Provider 实现 +3. 更新 Service 层使用接口 +4. 测试验证 + +### 阶段二:添加 SiliconFlow +1. 实现 SiliconFlowProvider +2. 添加配置支持 +3. 集成测试 + +### 回滚方案 +- 保留原有配置支持 +- Feature Flag 控制新逻辑 + +## Open Questions + +1. **Q**: 是否需要支持运行时动态切换供应商? + **A**: 初期不支持,通过配置切换即可 + +2. **Q**: 是否需要 Provider 健康检查? + **A**: 阶段二考虑添加 + +3. **Q**: DTO 字段差异如何处理? 
+ **A**: 使用公共字段,扩展字段放 `Map extensions` diff --git a/openspec/changes/refactor-voice-provider/proposal.md b/openspec/changes/refactor-voice-provider/proposal.md new file mode 100644 index 0000000000..80d671ecd8 --- /dev/null +++ b/openspec/changes/refactor-voice-provider/proposal.md @@ -0,0 +1,35 @@ +# Change: Refactor Voice Clone Provider + +## Why + +当前语音克隆功能直接依赖阿里云 CosyVoice 实现,代码强耦合,扩展性差。添加新供应商(如硅基流动 IndexTTS-2)需要修改 Service 层代码,违反开闭原则。 + +**说明**: CosyVoice 是阿里云的语音合成服务(DashScope 平台),支持语音克隆和 TTS。当前代码使用 `cosyvoice-v3-flash` 模型。 + +## What Changes + +- **ADDED** 引入策略模式,定义 `VoiceCloneProvider` 统一接口 +- **ADDED** 创建工厂类 `VoiceCloneProviderFactory` 管理多供应商 +- **MODIFIED** 将现有 `CosyVoiceClient` 改造为 `CosyVoiceProvider` +- **MODIFIED** 更新 `TikUserVoiceServiceImpl` 使用 Provider 接口 +- **ADDED** 新增配置类支持多供应商配置和切换 +- **BREAKING** 配置项从 `yudao.cosyvoice` 迁移到 `yudao.voice.providers` + +## Impact + +- **Affected specs**: + - `voice-clone` (新增能力规范) +- **Affected code**: + - `TikUserVoiceServiceImpl.java` - Service 层改为依赖注入 Provider + - `CosyVoiceClient.java` → `CosyVoiceProvider.java` - 重命名并实现接口 + - `CosyVoiceProperties.java` → `VoiceProviderProperties.java` - 配置结构重组 + - 新增 `VoiceCloneProvider.java` - 统一接口定义 + - 新增 `VoiceCloneProviderFactory.java` - 工厂类 + - 新增 `SiliconFlowProvider.java` - 硅基流动实现(阶段二) + +## Migration + +- 现有配置自动迁移:`yudao.cosyvoice.*` → `yudao.voice.providers.cosyvoice.*` +- 默认供应商保持为 `cosyvoice` +- 默认行为保持不变,向后兼容 +- 支持通过配置切换供应商:`yudao.voice.default-provider` diff --git a/openspec/changes/refactor-voice-provider/specs/voice-clone/spec.md b/openspec/changes/refactor-voice-provider/specs/voice-clone/spec.md new file mode 100644 index 0000000000..2d9245e7b7 --- /dev/null +++ b/openspec/changes/refactor-voice-provider/specs/voice-clone/spec.md @@ -0,0 +1,132 @@ +# Voice Clone Capability Specification + +## ADDED Requirements + +### Requirement: Provider Abstraction Layer +The system SHALL provide a unified provider abstraction layer for voice cloning services, supporting 
multiple vendors through a common interface. + +#### Scenario: Get provider by type +- **GIVEN** the system is configured with multiple voice clone providers +- **WHEN** requesting a provider by type +- **THEN** the system SHALL return the corresponding provider instance +- **AND** the provider SHALL implement the `VoiceCloneProvider` interface + +#### Scenario: Provider not found +- **GIVEN** the system is configured with a default provider +- **WHEN** requesting a non-existent provider type +- **THEN** the system SHALL fall back to the default provider +- **AND** log a warning message + +### Requirement: Voice Cloning +The system SHALL support voice cloning through the provider interface, accepting an audio file URL and returning a unique voice ID. + +#### Scenario: Successful voice cloning with CosyVoice +- **GIVEN** a valid CosyVoice provider is configured +- **WHEN** submitting a voice clone request with audio URL +- **THEN** the system SHALL return a voice ID +- **AND** the voice ID SHALL be usable for subsequent TTS synthesis + +#### Scenario: Voice cloning failure +- **GIVEN** the provider API is unavailable or returns an error +- **WHEN** submitting a voice clone request +- **THEN** the system SHALL throw a `VOICE_TTS_FAILED` exception +- **AND** log the error details for debugging + +### Requirement: Text-to-Speech Synthesis +The system SHALL support TTS synthesis through cloned voices or system voices, accepting text input and returning audio data. 
+ +#### Scenario: TTS with cloned voice +- **GIVEN** a valid voice ID from a previous clone operation +- **WHEN** submitting a TTS request with text and voice ID +- **THEN** the system SHALL return audio data in the specified format +- **AND** the audio SHALL match the cloned voice characteristics + +#### Scenario: TTS with system voice +- **GIVEN** a system voice ID is configured +- **WHEN** submitting a TTS request with text and system voice ID +- **THEN** the system SHALL return audio data using the system voice +- **AND** the audio SHALL match the system voice characteristics + +#### Scenario: TTS with reference audio (file URL) +- **GIVEN** a reference audio URL and transcription text +- **WHEN** submitting a TTS request with file URL +- **THEN** the system SHALL perform on-the-fly voice cloning +- **AND** return audio data matching the reference voice + +### Requirement: Configuration Management +The system SHALL support multi-provider configuration through a unified configuration structure. + +#### Scenario: Configure multiple providers +- **GIVEN** the application configuration file +- **WHEN** configuring multiple voice providers +- **THEN** each provider SHALL have independent `enabled` flag +- **AND** the system SHALL only use enabled providers + +#### Scenario: Default provider selection +- **GIVEN** the configuration specifies a `default-provider` +- **WHEN** no provider is explicitly specified +- **THEN** the system SHALL use the default provider +- **AND** fall back to `cosyvoice` if default is not configured + +#### Scenario: Backward compatibility +- **GIVEN** existing configuration using `yudao.cosyvoice.*` +- **WHEN** the system starts +- **THEN** the system SHALL automatically migrate to new config structure +- **AND** existing functionality SHALL remain unchanged + +### Requirement: Provider Factory +The system SHALL provide a factory component for managing provider instances and resolving providers by type. 
+ +#### Scenario: Factory resolves provider +- **GIVEN** the factory is initialized with provider configurations +- **WHEN** calling `factory.getProvider("cosyvoice")` +- **THEN** the factory SHALL return the CosyVoiceProvider instance +- **AND** cache the instance for subsequent requests + +#### Scenario: Factory returns default +- **GIVEN** the factory is configured with default provider +- **WHEN** calling `factory.getProvider(null)` +- **THEN** the factory SHALL return the default provider instance + +## MODIFIED Requirements + +### Requirement: Voice Creation Flow +The voice creation process SHALL use the provider abstraction layer instead of directly calling CosyVoice client. + +#### Scenario: Create voice with CosyVoice +- **GIVEN** a user uploads a voice audio file +- **WHEN** creating a voice configuration through the API +- **THEN** the system SHALL: + 1. Validate the file exists and belongs to voice category + 2. Call `provider.cloneVoice()` with the audio URL + 3. Store the returned `voiceId` in the database + 4. Return success response with voice configuration ID + +#### Scenario: Create voice with transcription +- **GIVEN** a voice configuration is created without transcription +- **WHEN** the user triggers transcription +- **THEN** the system SHALL: + 1. Fetch the audio file URL + 2. Call the transcription service + 3. Store the transcription text + 4. Update the voice configuration + +### Requirement: Voice Preview +The voice preview functionality SHALL work with both cloned voices (voiceId) and reference audio (file URL). 
+ +#### Scenario: Preview cloned voice +- **GIVEN** a voice configuration with a valid `voiceId` +- **WHEN** requesting a preview with custom text +- **THEN** the system SHALL call `provider.synthesize()` with the voiceId +- **AND** return audio data in Base64 format + +#### Scenario: Preview with reference audio +- **GIVEN** a voice configuration without `voiceId` but with audio file +- **WHEN** requesting a preview +- **THEN** the system SHALL call `provider.synthesize()` with the file URL +- **AND** use the stored transcription as reference text +- **AND** return audio data in Base64 format + +## REMOVED Requirements + +None. This change is additive and refactoring only. diff --git a/openspec/changes/refactor-voice-provider/tasks.md b/openspec/changes/refactor-voice-provider/tasks.md new file mode 100644 index 0000000000..5e25ca2f74 --- /dev/null +++ b/openspec/changes/refactor-voice-provider/tasks.md @@ -0,0 +1,53 @@ +# Implementation Tasks + +## 1. 接口与基础结构 +- [ ] 1.1 创建 `VoiceCloneProvider` 接口 + - 定义 `cloneVoice(VoiceCloneRequest)` 方法 + - 定义 `synthesize(VoiceTtsRequest)` 方法 + - 定义 `supports(String providerType)` 方法 +- [ ] 1.2 创建统一 DTO 类 + - `VoiceCloneRequest` - 语音克隆请求 + - `VoiceCloneResult` - 语音克隆响应 + - `VoiceTtsRequest` - 语音合成请求 + - `VoiceTtsResult` - 语音合成响应 +- [ ] 1.3 创建 `VoiceCloneProviderFactory` 工厂类 + - 根据配置获取 Provider 实例 + - 支持动态切换供应商 + +## 2. CosyVoice 重构(保持现有功能) +- [ ] 2.1 重命名 `CosyVoiceClient` → `CosyVoiceProvider` +- [ ] 2.2 `CosyVoiceProvider` 实现 `VoiceCloneProvider` 接口 +- [ ] 2.3 适配现有 DTO 到新的统一 DTO +- [ ] 2.4 保持现有 DashScope SDK 调用逻辑不变 + +## 3. 配置重构 +- [ ] 3.1 创建 `VoiceProviderProperties` 配置类 + - 支持多供应商配置结构 + - 添加 `default-provider` 配置项 +- [ ] 3.2 创建 `CosyVoiceProviderConfig` (嵌套配置) +- [ ] 3.3 保持向后兼容:支持读取旧的 `yudao.cosyvoice.*` 配置 + +## 4. 
Service 层改造 +- [ ] 4.1 修改 `TikUserVoiceServiceImpl` + - 注入 `VoiceCloneProvider` 而非 `CosyVoiceClient` + - 使用工厂获取 Provider 实例 +- [ ] 4.2 更新方法调用 + - `createVoice()` - 使用 `provider.cloneVoice()` + - `synthesizeVoice()` - 使用 `provider.synthesize()` + - `previewVoice()` - 使用 `provider.synthesize()` + +## 5. 测试与验证 +- [ ] 5.1 单元测试:CosyVoiceProvider +- [ ] 5.2 单元测试:VoiceCloneProviderFactory +- [ ] 5.3 集成测试:TikUserVoiceServiceImpl +- [ ] 5.4 验证现有功能正常运行 + +## 6. 文档与配置迁移 +- [ ] 6.1 更新 `application.yaml` 配置示例 +- [ ] 6.2 添加配置迁移说明文档 + +--- + +**总计**: 18 项任务 + +**预计工作量**: 2-3 天 diff --git a/sql/mysql/SQL建表语句.sql b/sql/mysql/SQL建表语句.sql new file mode 100644 index 0000000000..ffb84696c4 --- /dev/null +++ b/sql/mysql/SQL建表语句.sql @@ -0,0 +1,296 @@ +-- Yudao 风格建表语句 +-- 包含多租户概念,使用 TenantBaseDO + +-- =============================================== +-- 1. 积分管理模块 +-- =============================================== + +-- 积分兑换配置表 +CREATE TABLE `member_point_exchange_config` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键', + `tenant_id` bigint NOT NULL DEFAULT 0 COMMENT '租户编号', + `exchange_rate` int NOT NULL DEFAULT 1 COMMENT '兑换比例(1元兑换多少积分)', + `adjust_reason` varchar(200) NOT NULL DEFAULT '' COMMENT '调整原因', + `operator_id` bigint NOT NULL DEFAULT 0 COMMENT '操作人用户编号', + `operator_name` varchar(64) NOT NULL DEFAULT '' COMMENT '操作人账号', + `status` tinyint NOT NULL DEFAULT 1 COMMENT '状态(0-禁用 1-启用)', + `remark` varchar(500) NOT NULL DEFAULT '' COMMENT '备注', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `creator` varchar(64) NOT NULL DEFAULT '' COMMENT '创建者', + `updater` varchar(64) NOT NULL DEFAULT '' COMMENT '更新者', + `deleted` bit NOT NULL DEFAULT b'0' COMMENT '是否删除', + PRIMARY KEY (`id`) USING BTREE, + KEY `idx_tenant_id` (`tenant_id`) USING BTREE, + KEY `idx_create_time` (`create_time`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 
COLLATE=utf8mb4_unicode_ci COMMENT='积分兑换配置表'; + +-- 积分签到配置表 +CREATE TABLE `member_point_signin_config` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键', + `tenant_id` bigint NOT NULL DEFAULT 0 COMMENT '租户编号', + `daily_points` int NOT NULL DEFAULT 0 COMMENT '每日签到赠送积分', + `continuous_days` int NOT NULL DEFAULT 0 COMMENT '连续签到天数', + `bonus_points` int NOT NULL DEFAULT 0 COMMENT '连续签到奖励积分', + `reset_days` int NOT NULL DEFAULT 0 COMMENT '重置签到天数(0表示不重置)', + `adjust_reason` varchar(200) NOT NULL DEFAULT '' COMMENT '调整原因', + `operator_id` bigint NOT NULL DEFAULT 0 COMMENT '操作人用户编号', + `operator_name` varchar(64) NOT NULL DEFAULT '' COMMENT '操作人账号', + `status` tinyint NOT NULL DEFAULT 1 COMMENT '状态(0-禁用 1-启用)', + `remark` varchar(500) NOT NULL DEFAULT '' COMMENT '备注', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `creator` varchar(64) NOT NULL DEFAULT '' COMMENT '创建者', + `updater` varchar(64) NOT NULL DEFAULT '' COMMENT '更新者', + `deleted` bit NOT NULL DEFAULT b'0' COMMENT '是否删除', + PRIMARY KEY (`id`) USING BTREE, + KEY `idx_tenant_id` (`tenant_id`) USING BTREE, + KEY `idx_continuous_days` (`continuous_days`) USING BTREE, + KEY `idx_create_time` (`create_time`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='积分签到配置表'; + +-- 积分充值配置表 +CREATE TABLE `member_point_recharge_config` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键', + `tenant_id` bigint NOT NULL DEFAULT 0 COMMENT '租户编号', + `recharge_amount` decimal(10,2) NOT NULL DEFAULT 0.00 COMMENT '充值金额', + `bonus_points` int NOT NULL DEFAULT 0 COMMENT '赠送积分数', + `adjust_reason` varchar(200) NOT NULL DEFAULT '' COMMENT '调整原因', + `operator_id` bigint NOT NULL DEFAULT 0 COMMENT '操作人用户编号', + `operator_name` varchar(64) NOT NULL DEFAULT '' COMMENT '操作人账号', + `status` tinyint NOT NULL DEFAULT 1 COMMENT '状态(0-禁用 1-启用)', + `remark` 
varchar(500) NOT NULL DEFAULT '' COMMENT '备注', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `creator` varchar(64) NOT NULL DEFAULT '' COMMENT '创建者', + `updater` varchar(64) NOT NULL DEFAULT '' COMMENT '更新者', + `deleted` bit NOT NULL DEFAULT b'0' COMMENT '是否删除', + PRIMARY KEY (`id`) USING BTREE, + KEY `idx_tenant_id` (`tenant_id`) USING BTREE, + KEY `idx_recharge_amount` (`recharge_amount`) USING BTREE, + KEY `idx_create_time` (`create_time`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='积分充值配置表'; + +-- 积分记录表 +CREATE TABLE `member_point_record` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键', + `tenant_id` bigint NOT NULL DEFAULT 0 COMMENT '租户编号', + `user_id` bigint NOT NULL DEFAULT 0 COMMENT '用户编号', + `mobile` varchar(20) NOT NULL DEFAULT '' COMMENT '手机号', + `type` varchar(20) NOT NULL DEFAULT '' COMMENT '变动类型(increase-增加 decrease-减少)', + `point_amount` int NOT NULL DEFAULT 0 COMMENT '变动积分数量(正数为增加,负数为减少)', + `balance` int NOT NULL DEFAULT 0 COMMENT '变动后余额', + `reason` varchar(100) NOT NULL DEFAULT '' COMMENT '变动原因', + `biz_type` varchar(50) NOT NULL DEFAULT '' COMMENT '业务类型(signin-签到 recharge-充值 exchange-兑换 admin-后台调整 gift-礼包赠送)', + `biz_id` varchar(64) NOT NULL DEFAULT '' COMMENT '业务关联ID', + `remark` varchar(500) NOT NULL DEFAULT '' COMMENT '备注', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `creator` varchar(64) NOT NULL DEFAULT '' COMMENT '创建者', + PRIMARY KEY (`id`) USING BTREE, + KEY `idx_tenant_id_user_id` (`tenant_id`, `user_id`) USING BTREE, + KEY `idx_user_id_create_time` (`user_id`, `create_time`) USING BTREE, + KEY `idx_type` (`type`) USING BTREE, + KEY `idx_biz_type` (`biz_type`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='积分记录表'; + +-- 
=============================================== +-- 2. 客户管理模块 +-- =============================================== + +-- 会员用户表 +CREATE TABLE `member_user` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键', + `tenant_id` bigint NOT NULL DEFAULT 0 COMMENT '租户编号', + `user_id` varchar(32) NOT NULL COMMENT '用户ID', + `mobile` varchar(20) NOT NULL COMMENT '手机号', + `register_time` datetime NOT NULL COMMENT '注册时间', + `last_login_time` datetime NOT NULL COMMENT '最后登录时间', + `total_points` int NOT NULL DEFAULT 0 COMMENT '账户总积分', + `used_points` int NOT NULL DEFAULT 0 COMMENT '账户消耗积分', + `remaining_points` int NOT NULL DEFAULT 0 COMMENT '账户剩余积分', + `total_storage` decimal(10,2) NOT NULL DEFAULT 0.00 COMMENT '云空间总容量(GB)', + `used_storage` decimal(10,2) NOT NULL DEFAULT 0.00 COMMENT '云空间已用容量(GB)', + `remaining_storage` decimal(10,2) NOT NULL DEFAULT 0.00 COMMENT '云空间剩余容量(GB)', + `total_recharge` decimal(10,2) NOT NULL DEFAULT 0.00 COMMENT '总充值金额', + `status` tinyint NOT NULL DEFAULT 1 COMMENT '状态(0-禁用 1-启用)', + `remark` varchar(500) NOT NULL DEFAULT '' COMMENT '备注', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `creator` varchar(64) NOT NULL DEFAULT '' COMMENT '创建者', + `updater` varchar(64) NOT NULL DEFAULT '' COMMENT '更新者', + `deleted` bit NOT NULL DEFAULT b'0' COMMENT '是否删除', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `uk_user_id` (`user_id`) USING BTREE, + UNIQUE KEY `uk_mobile` (`mobile`) USING BTREE, + KEY `idx_tenant_id` (`tenant_id`) USING BTREE, + KEY `idx_register_time` (`register_time`) USING BTREE, + KEY `idx_last_login_time` (`last_login_time`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='会员用户表'; + +-- 充值记录表 +CREATE TABLE `member_recharge_record` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键', + `tenant_id` bigint NOT NULL DEFAULT 0 COMMENT '租户编号', + `user_id` 
bigint NOT NULL DEFAULT 0 COMMENT '用户编号', + `mobile` varchar(20) NOT NULL COMMENT '手机号', + `recharge_amount` decimal(10,2) NOT NULL DEFAULT 0.00 COMMENT '充值金额', + `recharge_type` varchar(20) NOT NULL DEFAULT '' COMMENT '充值方式(alipay-支付宝 wechat-微信 admin-人工)', + `order_type` varchar(50) NOT NULL DEFAULT '' COMMENT '订单类型(purchase-权限购买 exchange-积分兑换)', + `permission_type` varchar(100) NOT NULL DEFAULT '' COMMENT '购买权限类型', + `bonus_points` int NOT NULL DEFAULT 0 COMMENT '获得积分', + `status` tinyint NOT NULL DEFAULT 1 COMMENT '状态(0-失败 1-成功)', + `remark` varchar(500) NOT NULL DEFAULT '' COMMENT '备注', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `creator` varchar(64) NOT NULL DEFAULT '' COMMENT '创建者', + PRIMARY KEY (`id`) USING BTREE, + KEY `idx_tenant_id_user_id` (`tenant_id`, `user_id`) USING BTREE, + KEY `idx_user_id_create_time` (`user_id`, `create_time`) USING BTREE, + KEY `idx_recharge_type` (`recharge_type`) USING BTREE, + KEY `idx_order_type` (`order_type`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='充值记录表'; + +-- =============================================== +-- 3. 
礼包管理模块 +-- =============================================== + +-- 礼包表 +CREATE TABLE `member_gift_package` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键', + `tenant_id` bigint NOT NULL DEFAULT 0 COMMENT '租户编号', + `package_id` varchar(32) NOT NULL COMMENT '礼包ID', + `package_name` varchar(100) NOT NULL COMMENT '礼包名称', + `sort_order` int NOT NULL DEFAULT 0 COMMENT 'C端展示排序', + `status` tinyint NOT NULL DEFAULT 1 COMMENT '状态(0-禁用 1-启用)', + `price` decimal(10,2) NOT NULL DEFAULT 0.00 COMMENT '购买价格', + `validity_days` int NOT NULL DEFAULT 0 COMMENT '有效期(天)', + `bonus_points` int NOT NULL DEFAULT 0 COMMENT '赠送积分', + `applications` text NOT NULL COMMENT '关联应用(JSON格式)', + `remark` varchar(500) NOT NULL DEFAULT '' COMMENT '备注', + `operator_id` bigint NOT NULL DEFAULT 0 COMMENT '操作人用户编号', + `operator_name` varchar(64) NOT NULL DEFAULT '' COMMENT '操作人账号', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `creator` varchar(64) NOT NULL DEFAULT '' COMMENT '创建者', + `updater` varchar(64) NOT NULL DEFAULT '' COMMENT '更新者', + `deleted` bit NOT NULL DEFAULT b'0' COMMENT '是否删除', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `uk_package_id` (`package_id`) USING BTREE, + KEY `idx_tenant_id` (`tenant_id`) USING BTREE, + KEY `idx_sort_order` (`sort_order`) USING BTREE, + KEY `idx_status` (`status`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='礼包表'; + +-- =============================================== +-- 4. 
模型管理模块 +-- =============================================== + +-- AI模型表 +CREATE TABLE `ai_model` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键', + `tenant_id` bigint NOT NULL DEFAULT 0 COMMENT '租户编号', + `model_name` varchar(100) NOT NULL COMMENT '模型名称', + `model_code` varchar(100) NOT NULL COMMENT '模型标识/编码', + `platform` varchar(50) NOT NULL COMMENT '所属平台', + `api_key` varchar(200) NOT NULL COMMENT 'API秘钥', + `status` tinyint NOT NULL DEFAULT 1 COMMENT '状态(0-禁用 1-启用)', + `temperature` decimal(3,2) NOT NULL DEFAULT 0.70 COMMENT '温度参数', + `max_tokens` int NOT NULL DEFAULT 0 COMMENT '回复数Token数', + `daily_limit` int NOT NULL DEFAULT 0 COMMENT '每日请求次数', + `model_type` varchar(50) NOT NULL COMMENT '模型类型(image-图像 text-文本 video-视频 audio-音频)', + `consume_points` int NOT NULL DEFAULT 0 COMMENT '消耗积分', + `max_text_length` int NOT NULL DEFAULT 0 COMMENT '最大文本数量', + `max_image_size` varchar(50) NOT NULL DEFAULT '' COMMENT '图片最大像素', + `max_video_duration` int NOT NULL DEFAULT 0 COMMENT '视频最大时长(秒)', + `max_video_quality` varchar(20) NOT NULL DEFAULT '' COMMENT '视频最大质量', + `remark` varchar(500) NOT NULL DEFAULT '' COMMENT '备注', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `creator` varchar(64) NOT NULL DEFAULT '' COMMENT '创建者', + `updater` varchar(64) NOT NULL DEFAULT '' COMMENT '更新者', + `deleted` bit NOT NULL DEFAULT b'0' COMMENT '是否删除', + PRIMARY KEY (`id`) USING BTREE, + KEY `idx_tenant_id` (`tenant_id`) USING BTREE, + KEY `idx_platform` (`platform`) USING BTREE, + KEY `idx_model_type` (`model_type`) USING BTREE, + KEY `idx_status` (`status`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='AI模型表'; + +-- =============================================== +-- 5. 
应用功能管理模块 +-- =============================================== + +-- 应用功能表 +CREATE TABLE `ai_application` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键', + `tenant_id` bigint NOT NULL DEFAULT 0 COMMENT '租户编号', + `app_id` varchar(32) NOT NULL COMMENT '应用ID', + `app_name` varchar(100) NOT NULL COMMENT '应用名称', + `api_key` varchar(200) NOT NULL COMMENT '第三方API秘钥', + `consume_points` int NOT NULL DEFAULT 0 COMMENT '单位消耗积分', + `unit_type` varchar(20) NOT NULL COMMENT '消耗单位(time-时长 count-次数)', + `unit_value` varchar(50) NOT NULL COMMENT '单位值(如:1min、20次)', + `status` tinyint NOT NULL DEFAULT 1 COMMENT '状态(0-禁用 1-启用)', + `remark` varchar(500) NOT NULL DEFAULT '' COMMENT '备注', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `creator` varchar(64) NOT NULL DEFAULT '' COMMENT '创建者', + `updater` varchar(64) NOT NULL DEFAULT '' COMMENT '更新者', + `deleted` bit NOT NULL DEFAULT b'0' COMMENT '是否删除', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `uk_app_id` (`app_id`) USING BTREE, + KEY `idx_tenant_id` (`tenant_id`) USING BTREE, + KEY `idx_app_name` (`app_name`) USING BTREE, + KEY `idx_status` (`status`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='应用功能表'; + +-- =============================================== +-- 6. 
智能体配置模块 +-- =============================================== + +-- 智能体表 +CREATE TABLE `ai_agent` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键', + `tenant_id` bigint NOT NULL DEFAULT 0 COMMENT '租户编号', + `agent_id` varchar(32) NOT NULL COMMENT '智能体ID', + `agent_name` varchar(100) NOT NULL COMMENT '智能体名称', + `icon` varchar(200) NOT NULL DEFAULT '' COMMENT '图标URL', + `status` tinyint NOT NULL DEFAULT 1 COMMENT '状态(0-禁用 1-启用)', + `description` text NOT NULL COMMENT '设定描述', + `system_prompt` text NOT NULL COMMENT '预置提示词', + `remark` varchar(500) NOT NULL DEFAULT '' COMMENT '备注', + `operator_id` bigint NOT NULL DEFAULT 0 COMMENT '操作人用户编号', + `operator_name` varchar(64) NOT NULL DEFAULT '' COMMENT '操作人账号', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `creator` varchar(64) NOT NULL DEFAULT '' COMMENT '创建者', + `updater` varchar(64) NOT NULL DEFAULT '' COMMENT '更新者', + `deleted` bit NOT NULL DEFAULT b'0' COMMENT '是否删除', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `uk_agent_id` (`agent_id`) USING BTREE, + KEY `idx_tenant_id` (`tenant_id`) USING BTREE, + KEY `idx_agent_name` (`agent_name`) USING BTREE, + KEY `idx_status` (`status`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='智能体表'; + +-- =============================================== +-- 7. 
权限管理表 +-- =============================================== + +-- 用户权限表 +CREATE TABLE `member_user_permission` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键', + `tenant_id` bigint NOT NULL DEFAULT 0 COMMENT '租户编号', + `user_id` bigint NOT NULL DEFAULT 0 COMMENT '用户编号', + `permission_type` varchar(100) NOT NULL COMMENT '权限类型', + `package_id` bigint NOT NULL DEFAULT 0 COMMENT '礼包ID', + `validity_start` datetime NOT NULL COMMENT '有效期开始时间', + `validity_end` datetime NOT NULL COMMENT '有效期结束时间', + `status` tinyint NOT NULL DEFAULT 1 COMMENT '状态(0-过期 1-有效)', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `creator` varchar(64) NOT NULL DEFAULT '' COMMENT '创建者', + PRIMARY KEY (`id`) USING BTREE, + KEY `idx_tenant_id_user_id` (`tenant_id`, `user_id`) USING BTREE, + KEY `idx_user_id` (`user_id`) USING BTREE, + KEY `idx_package_id` (`package_id`) USING BTREE, + KEY `idx_validity_end` (`validity_end`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='用户权限表'; diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceProvider.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceProvider.java new file mode 100644 index 0000000000..329c39f27f --- /dev/null +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceProvider.java @@ -0,0 +1,160 @@ +package cn.iocoder.yudao.module.tik.voice.client; + +import cn.hutool.core.util.StrUtil; +import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest; +import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult; +import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest; +import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult; +import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties; +import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig; +import 
cn.iocoder.yudao.module.tik.voice.config.VoiceProviderProperties; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Component; + +/** + * CosyVoice Provider 实现 + * + *

阿里云 CosyVoice 语音服务的 Provider 实现。 + * 内部委托给 {@link CosyVoiceClient} 进行实际的API调用。 + * + * @author 芋道源码 + */ +@Slf4j +@Component +@RequiredArgsConstructor +public class CosyVoiceProvider implements VoiceCloneProvider { + + private final CosyVoiceClient cosyVoiceClient; + + /** + * 新配置(支持多供应商) + */ + private final VoiceProviderProperties voiceProviderProperties; + + /** + * 旧配置(向后兼容) + */ + private final CosyVoiceProperties cosyVoiceProperties; + + /** + * 获取 CosyVoice 配置 + * 优先使用新配置,如果不存在则使用旧配置(向后兼容) + */ + private CosyVoiceProviderConfig getConfig() { + // 尝试从新配置获取 + var baseConfig = voiceProviderProperties.getProviderConfig("cosyvoice"); + if (baseConfig instanceof CosyVoiceProviderConfig cosyConfig) { + return cosyConfig; + } + + // 回退到旧配置(向后兼容) + if (cosyVoiceProperties != null && cosyVoiceProperties.isEnabled()) { + return migrateFromLegacyConfig(cosyVoiceProperties); + } + + // 返回空配置 + return new CosyVoiceProviderConfig(); + } + + /** + * 从旧配置迁移到新配置格式 + */ + private CosyVoiceProviderConfig migrateFromLegacyConfig(CosyVoiceProperties legacy) { + var config = new CosyVoiceProviderConfig(); + config.setEnabled(true); + config.setApiKey(legacy.getApiKey()); + config.setDefaultModel(legacy.getDefaultModel()); + config.setDefaultVoiceId(legacy.getDefaultVoiceId()); + config.setSampleRate(legacy.getSampleRate()); + config.setAudioFormat(legacy.getAudioFormat()); + config.setPreviewText(legacy.getPreviewText()); + config.setTtsUrl(legacy.getTtsUrl()); + config.setVoiceEnrollmentUrl(legacy.getVoiceEnrollmentUrl()); + config.setConnectTimeout(legacy.getConnectTimeout()); + config.setReadTimeout(legacy.getReadTimeout()); + return config; + } + + @Override + public VoiceCloneResult cloneVoice(VoiceCloneRequest request) { + log.info("[CosyVoiceProvider][语音克隆][audioUrl={}, model={}]", + request.getAudioUrl(), request.getModel()); + + // 适配到 CosyVoiceCloneRequest + cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest cosyRequest = + new 
cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest(); + + cosyRequest.setUrl(request.getAudioUrl()); + cosyRequest.setTargetModel(request.getModel()); + cosyRequest.setPrefix(request.getPrefix()); + if (request.getSampleRate() != null) { + cosyRequest.setSampleRate(request.getSampleRate()); + } + if (request.getAudioFormat() != null) { + cosyRequest.setAudioFormat(request.getAudioFormat()); + } + + // 调用底层 Client + cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult cosyResult = + cosyVoiceClient.cloneVoice(cosyRequest); + + // 适配到统一 Result + VoiceCloneResult result = new VoiceCloneResult(); + result.setVoiceId(cosyResult.getVoiceId()); + result.setRequestId(cosyResult.getRequestId()); + + log.info("[CosyVoiceProvider][语音克隆成功][voiceId={}]", result.getVoiceId()); + return result; + } + + @Override + public VoiceTtsResult synthesize(VoiceTtsRequest request) { + log.info("[CosyVoiceProvider][语音合成][voiceId={}, textLength={}, model={}]", + request.getVoiceId(), + request.getText() != null ? 
request.getText().length() : 0, + request.getModel()); + + // 适配到 CosyVoiceTtsRequest + cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest cosyRequest = + cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest.builder() + .text(request.getText()) + .voiceId(request.getVoiceId()) + .fileUrl(request.getFileUrl()) + .referenceText(request.getReferenceText()) + .model(request.getModel()) + .speechRate(request.getSpeechRate()) + .volume(request.getVolume()) + .instruction(request.getInstruction()) + .sampleRate(request.getSampleRate()) + .audioFormat(request.getAudioFormat()) + .preview(request.isPreview()) + .build(); + + // 调用底层 Client + cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult cosyResult = + cosyVoiceClient.synthesize(cosyRequest); + + // 适配到统一 Result + VoiceTtsResult result = new VoiceTtsResult(); + result.setRequestId(cosyResult.getRequestId()); + result.setFormat(cosyResult.getFormat()); + result.setSampleRate(cosyResult.getSampleRate()); + result.setAudio(cosyResult.getAudio()); + result.setVoiceId(cosyResult.getVoiceId()); + + log.info("[CosyVoiceProvider][语音合成成功][format={}, audioSize={}]", + result.getFormat(), result.getAudio() != null ? 
result.getAudio().length : 0); + return result; + } + + @Override + public boolean supports(String providerType) { + return "cosyvoice".equalsIgnoreCase(providerType); + } + + @Override + public String getProviderType() { + return "cosyvoice"; + } +} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProvider.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProvider.java new file mode 100644 index 0000000000..99763ec84e --- /dev/null +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProvider.java @@ -0,0 +1,55 @@ +package cn.iocoder.yudao.module.tik.voice.client; + +import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest; +import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult; +import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest; +import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult; + +/** + * 语音克隆 Provider 统一接口 + * + *

支持多供应商实现的语音克隆和语音合成服务。 + * 通过工厂类 {@link VoiceCloneProviderFactory} 获取具体实现。 + * + * @author 芋道源码 + */ +public interface VoiceCloneProvider { + + /** + * 语音克隆 + * + *

根据提供的音频文件URL,克隆目标音色。 + * 不同供应商的实现细节被此接口屏蔽。 + * + * @param request 语音克隆请求 + * @return 语音克隆结果,包含生成的 voiceId + * @throws RuntimeException 当克隆失败时抛出 + */ + VoiceCloneResult cloneVoice(VoiceCloneRequest request); + + /** + * 文本转语音合成 + * + *

将文本转换为语音,支持使用已克隆的音色或系统音色。 + * + * @param request 语音合成请求 + * @return 语音合成结果,包含音频数据 + * @throws RuntimeException 当合成失败时抛出 + */ + VoiceTtsResult synthesize(VoiceTtsRequest request); + + /** + * 检查是否支持指定的供应商类型 + * + * @param providerType 供应商类型(如 "cosyvoice", "siliconflow") + * @return true 如果支持,false 否则 + */ + boolean supports(String providerType); + + /** + * 获取供应商类型标识 + * + * @return 供应商类型,如 "cosyvoice", "siliconflow" + */ + String getProviderType(); +} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProviderFactory.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProviderFactory.java new file mode 100644 index 0000000000..09e7be6606 --- /dev/null +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProviderFactory.java @@ -0,0 +1,104 @@ +package cn.iocoder.yudao.module.tik.voice.client; + +import cn.iocoder.yudao.framework.common.exception.ServiceException; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception; +import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception0; +import static cn.iocoder.yudao.module.tik.enums.ErrorCodeConstants.VOICE_TTS_FAILED; + +/** + * 语音克隆 Provider 工厂 + * + *

负责管理和获取不同的语音克隆 Provider 实现。 + * 支持多供应商配置和动态切换。 + * + * @author 芋道源码 + */ +@Slf4j +@Component +public class VoiceCloneProviderFactory { + + private final Map providers = new ConcurrentHashMap<>(); + + @Autowired + public VoiceCloneProviderFactory(List providerList) { + // 自动注册所有 Provider 实现类 + for (VoiceCloneProvider provider : providerList) { + registerProvider(provider); + log.info("[VoiceCloneProviderFactory][注册Provider][type={}]", provider.getProviderType()); + } + } + + /** + * 注册 Provider + * + * @param provider Provider 实例 + */ + public void registerProvider(VoiceCloneProvider provider) { + String type = provider.getProviderType(); + if (providers.containsKey(type)) { + log.warn("[VoiceCloneProviderFactory][Provider已存在,覆盖][type={}]", type); + } + providers.put(type, provider); + } + + /** + * 获取默认 Provider + * + * @return 默认的 Provider 实例 + * @throws ServiceException 当没有可用的 Provider 时抛出 + */ + public VoiceCloneProvider getDefaultProvider() { + if (providers.isEmpty()) { + throw exception0(VOICE_TTS_FAILED.getCode(), "未配置任何语音克隆 Provider"); + } + // 返回第一个注册的 Provider 作为默认 + return providers.values().iterator().next(); + } + + /** + * 根据类型获取 Provider + * + * @param providerType 供应商类型(如 "cosyvoice", "siliconflow") + * @return 对应的 Provider 实例 + * @throws ServiceException 当 Provider 不存在时抛出 + */ + public VoiceCloneProvider getProvider(String providerType) { + if (providerType == null || providerType.trim().isEmpty()) { + return getDefaultProvider(); + } + + VoiceCloneProvider provider = providers.get(providerType); + if (provider == null) { + throw exception0(VOICE_TTS_FAILED.getCode(), "不支持的语音克隆供应商: " + providerType); + } + + return provider; + } + + /** + * 检查是否支持指定的供应商类型 + * + * @param providerType 供应商类型 + * @return true 如果支持,false 否则 + */ + public boolean hasProvider(String providerType) { + return providerType != null && providers.containsKey(providerType); + } + + /** + * 获取所有已注册的 Provider 类型 + * + * @return 供应商类型列表 + */ + public List 
getAvailableProviderTypes() { + return List.copyOf(providers.keySet()); + } +} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneRequest.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneRequest.java new file mode 100644 index 0000000000..1884ed3b62 --- /dev/null +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneRequest.java @@ -0,0 +1,51 @@ +package cn.iocoder.yudao.module.tik.voice.client.dto; + +import lombok.Data; + +/** + * 语音克隆请求(统一DTO) + * + *

屏蔽不同供应商API差异,提供统一的请求结构。 + * 各Provider实现负责将此DTO转换为供应商特定格式。 + * + * @author 芋道源码 + */ +@Data +public class VoiceCloneRequest { + + /** + * 音频文件公网URL + * + *

CosyVoice: 对应 {@code url} 字段

+ *

SiliconFlow: 对应 {@code audio} 字段(需base64编码)

+ */ + private String audioUrl; + + /** + * 模型名称 + * + *

CosyVoice: 对应 {@code targetModel},如 {@code cosyvoice-v3-flash}

+ *

SiliconFlow: 对应 {@code model},如 {@code indextts-2}

+ */ + private String model; + + /** + * 音色自定义前缀(可选) + * + *

CosyVoice: 必填,仅允许数字和小写字母,长度<10字符

+ *

SiliconFlow: 不适用

+ */ + private String prefix; + + /** + * 采样率,默认24000 + */ + private Integer sampleRate; + + /** + * 音频格式,默认mp3 + * + *

可选值: mp3, wav, flac

+ */ + private String audioFormat; +} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneResult.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneResult.java new file mode 100644 index 0000000000..b8bdd92698 --- /dev/null +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneResult.java @@ -0,0 +1,24 @@ +package cn.iocoder.yudao.module.tik.voice.client.dto; + +import lombok.Data; + +/** + * 语音克隆结果(统一DTO) + * + * @author 芋道源码 + */ +@Data +public class VoiceCloneResult { + + /** + * 生成的音色ID + * + *

后续TTS合成时使用此ID

+ */ + private String voiceId; + + /** + * 请求ID(用于追踪) + */ + private String requestId; +} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceTtsRequest.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceTtsRequest.java new file mode 100644 index 0000000000..ee962c7971 --- /dev/null +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceTtsRequest.java @@ -0,0 +1,77 @@ +package cn.iocoder.yudao.module.tik.voice.client.dto; + +import lombok.Builder; +import lombok.Data; + +/** + * 文本转语音请求(统一DTO) + * + *

屏蔽不同供应商API差异,提供统一的请求结构。 + * + * @author 芋道源码 + */ +@Data +@Builder +public class VoiceTtsRequest { + + /** + * 待合成文本 + */ + private String text; + + /** + * 音色ID(可选,默认使用配置) + * + *

使用语音克隆生成的voiceId

+ */ + private String voiceId; + + /** + * 语音文件URL(当使用语音URL合成时使用,替代voiceId) + * + *

用于实时语音克隆,无需提前克隆

+ */ + private String fileUrl; + + /** + * 参考音频文本(当使用fileUrl时,用于提高克隆质量) + */ + private String referenceText; + + /** + * 模型(默认使用供应商默认模型) + */ + private String model; + + /** + * 语速(0.5 - 2.0,默认1.0) + */ + private Float speechRate; + + /** + * 音量(-100 - 100,默认0) + */ + private Float volume; + + /** + * 指令(用于控制音色风格),可选 + */ + private String instruction; + + /** + * 采样率(默认24000) + */ + private Integer sampleRate; + + /** + * 音频格式(默认mp3) + * + *

可选值: mp3, wav, flac

+ */ + private String audioFormat; + + /** + * 是否仅用于试听(方便服务侧做限流) + */ + private boolean preview; +} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceTtsResult.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceTtsResult.java new file mode 100644 index 0000000000..29f7d0e92b --- /dev/null +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceTtsResult.java @@ -0,0 +1,39 @@ +package cn.iocoder.yudao.module.tik.voice.client.dto; + +import lombok.Data; + +/** + * 文本转语音结果(统一DTO) + * + * @author 芋道源码 + */ +@Data +public class VoiceTtsResult { + + /** + * 请求ID(用于追踪) + */ + private String requestId; + + /** + * 返回的音频格式 + * + *

mp3, wav, flac 等

+ */ + private String format; + + /** + * 采样率 + */ + private Integer sampleRate; + + /** + * 音频二进制内容 + */ + private byte[] audio; + + /** + * 音频所使用的 voiceId + */ + private String voiceId; +} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProviderConfig.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProviderConfig.java new file mode 100644 index 0000000000..2a65d82637 --- /dev/null +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProviderConfig.java @@ -0,0 +1,64 @@ +package cn.iocoder.yudao.module.tik.voice.config; + +import lombok.Data; +import lombok.EqualsAndHashCode; + +import java.time.Duration; + +/** + * CosyVoice 供应商配置 + * + *

继承通用配置,添加 CosyVoice 特有字段。 + * + * @author 芋道源码 + */ +@Data +@EqualsAndHashCode(callSuper = true) +public class CosyVoiceProviderConfig extends VoiceProviderProperties.ProviderConfig { + + /** + * 默认模型 + */ + private String defaultModel = "cosyvoice-v3-flash"; + + /** + * 默认 voiceId(可选) + */ + private String defaultVoiceId; + + /** + * 默认采样率 + */ + private Integer sampleRate = 24000; + + /** + * 默认音频格式 + */ + private String audioFormat = "mp3"; + + /** + * 试听默认示例文本 + */ + private String previewText = "您好,欢迎体验专属音色。"; + + /** + * TTS 接口地址 + */ + private String ttsUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis"; + + /** + * 语音复刻接口地址(声音注册) + */ + private String voiceEnrollmentUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment"; + + /** + * 连接超时时间 + */ + private Duration connectTimeout = Duration.ofSeconds(10); + + /** + * 读取超时时间(3分钟,提升语音合成成功率) + */ + private Duration readTimeout = Duration.ofSeconds(180); + +} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/VoiceProviderProperties.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/VoiceProviderProperties.java new file mode 100644 index 0000000000..f97ac57b0e --- /dev/null +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/VoiceProviderProperties.java @@ -0,0 +1,78 @@ +package cn.iocoder.yudao.module.tik.voice.config; + +import lombok.Data; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.stereotype.Component; + +import java.util.HashMap; +import java.util.Map; + +/** + * 语音 Provider 统一配置 + * + *

支持多供应商配置,默认供应商选择。 + * + * @author 芋道源码 + */ +@Data +@Component +@ConfigurationProperties(prefix = "yudao.voice") +public class VoiceProviderProperties { + + /** + * 默认供应商类型 + * + *

可选值: cosyvoice, siliconflow 等 + */ + private String defaultProvider = "cosyvoice"; + + /** + * 各供应商配置 + * + *

key 为供应商类型(如 cosyvoice, siliconflow) + */ + private Map providers = new HashMap<>(); + + /** + * 供应商通用配置基类 + */ + @Data + public static class ProviderConfig { + /** + * 是否启用 + */ + private boolean enabled = true; + + /** + * API Key + */ + private String apiKey; + + /** + * 优先级(数字越小优先级越高,用于故障转移) + */ + private Integer priority = 100; + } + + /** + * 获取指定供应商配置 + * + * @param providerType 供应商类型 + * @return 配置对象,不存在返回 null + */ + public ProviderConfig getProviderConfig(String providerType) { + return providers.get(providerType); + } + + /** + * 检查供应商是否启用 + * + * @param providerType 供应商类型 + * @return true 如果启用且配置存在 + */ + public boolean isProviderEnabled(String providerType) { + ProviderConfig config = getProviderConfig(providerType); + return config != null && config.isEnabled(); + } + +} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java index 36f0065c5f..0ba4723c9f 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java @@ -19,12 +19,14 @@ import cn.iocoder.yudao.module.tik.file.dal.mysql.TikUserFileMapper; import cn.iocoder.yudao.module.tik.file.service.TikUserFileService; import cn.iocoder.yudao.module.tik.tikhup.service.TikHupService; import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX; -import cn.iocoder.yudao.module.tik.voice.client.CosyVoiceClient; -import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest; -import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult; -import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest; -import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult; +import cn.iocoder.yudao.module.tik.voice.client.VoiceCloneProvider; 
+import cn.iocoder.yudao.module.tik.voice.client.VoiceCloneProviderFactory; +import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest; +import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult; +import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest; +import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult; import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties; +import cn.iocoder.yudao.module.tik.voice.config.VoiceProviderProperties; import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikUserVoiceDO; import cn.iocoder.yudao.module.tik.voice.dal.mysql.TikUserVoiceMapper; import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceCreateReqVO; @@ -84,11 +86,14 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { private TikHupService tikHupService; @Resource - private CosyVoiceClient cosyVoiceClient; + private VoiceCloneProviderFactory voiceProviderFactory; @Resource private CosyVoiceProperties cosyVoiceProperties; + @Resource + private VoiceProviderProperties voiceProviderProperties; + @Resource private StringRedisTemplate stringRedisTemplate; @@ -139,17 +144,20 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { .setTranscription(null); // 初始为空,表示未识别 voiceMapper.insert(voice); - // 4. 调用阿里云语音复刻服务,生成 voice_id + // 4. 
调用语音克隆服务,生成 voice_id try { - log.info("[createVoice][开始语音复刻,配音编号({}),文件ID({})]", voice.getId(), fileDO.getId()); + log.info("[createVoice][开始语音复刻,配音编号({}),文件ID({}),供应商({})]", + voice.getId(), fileDO.getId(), createReqVO.getProviderType()); String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); - CosyVoiceCloneRequest cloneRequest = new CosyVoiceCloneRequest(); - cloneRequest.setTargetModel("cosyvoice-v3-flash"); // 使用v3-flash模型 + // 使用 Provider 接口(支持前端选择供应商,不传则使用默认) + VoiceCloneProvider provider = voiceProviderFactory.getProvider(createReqVO.getProviderType()); + VoiceCloneRequest cloneRequest = new VoiceCloneRequest(); + cloneRequest.setAudioUrl(fileAccessUrl); + cloneRequest.setModel("cosyvoice-v3-flash"); // 使用v3-flash模型 cloneRequest.setPrefix("voice" + voice.getId()); // 音色前缀,格式要求 - cloneRequest.setUrl(fileAccessUrl); - CosyVoiceCloneResult cloneResult = cosyVoiceClient.cloneVoice(cloneRequest); + VoiceCloneResult cloneResult = provider.cloneVoice(cloneRequest); String voiceId = cloneResult.getVoiceId(); // 更新配音记录,保存 voice_id @@ -432,22 +440,26 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { return buildSynthResponseFromCache(reqVO, synthCache); } - CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest( - finalText, - voiceId, - fileUrl, - transcriptionText, - reqVO.getModel(), - reqVO.getSpeechRate(), - reqVO.getVolume(), - reqVO.getInstruction(), - reqVO.getSampleRate(), - reqVO.getAudioFormat(), - false - )); + // 使用 Provider 接口进行 TTS 合成(支持前端选择供应商,不传则使用默认) + VoiceCloneProvider provider = voiceProviderFactory.getProvider(reqVO.getProviderType()); + VoiceTtsRequest ttsRequest = VoiceTtsRequest.builder() + .text(finalText) + .voiceId(voiceId) + .fileUrl(fileUrl) + .referenceText(transcriptionText) + .model(reqVO.getModel()) + .speechRate(reqVO.getSpeechRate()) + .volume(reqVO.getVolume()) + .instruction(reqVO.getInstruction()) + .sampleRate(reqVO.getSampleRate()) + 
.audioFormat(reqVO.getAudioFormat()) + .preview(false) + .build(); + + VoiceTtsResult ttsResult = provider.synthesize(ttsRequest); String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat()); - String finalVoiceId = StrUtil.blankToDefault(voiceId, cosyVoiceProperties.getDefaultVoiceId()); + String finalVoiceId = StrUtil.blankToDefault(voiceId, getDefaultVoiceId()); // 【安全方案】不暴露OSS链接,直接返回Base64编码的音频数据 String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio()); @@ -527,7 +539,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { voiceId = voice.getVoiceId(); // 注意:使用 voiceId 时,不依赖 transcriptionText,直接使用前端传入的 inputText transcriptionText = null; // 清除 transcriptionText - inputText = StrUtil.blankToDefault(reqVO.getInputText(), cosyVoiceProperties.getPreviewText()); + inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText()); } else { log.info("[previewVoice][使用文件URL试听,配音编号({})]", voiceConfigId); // 获取文件信息,用于获取文件URL @@ -543,17 +555,17 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别"); } inputText = StrUtil.blankToDefault(reqVO.getInputText(), - StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText())); + StrUtil.blankToDefault(transcriptionText, getPreviewText())); } } // 3. 
如果没有配置ID,使用系统配音配置(需要前端传voiceId) else { log.info("[previewVoice][开始试听,使用系统配音配置,用户({})]", userId); - voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId()); + voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), getDefaultVoiceId()); if (StrUtil.isBlank(voiceId)) { throw exception(VOICE_NOT_EXISTS, "系统配音音色ID不能为空"); } - inputText = StrUtil.blankToDefault(reqVO.getInputText(), cosyVoiceProperties.getPreviewText()); + inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText()); } String finalText = determineSynthesisText( @@ -588,21 +600,26 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { return buildPreviewResp(cachedBase64, previewCache.getFormat(), voiceId); } - log.info("[previewVoice][调用CosyVoice合成,配音编号({}),voiceId({}),fileUrl({}),文本长度({})]", - voiceConfigId, voiceId, fileUrl, finalText.length()); - CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest( - finalText, - voiceId, - fileUrl, - transcriptionText, // 参考音频文本,用于提高克隆质量 - null, // 使用默认模型 - speechRate, - volume, - instruction, - null, - audioFormat, - true - )); + log.info("[previewVoice][调用语音合成服务,配音编号({}),voiceId({}),fileUrl({}),文本长度({}),供应商({})]", + voiceConfigId, voiceId, fileUrl, finalText.length(), reqVO.getProviderType()); + + // 使用 Provider 接口进行 TTS 合成(支持前端选择供应商,不传则使用默认) + VoiceCloneProvider provider = voiceProviderFactory.getProvider(reqVO.getProviderType()); + VoiceTtsRequest ttsRequest = VoiceTtsRequest.builder() + .text(finalText) + .voiceId(voiceId) + .fileUrl(fileUrl) + .referenceText(transcriptionText) + .model(null) // 使用默认模型 + .speechRate(speechRate) + .volume(volume) + .instruction(instruction) + .sampleRate(null) + .audioFormat(audioFormat) + .preview(true) + .build(); + + VoiceTtsResult ttsResult = provider.synthesize(ttsRequest); String format = defaultFormat(ttsResult.getFormat(), audioFormat); String identifier = StrUtil.isNotBlank(voiceId) ? 
voiceId : "voice"; @@ -622,35 +639,53 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { return buildPreviewResp(audioBase64, format, voiceId); } - private CosyVoiceTtsRequest buildTtsRequest(String text, - String voiceId, - String fileUrl, - String referenceText, - String model, - Float speechRate, - Float volume, - String instruction, - Integer sampleRate, - String audioFormat, - boolean preview) { - return CosyVoiceTtsRequest.builder() - .text(text) - .voiceId(voiceId) - .fileUrl(fileUrl) - .referenceText(referenceText) - .model(model) - .speechRate(speechRate) - .volume(volume) - .instruction(instruction) - .sampleRate(sampleRate) - .audioFormat(audioFormat) - .preview(preview) - .build(); + /** + * 获取 CosyVoice 配置(统一入口) + * 优先使用新配置,回退到旧配置 + */ + private cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig getCosyVoiceConfig() { + if (voiceProviderProperties != null) { + var config = voiceProviderProperties.getProviderConfig("cosyvoice"); + if (config instanceof cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig cosyConfig) { + return cosyConfig; + } + } + return null; + } + + /** + * 获取默认音频格式 + * 优先使用新配置,回退到旧配置 + */ + private String getDefaultFormat() { + var config = getCosyVoiceConfig(); + if (config != null) { + return config.getAudioFormat(); + } + if (cosyVoiceProperties != null) { + return cosyVoiceProperties.getAudioFormat(); + } + return "mp3"; + } + + /** + * 获取默认采样率 + * 优先使用新配置,回退到旧配置 + */ + private Integer getDefaultSampleRate() { + var config = getCosyVoiceConfig(); + if (config != null) { + return config.getSampleRate(); + } + if (cosyVoiceProperties != null) { + return cosyVoiceProperties.getSampleRate(); + } + return 24000; } private String defaultFormat(String responseFormat, String requestFormat) { return StrUtil.blankToDefault(responseFormat, - StrUtil.blankToDefault(requestFormat, cosyVoiceProperties.getAudioFormat())); + StrUtil.blankToDefault(requestFormat, getDefaultFormat())); } private 
String buildFileName(String voiceId, String format) { @@ -687,7 +722,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { return builder.toString(); } if (allowFallback) { - return cosyVoiceProperties.getPreviewText(); + return getPreviewText(); } throw exception(VOICE_TTS_FAILED, "请提供需要合成的文本内容"); } @@ -750,15 +785,19 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { } else { identifier = "no-voice"; } - + + // 获取默认配置 + String defaultFormat = getDefaultFormat(); + Integer defaultSampleRate = getDefaultSampleRate(); + String payload = StrUtil.join("|", identifier, text, speechRate != null ? speechRate : "1.0", volume != null ? volume : "0", instruction, - StrUtil.blankToDefault(audioFormat, cosyVoiceProperties.getAudioFormat()), - sampleRate != null ? sampleRate : cosyVoiceProperties.getSampleRate()); + StrUtil.blankToDefault(audioFormat, defaultFormat), + sampleRate != null ? sampleRate : defaultSampleRate); String hash = cn.hutool.crypto.SecureUtil.sha256(payload); return prefix + hash; } @@ -1123,5 +1162,35 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { builder.append(normalized); } + /** + * 获取默认音色ID + * 优先使用新配置,回退到旧配置 + */ + private String getDefaultVoiceId() { + var config = getCosyVoiceConfig(); + if (config != null) { + return config.getDefaultVoiceId(); + } + if (cosyVoiceProperties != null) { + return cosyVoiceProperties.getDefaultVoiceId(); + } + return null; + } + + /** + * 获取试听文本 + * 优先使用新配置,回退到旧配置 + */ + private String getPreviewText() { + var config = getCosyVoiceConfig(); + if (config != null) { + return config.getPreviewText(); + } + if (cosyVoiceProperties != null) { + return cosyVoiceProperties.getPreviewText(); + } + return "您好,欢迎体验专属音色。"; + } + } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java index 
1aaae3eaa6..4720d54634 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java @@ -34,5 +34,7 @@ public class AppTikUserVoiceCreateReqVO { @Schema(description = "备注", example = "这是一个测试配音") private String note; -} + @Schema(description = "供应商类型:cosyvoice-阿里云,siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice") + private String providerType; +} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java index cea266070e..a10bd5955f 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java @@ -42,6 +42,8 @@ public class AppTikVoicePreviewReqVO { @Schema(description = "指令(用于控制音色风格)", example = "请用温柔专业的语调朗读") private String instruction; + + @Schema(description = "供应商类型:cosyvoice-阿里云,siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice") + private String providerType; + } - - diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java index e4e9f76d79..0105cab406 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java @@ -44,6 +44,8 @@ public class AppTikVoiceTtsReqVO { @Schema(description = "音频格式,默认 wav,可选 mp3") private String audioFormat; + + @Schema(description = "供应商类型:cosyvoice-阿里云,siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice") + private String providerType; + } - -