From 0efca50be332bf5682da2285b21ba3b16943d752 Mon Sep 17 00:00:00 2001 From: shenaowei <450702724@qq.com> Date: Wed, 25 Feb 2026 16:28:31 +0800 Subject: [PATCH] =?UTF-8?q?=E8=AF=AD=E9=9F=B3=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/components/agents/ChatDrawer.vue | 11 +- frontend/app/web-gold/src/utils/clipboard.ts | 42 +++ .../src/views/content-style/Benchmark.vue | 10 +- .../src/views/content-style/Copywriting.vue | 44 +-- .../app/web-gold/src/views/dh/VoiceCopy.vue | 8 +- .../src/views/kling/types/identify-face.ts | 3 +- .../web-gold/src/views/trends/Forecast.vue | 7 +- .../tik/voice/client/CosyVoiceClient.java | 355 ------------------ .../tik/voice/client/CosyVoiceProvider.java | 124 ------ .../tik/voice/client/SiliconFlowProvider.java | 44 ++- .../tik/voice/client/VoiceCloneProvider.java | 4 +- .../client/VoiceCloneProviderFactory.java | 2 +- .../client/dto/CosyVoiceCloneRequest.java | 36 -- .../client/dto/CosyVoiceCloneResult.java | 21 -- .../voice/client/dto/CosyVoiceTtsRequest.java | 69 ---- .../voice/client/dto/CosyVoiceTtsResult.java | 37 -- .../client/dto/SiliconFlowReference.java | 27 ++ .../client/dto/SiliconFlowTtsRequest.java | 16 + .../voice/client/dto/VoiceCloneRequest.java | 4 - .../voice/config/CosyVoiceProviderConfig.java | 68 ---- .../voice/config/VoiceAutoConfiguration.java | 22 -- .../voice/config/VoiceProviderProperties.java | 6 +- .../controller/AppTikUserVoiceController.java | 2 +- .../dal/dataobject/TikDigitalHumanTaskDO.java | 6 +- .../voice/dal/dataobject/TikUserVoiceDO.java | 4 - .../tik/voice/enums/CosyVoiceEmotionEnum.java | 38 -- .../service/DigitalHumanTaskServiceImpl.java | 32 +- .../voice/service/TikUserVoiceService.java | 2 +- .../service/TikUserVoiceServiceImpl.java | 248 +++--------- .../vo/AppTikDigitalHumanCreateReqVO.java | 2 +- .../voice/vo/AppTikDigitalHumanRespVO.java | 2 +- .../voice/vo/AppTikUserVoiceCreateReqVO.java | 2 +- 
.../tik/voice/vo/AppTikUserVoiceRespVO.java | 3 - .../tik/voice/vo/AppTikVoicePreviewReqVO.java | 4 +- .../voice/vo/AppTikVoicePreviewRespVO.java | 2 +- .../tik/voice/vo/AppTikVoiceTtsReqVO.java | 6 +- .../tik/voice/vo/AppTikVoiceTtsRespVO.java | 4 +- .../src/main/resources/application-local.yaml | 4 - .../src/main/resources/application.yaml | 9 - 39 files changed, 237 insertions(+), 1093 deletions(-) create mode 100644 frontend/app/web-gold/src/utils/clipboard.ts delete mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java delete mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceProvider.java delete mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneRequest.java delete mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneResult.java delete mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java delete mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsResult.java create mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowReference.java delete mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProviderConfig.java delete mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/enums/CosyVoiceEmotionEnum.java diff --git a/frontend/app/web-gold/src/components/agents/ChatDrawer.vue b/frontend/app/web-gold/src/components/agents/ChatDrawer.vue index 3811e7f458..afb37f1e29 100644 --- a/frontend/app/web-gold/src/components/agents/ChatDrawer.vue +++ b/frontend/app/web-gold/src/components/agents/ChatDrawer.vue @@ -158,6 +158,7 @@ import { } from '@ant-design/icons-vue' import { message, Modal } from 'ant-design-vue' import { sendChatStream } from '@/api/agent' +import { 
copyToClipboard } from '@/utils/clipboard' const props = defineProps({ visible: { type: Boolean, default: false }, @@ -278,9 +279,13 @@ const handleKeyDown = (e) => { } } -const handleCopy = (content) => { - navigator.clipboard.writeText(content) - message.success('已复制') +const handleCopy = async (content) => { + const success = await copyToClipboard(content) + if (success) { + message.success('已复制') + } else { + message.error('复制失败') + } } const handleRegenerate = async () => { diff --git a/frontend/app/web-gold/src/utils/clipboard.ts b/frontend/app/web-gold/src/utils/clipboard.ts new file mode 100644 index 0000000000..9f3e11c420 --- /dev/null +++ b/frontend/app/web-gold/src/utils/clipboard.ts @@ -0,0 +1,42 @@ +/** + * 复制文本到剪贴板 + * 兼容非 HTTPS 环境的降级方案 + */ +export async function copyToClipboard(text: string): Promise { + if (!text?.trim()) { + return false + } + + // 优先使用 Clipboard API(需要 HTTPS 或 localhost) + if (navigator.clipboard?.writeText) { + try { + await navigator.clipboard.writeText(text) + return true + } catch { + // 降级到 execCommand 方案 + } + } + + // 降级方案:使用 textarea + execCommand + return fallbackCopy(text) +} + +/** + * 降级复制方案 + */ +function fallbackCopy(text: string): boolean { + try { + const textarea = document.createElement('textarea') + textarea.value = text + textarea.style.position = 'fixed' + textarea.style.opacity = '0' + textarea.style.left = '-9999px' + document.body.appendChild(textarea) + textarea.select() + const success = document.execCommand('copy') + document.body.removeChild(textarea) + return success + } catch { + return false + } +} diff --git a/frontend/app/web-gold/src/views/content-style/Benchmark.vue b/frontend/app/web-gold/src/views/content-style/Benchmark.vue index 3e9f9756cf..c67c4103ca 100644 --- a/frontend/app/web-gold/src/views/content-style/Benchmark.vue +++ b/frontend/app/web-gold/src/views/content-style/Benchmark.vue @@ -10,6 +10,7 @@ import TikhubService, { InterfaceType, MethodType } from '@/api/tikhub/index.js' import 
{ useBenchmarkData } from './composables/useBenchmarkData' import { useBenchmarkAnalysis } from './composables/useBenchmarkAnalysis' import { formatTime } from './utils/benchmarkUtils' +import { copyToClipboard } from '@/utils/clipboard' import BenchmarkForm from './components/BenchmarkForm.vue' import BenchmarkTable from './components/BenchmarkTable.vue' import BatchAnalyzeModal from './components/BatchAnalyzeModal.vue' @@ -231,17 +232,18 @@ async function handleLoadMore() { } } -function handleCopyBatchPrompt(prompt) { +async function handleCopyBatchPrompt(prompt) { if (!prompt?.trim()) { message.warning('没有提示词可复制') return } - navigator.clipboard.writeText(prompt).then(() => { + const success = await copyToClipboard(prompt) + if (success) { message.success('提示词已复制到剪贴板') - }).catch(() => { + } else { message.error('复制失败') - }) + } } function handleUseBatchPrompt(prompt) { diff --git a/frontend/app/web-gold/src/views/content-style/Copywriting.vue b/frontend/app/web-gold/src/views/content-style/Copywriting.vue index 4b2bc42789..b552fb8219 100644 --- a/frontend/app/web-gold/src/views/content-style/Copywriting.vue +++ b/frontend/app/web-gold/src/views/content-style/Copywriting.vue @@ -11,6 +11,7 @@ import { useUserStore } from '@/stores/user' import GradientButton from '@/components/GradientButton.vue' import PromptSelector from '@/components/PromptSelector.vue' import { setJSON, getJSON } from '@/utils/storage' +import { copyToClipboard } from '@/utils/clipboard' import BasicLayout from '@/layouts/components/BasicLayout.vue' const promptStore = usePromptStore() @@ -328,48 +329,19 @@ function cancelEdit() { message.info('已取消编辑') } -// 复制内容(编辑模式复制编辑区,否则复制生成内容),带降级方案 -function copyContent() { +// 复制内容(编辑模式复制编辑区,否则复制生成内容) +async function copyContent() { const text = isEditMode.value ? 
(editableContent.value || '') : (generatedContent.value || '') if (!text.trim()) { message.warning('没有可复制的内容') return } - // 优先使用异步 Clipboard API - if (navigator.clipboard && navigator.clipboard.writeText) { - navigator.clipboard.writeText(text).then(() => { - message.success('文案已复制到剪贴板') - }).catch(() => { - // 降级到选中复制 - fallbackCopy(text) - }) - return - } - // 直接降级 - fallbackCopy(text) -} - -function fallbackCopy(text) { - try { - const textarea = document.createElement('textarea') - textarea.value = text - textarea.style.position = 'fixed' - textarea.style.opacity = '0' - textarea.style.left = '-9999px' - document.body.appendChild(textarea) - textarea.focus() - textarea.select() - const ok = document.execCommand('copy') - document.body.removeChild(textarea) - if (ok) { - message.success('文案已复制到剪贴板') - } else { - message.error('复制失败,请手动复制') - } - } catch (e) { - console.warn('fallback copy failed:', e) - message.error('复制失败,请手动复制') + const success = await copyToClipboard(text) + if (success) { + message.success('文案已复制到剪贴板') + } else { + message.error('复制失败') } } diff --git a/frontend/app/web-gold/src/views/dh/VoiceCopy.vue b/frontend/app/web-gold/src/views/dh/VoiceCopy.vue index 03e3c3bbe1..38e8b57111 100644 --- a/frontend/app/web-gold/src/views/dh/VoiceCopy.vue +++ b/frontend/app/web-gold/src/views/dh/VoiceCopy.vue @@ -89,8 +89,8 @@
- 支持格式:MP3、WAV、AAC、M4A、FLAC、OGG,单个文件不超过 50MB
- 🎤 配音建议:使用 30 秒 - 2 分钟的短配音效果更佳 + 支持格式:MP3、WAV、AAC、M4A、FLAC、OGG,单个文件不超过 5MB
+ 🎤 配音建议:使用 5-20 秒的短配音效果更佳
@@ -129,7 +129,7 @@ const DEFAULT_FORM_DATA = { fileUrl: '' } -const MAX_FILE_SIZE = 50 * 1024 * 1024 +const MAX_FILE_SIZE = 5 * 1024 * 1024 // SiliconFlow API 限制参考音频不超过 5MB const VALID_AUDIO_TYPES = ['audio/mpeg', 'audio/wav', 'audio/wave', 'audio/x-wav', 'audio/aac', 'audio/mp4', 'audio/flac', 'audio/ogg'] const VALID_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.aac', '.m4a', '.flac', '.ogg'] @@ -290,7 +290,7 @@ function handlePlayAudio(record) { // ========== 文件上传 ========== function handleBeforeUpload(file) { if (file.size > MAX_FILE_SIZE) { - message.error('文件大小不能超过 50MB') + message.error('文件大小不能超过 5MB') return false } diff --git a/frontend/app/web-gold/src/views/kling/types/identify-face.ts b/frontend/app/web-gold/src/views/kling/types/identify-face.ts index c4fbcb8078..3183b278ff 100644 --- a/frontend/app/web-gold/src/views/kling/types/identify-face.ts +++ b/frontend/app/web-gold/src/views/kling/types/identify-face.ts @@ -224,7 +224,8 @@ export interface LipSyncTaskData { kling_face_start_time: number kling_face_end_time: number ai_provider: string - voiceConfigId: string + voiceId?: string // 系统预置音色ID + voiceConfigId?: string // 用户配音ID(tik_user_voice.id) pre_generated_audio?: { audioBase64: string format: string diff --git a/frontend/app/web-gold/src/views/trends/Forecast.vue b/frontend/app/web-gold/src/views/trends/Forecast.vue index efb7e9d2d9..1481f42ba0 100644 --- a/frontend/app/web-gold/src/views/trends/Forecast.vue +++ b/frontend/app/web-gold/src/views/trends/Forecast.vue @@ -7,6 +7,7 @@ import { rewriteStream } from '@/api/forecast' import { getAgentList } from '@/api/agent' import { useUserStore } from '@/stores/user' import { getVoiceText } from '@gold/hooks/web/useVoiceText' +import { copyToClipboard } from '@/utils/clipboard' defineOptions({ name: 'ForecastView' }) @@ -88,10 +89,10 @@ function handleSearchKeypress(event) { } async function copyContent() { - try { - await navigator.clipboard.writeText(generatedContent.value) + const success = await 
copyToClipboard(generatedContent.value) + if (success) { message.success('已复制') - } catch { + } else { message.error('复制失败') } } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java deleted file mode 100644 index 0534ef43f7..0000000000 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java +++ /dev/null @@ -1,355 +0,0 @@ -package cn.iocoder.yudao.module.tik.voice.client; - -import cn.hutool.core.collection.CollUtil; -import cn.hutool.core.util.StrUtil; -import cn.iocoder.yudao.framework.common.exception.ServiceException; -import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest; -import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult; -import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest; -import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult; -import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig; -import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam; -import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer; -import com.alibaba.dashscope.audio.ttsv2.enrollment.Voice; -import com.alibaba.dashscope.audio.ttsv2.enrollment.VoiceEnrollmentService; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import okhttp3.MediaType; -import okhttp3.OkHttpClient; -import okhttp3.Request; -import okhttp3.RequestBody; -import okhttp3.Response; -import org.springframework.stereotype.Component; - -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.time.Duration; -import java.util.Base64; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.TimeUnit; - -import static 
cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception; -import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception0; -import static cn.iocoder.yudao.module.tik.enums.ErrorCodeConstants.VOICE_TTS_FAILED; - -/** - * CosyVoice 客户端 - */ -@Slf4j -@Component -@RequiredArgsConstructor -public class CosyVoiceClient { - - private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8"); - - private final CosyVoiceProviderConfig config; - private final ObjectMapper objectMapper; - - private volatile OkHttpClient httpClient; - - /** - * 调用 CosyVoice TTS 接口 - */ - public CosyVoiceTtsResult synthesize(CosyVoiceTtsRequest request) { - if (!config.isEnabled()) { - throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key"); - } - - // 添加详细的参数检查日志 - String text = request != null ? request.getText() : null; - log.error("[CosyVoice][TTS参数检查][request={}, text={}, voiceId={}, model={}]", - request != null ? "存在" : "为null", - text != null ? "'" + text + "' (长度:" + text.length() + ")" : "为null", - request != null ? request.getVoiceId() : null, - request != null ? 
request.getModel() : null); - - if (request == null || StrUtil.isBlank(request.getText())) { - throw exception0(VOICE_TTS_FAILED.getCode(), "TTS 文本不能为空"); - } - if (StrUtil.isBlank(request.getVoiceId())) { - throw exception0(VOICE_TTS_FAILED.getCode(), "必须提供 voiceId"); - } - - SpeechSynthesizer synthesizer = null; - try { - log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}, speechRate={}, instruction={}]", - request.getVoiceId(), - request.getText().length(), - StrUtil.blankToDefault(request.getModel(), config.getDefaultModel()), - request.getSpeechRate(), - request.getInstruction()); - - // 使用 DashScope SDK 构建参数(严格按文档) - // 注意:speechRate 和 volume 需要转换为 int 类型 - SpeechSynthesisParam param = SpeechSynthesisParam.builder() - .apiKey(config.getApiKey()) - .model(StrUtil.blankToDefault(request.getModel(), config.getDefaultModel())) - .voice(request.getVoiceId()) - .speechRate(request.getSpeechRate() != null ? request.getSpeechRate().intValue() : 1) - .volume(request.getVolume() != null ? request.getVolume().intValue() : 0) - .build(); - - if (StrUtil.isNotBlank(request.getInstruction())) { - param.setInstruction(request.getInstruction()); - } - - log.error("[CosyVoice][SDK参数][param={}, text='{}']", param, request.getText()); - - // 初始化合成器(同步调用传 null) - synthesizer = new SpeechSynthesizer(param, null); - - // 阻塞调用,获取完整音频 - ByteBuffer audioData = synthesizer.call(request.getText()); - - if (audioData == null) { - throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 返回空音频数据"); - } - - // 转换为字节数组(严格按照文档:直接使用 array()) - byte[] audioBytes = audioData.array(); - - log.info("[CosyVoice][TTS合成成功][Request ID: {}, audioSize={}, 首包延迟={}ms]", - synthesizer.getLastRequestId(), - audioBytes.length, - synthesizer.getFirstPackageDelay()); - - // 构建返回结果 - CosyVoiceTtsResult result = new CosyVoiceTtsResult(); - result.setAudio(audioBytes); - result.setFormat(request.getAudioFormat() != null ? 
request.getAudioFormat() : config.getAudioFormat()); - result.setSampleRate(request.getSampleRate() != null ? request.getSampleRate() : config.getSampleRate()); - result.setRequestId(synthesizer.getLastRequestId()); - result.setVoiceId(request.getVoiceId()); - - return result; - - } catch (ServiceException ex) { - throw ex; - } catch (Exception ex) { - log.error("[CosyVoice][TTS异常][voiceId={}, text={}]", request.getVoiceId(), request.getText(), ex); - throw exception(VOICE_TTS_FAILED); - } finally { - // 关闭 WebSocket 连接 - if (synthesizer != null) { - try { - synthesizer.getDuplexApi().close(1000, "任务结束"); - } catch (Exception e) { - log.warn("[CosyVoice][关闭连接失败]", e); - } - } - } - } - - /** - * 使用 HTTP API 进行 TTS 合成(备用方案) - */ - private CosyVoiceTtsResult synthesizeViaHttp(CosyVoiceTtsRequest request) throws Exception { - String payload = objectMapper.writeValueAsString(buildPayload(request)); - Request httpRequest = new Request.Builder() - .url(config.getTtsUrl()) - .addHeader("Authorization", "Bearer " + config.getApiKey()) - .addHeader("Content-Type", "application/json") - .post(RequestBody.create(payload.getBytes(StandardCharsets.UTF_8), JSON)) - .build(); - - try (Response response = getHttpClient().newCall(httpRequest).execute()) { - String body = response.body() != null ? 
response.body().string() : ""; - if (!response.isSuccessful()) { - log.error("[CosyVoice][TTS失败][status={}, body={}]", response.code(), body); - throw buildException(body); - } - return parseTtsResult(body, request); - } - } - - /** - * 调用 CosyVoice 语音复刻接口(声音注册) - */ - public CosyVoiceCloneResult cloneVoice(CosyVoiceCloneRequest request) { - if (!config.isEnabled()) { - throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key"); - } - if (request == null || StrUtil.isBlank(request.getUrl())) { - throw exception0(VOICE_TTS_FAILED.getCode(), "复刻音频URL不能为空"); - } - if (request == null || StrUtil.isBlank(request.getTargetModel())) { - throw exception0(VOICE_TTS_FAILED.getCode(), "复刻模型不能为空"); - } - if (request == null || StrUtil.isBlank(request.getPrefix())) { - throw exception0(VOICE_TTS_FAILED.getCode(), "音色前缀不能为空"); - } - - try { - log.info("[CosyVoice][开始语音复刻][targetModel={}, prefix={}, url={}]", - request.getTargetModel(), request.getPrefix(), request.getUrl()); - - // 使用 DashScope SDK 创建语音复刻 - VoiceEnrollmentService service = new VoiceEnrollmentService(config.getApiKey()); - Voice voice = service.createVoice(request.getTargetModel(), request.getPrefix(), request.getUrl()); - - log.info("[CosyVoice][语音复刻成功][Request ID: {}, Voice ID: {}]", - service.getLastRequestId(), voice.getVoiceId()); - - // 构建返回结果 - CosyVoiceCloneResult result = new CosyVoiceCloneResult(); - result.setVoiceId(voice.getVoiceId()); - result.setRequestId(service.getLastRequestId()); - - return result; - } catch (ServiceException ex) { - throw ex; - } catch (Exception ex) { - log.error("[CosyVoice][语音复刻异常][targetModel={}, prefix={}]", - request.getTargetModel(), request.getPrefix(), ex); - throw exception(VOICE_TTS_FAILED); - } - } - - private Map buildPayload(CosyVoiceTtsRequest request) { - Map payload = new HashMap<>(); - String model = StrUtil.blankToDefault(request.getModel(), config.getDefaultModel()); - payload.put("model", model); - - Map input = new HashMap<>(); - 
input.put("text", request.getText()); - - // 优先使用fileUrl(语音克隆),否则使用voiceId(系统音色) - if (StrUtil.isNotBlank(request.getFileUrl())) { - // 直接使用预签名URL(带签名和时效),阿里云API需要这个签名URL - input.put("audio_url", request.getFileUrl()); - log.info("[CosyVoice][使用语音克隆][audio_url={}]", request.getFileUrl()); - - // 如果提供了参考文本,也一并传递(用于提高语音克隆质量) - if (StrUtil.isNotBlank(request.getReferenceText())) { - input.put("reference_text", request.getReferenceText()); - log.info("[CosyVoice][添加参考文本][length={}]", request.getReferenceText().length()); - } - } else { - // 使用系统音色 - String voiceId = StrUtil.blankToDefault(request.getVoiceId(), config.getDefaultVoiceId()); - if (StrUtil.isNotBlank(voiceId)) { - input.put("voice", voiceId); - log.info("[CosyVoice][使用系统音色][voice={}]", voiceId); - } else { - log.warn("[CosyVoice][未提供voiceId或fileUrl]"); - } - } - payload.put("input", input); - - Map parameters = new HashMap<>(); - int sampleRate = request.getSampleRate() != null ? request.getSampleRate() : config.getSampleRate(); - parameters.put("sample_rate", sampleRate); - - // 根据官方文档,统一使用小写格式 - String format = StrUtil.blankToDefault(request.getAudioFormat(), config.getAudioFormat()).toLowerCase(); - parameters.put("format", format); - - if (request.getSpeechRate() != null) { - parameters.put("speech_rate", request.getSpeechRate()); - } - if (request.getVolume() != null) { - // 文档显示volume范围是0-100 - parameters.put("volume", Math.round(request.getVolume())); - } - if (request.isPreview()) { - parameters.put("preview", true); - } - - payload.put("parameters", parameters); - - // 打印完整请求体(用于调试) - log.info("[CosyVoice][请求参数][model={}, sample_rate={}, format={}, text_length={}]", - model, sampleRate, format, request.getText().length()); - - return payload; - } - - private CosyVoiceTtsResult parseTtsResult(String body, CosyVoiceTtsRequest request) throws Exception { - JsonNode root = objectMapper.readTree(body); - - // 错误响应包含 code 字段 - if (root.has("code")) { - String message = root.has("message") ? 
root.get("message").asText() : body; - log.error("[CosyVoice][TTS失败][code={}, message={}]", root.get("code").asText(), message); - throw exception0(VOICE_TTS_FAILED.getCode(), message); - } - - JsonNode audioNode = root.path("output").path("audio"); - if (!audioNode.isArray() || audioNode.isEmpty()) { - throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 返回的音频为空"); - } - - JsonNode firstAudio = audioNode.get(0); - String content = firstAudio.path("content").asText(); - if (StrUtil.isBlank(content)) { - throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 返回空音频内容"); - } - - byte[] audioBytes = Base64.getDecoder().decode(content); - CosyVoiceTtsResult result = new CosyVoiceTtsResult(); - result.setAudio(audioBytes); - result.setFormat(firstAudio.path("format").asText(StrUtil.blankToDefault(request.getAudioFormat(), config.getAudioFormat()))); - result.setSampleRate(firstAudio.path("sample_rate").asInt(request.getSampleRate() != null ? request.getSampleRate() : config.getSampleRate())); - result.setRequestId(root.path("request_id").asText()); - result.setVoiceId(firstAudio.path("voice").asText(request.getVoiceId())); - return result; - } - - private OkHttpClient getHttpClient() { - if (httpClient == null) { - synchronized (this) { - if (httpClient == null) { - java.time.Duration connect = defaultDuration(config.getConnectTimeout(), 10); - java.time.Duration read = defaultDuration(config.getReadTimeout(), 60); - httpClient = new OkHttpClient.Builder() - .connectTimeout(connect.toMillis(), TimeUnit.MILLISECONDS) - .readTimeout(read.toMillis(), TimeUnit.MILLISECONDS) - .build(); - } - } - } - return httpClient; - } - - private Duration defaultDuration(Duration duration, long seconds) { - return duration == null ? 
Duration.ofSeconds(seconds) : duration; - } - - private ServiceException buildException(String body) { - try { - JsonNode root = objectMapper.readTree(body); - String message = CollUtil.getFirst( - CollUtil.newArrayList( - root.path("message").asText(null), - root.path("output").path("message").asText(null))); - return exception0(VOICE_TTS_FAILED.getCode(), StrUtil.blankToDefault(message, "CosyVoice 调用失败")); - } catch (Exception ignored) { - return exception0(VOICE_TTS_FAILED.getCode(), body); - } - } - - /** - * 从URL中提取原始URL(去除查询参数和锚点) - * - * @param url 可能包含查询参数的URL - * @return 原始URL(去除查询参数和锚点) - */ - private String extractRawUrl(String url) { - if (StrUtil.isBlank(url)) { - return url; - } - try { - java.net.URL urlObj = new java.net.URL(url); - // 只使用协议、主机、路径部分,忽略查询参数和锚点 - return urlObj.getProtocol() + "://" + urlObj.getHost() + urlObj.getPath(); - } catch (Exception e) { - // 如果URL解析失败,使用简单方式去除查询参数 - return url.split("\\?")[0].split("#")[0]; - } - } -} - - diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceProvider.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceProvider.java deleted file mode 100644 index 91473c5df0..0000000000 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceProvider.java +++ /dev/null @@ -1,124 +0,0 @@ -package cn.iocoder.yudao.module.tik.voice.client; - -import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest; -import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult; -import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest; -import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult; -import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig; -import cn.iocoder.yudao.module.tik.voice.config.VoiceProviderProperties; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.springframework.stereotype.Component; - -/** - * CosyVoice 
Provider 实现 - * - *

阿里云 CosyVoice 语音服务的 Provider 实现。 - * 内部委托给 {@link CosyVoiceClient} 进行实际的API调用。 - * - * @author 芋道源码 - */ -@Slf4j -@Component -@RequiredArgsConstructor -public class CosyVoiceProvider implements VoiceCloneProvider { - - private final CosyVoiceClient cosyVoiceClient; - private final VoiceProviderProperties voiceProviderProperties; - - /** - * 获取 CosyVoice 配置 - */ - private CosyVoiceProviderConfig getConfig() { - var baseConfig = voiceProviderProperties.getProviderConfig("cosyvoice"); - if (baseConfig instanceof CosyVoiceProviderConfig config) { - return config; - } - return new CosyVoiceProviderConfig(); - } - - @Override - public VoiceCloneResult cloneVoice(VoiceCloneRequest request) { - log.info("[CosyVoiceProvider][语音克隆][audioUrl={}, model={}]", - request.getAudioUrl(), request.getModel()); - - // 适配到 CosyVoiceCloneRequest - cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest cosyRequest = - new cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest(); - - cosyRequest.setUrl(request.getAudioUrl()); - cosyRequest.setTargetModel(request.getModel()); - cosyRequest.setPrefix(request.getPrefix()); - if (request.getSampleRate() != null) { - cosyRequest.setSampleRate(request.getSampleRate()); - } - if (request.getAudioFormat() != null) { - cosyRequest.setAudioFormat(request.getAudioFormat()); - } - - // 调用底层 Client - cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult cosyResult = - cosyVoiceClient.cloneVoice(cosyRequest); - - // 适配到统一 Result - VoiceCloneResult result = new VoiceCloneResult(); - result.setVoiceId(cosyResult.getVoiceId()); - result.setRequestId(cosyResult.getRequestId()); - - log.info("[CosyVoiceProvider][语音克隆成功][voiceId={}]", result.getVoiceId()); - return result; - } - - @Override - public VoiceTtsResult synthesize(VoiceTtsRequest request) { - log.info("[CosyVoiceProvider][语音合成][voiceId={}, textLength={}, model={}]", - request.getVoiceId(), - request.getText() != null ? 
request.getText().length() : 0, - request.getModel()); - - // 适配到 CosyVoiceTtsRequest - cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest cosyRequest = - cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest.builder() - .text(request.getText()) - .voiceId(request.getVoiceId()) - .fileUrl(request.getFileUrl()) - .referenceText(request.getReferenceText()) - .model(request.getModel()) - .speechRate(request.getSpeechRate()) - .volume(request.getVolume()) - .instruction(request.getInstruction()) - .sampleRate(request.getSampleRate()) - .audioFormat(request.getAudioFormat()) - .preview(request.isPreview()) - .build(); - - log.error("[CosyVoiceProvider][构建的cosyRequest][text='{}', voiceId={}, fileUrl={}]", - cosyRequest.getText(), cosyRequest.getVoiceId(), cosyRequest.getFileUrl()); - - // 调用底层 Client - cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult cosyResult = - cosyVoiceClient.synthesize(cosyRequest); - - // 适配到统一 Result - VoiceTtsResult result = new VoiceTtsResult(); - result.setRequestId(cosyResult.getRequestId()); - result.setFormat(cosyResult.getFormat()); - result.setSampleRate(cosyResult.getSampleRate()); - result.setAudio(cosyResult.getAudio()); - result.setVoiceId(cosyResult.getVoiceId()); - - log.info("[CosyVoiceProvider][语音合成成功][format={}, audioSize={}]", - result.getFormat(), result.getAudio() != null ? 
result.getAudio().length : 0); - return result; - } - - @Override - public boolean supports(String providerType) { - return "cosyvoice".equalsIgnoreCase(providerType); - } - - @Override - public String getProviderType() { - return "cosyvoice"; - } -} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java index 18b0412d1f..24304db67e 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java @@ -4,6 +4,7 @@ import cn.hutool.core.util.StrUtil; import cn.hutool.http.HttpRequest; import cn.hutool.http.HttpResponse; import cn.hutool.json.JSONUtil; +import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowReference; import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowTtsRequest; import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowVoiceUploadRequest; import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowVoiceUploadResponse; @@ -21,6 +22,7 @@ import java.io.ByteArrayOutputStream; import java.io.InputStream; import java.net.URL; import java.util.Base64; +import java.util.Collections; /** * 硅基流动 Provider 实现 @@ -108,19 +110,42 @@ public class SiliconFlowProvider implements VoiceCloneProvider { throw new RuntimeException("硅基流动供应商未配置或已禁用"); } - log.info("[SiliconFlowProvider][语音合成][voiceId={}, textLength={}, model={}]", + // 判断使用哪种模式 + boolean useReferenceMode = StrUtil.isBlank(request.getVoiceId()) + && StrUtil.isNotBlank(request.getFileUrl()); + + log.info("[SiliconFlowProvider][语音合成][voiceId={}, fileUrl={}, textLength={}, model={}, mode={}]", request.getVoiceId(), + request.getFileUrl() != null ? "存在" : "无", request.getText() != null ? request.getText().length() : 0, - request.getModel()); + request.getModel(), + useReferenceMode ? 
"动态音色" : "标准音色"); try { - SiliconFlowTtsRequest sfRequest = SiliconFlowTtsRequest.builder() + SiliconFlowTtsRequest.SiliconFlowTtsRequestBuilder requestBuilder = SiliconFlowTtsRequest.builder() .model(getOrDefault(request.getModel(), getOrDefault(config.getDefaultModel(), "IndexTeam/IndexTTS-2"))) .input(request.getText()) - .voice(request.getVoiceId()) .speed(request.getSpeechRate() != null ? request.getSpeechRate() : 1.0f) .responseFormat(getOrDefault(request.getAudioFormat(), config.getAudioFormat())) - .build(); + .gain(request.getVolume()); + + if (useReferenceMode) { + // 用户动态音色模式:voice 传空,使用 references + log.info("[SiliconFlowProvider][使用动态音色模式][fileUrl={}]", request.getFileUrl()); + requestBuilder.voice(""); + + SiliconFlowReference reference = SiliconFlowReference.builder() + .audio(request.getFileUrl()) + .text(request.getReferenceText()) + .build(); + requestBuilder.references(Collections.singletonList(reference)); + } else { + // 标准模式:使用 voiceId + log.info("[SiliconFlowProvider][使用标准音色模式][voiceId={}]", request.getVoiceId()); + requestBuilder.voice(request.getVoiceId()); + } + + SiliconFlowTtsRequest sfRequest = requestBuilder.build(); String url = config.getBaseUrl() + config.getTtsUrl(); String requestBody = JSONUtil.toJsonStr(sfRequest); @@ -141,15 +166,16 @@ public class SiliconFlowProvider implements VoiceCloneProvider { } byte[] audioBytes = response.bodyBytes(); - String base64Audio = Base64.getEncoder().encodeToString(audioBytes); VoiceTtsResult result = new VoiceTtsResult(); - result.setAudio(Base64.getDecoder().decode(base64Audio)); + result.setAudio(audioBytes); result.setFormat(sfRequest.getResponseFormat()); result.setVoiceId(request.getVoiceId()); - log.info("[SiliconFlowProvider][语音合成成功][format={}, audioSize={}]", - result.getFormat(), result.getAudio() != null ? result.getAudio().length : 0); + log.info("[SiliconFlowProvider][语音合成成功][format={}, audioSize={}, mode={}]", + result.getFormat(), + result.getAudio() != null ? 
result.getAudio().length : 0, + useReferenceMode ? "动态音色" : "标准音色"); return result; } catch (Exception e) { diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProvider.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProvider.java index 99763ec84e..5e7fac4ad8 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProvider.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProvider.java @@ -41,7 +41,7 @@ public interface VoiceCloneProvider { /** * 检查是否支持指定的供应商类型 * - * @param providerType 供应商类型(如 "cosyvoice", "siliconflow") + * @param providerType 供应商类型(如 "siliconflow") * @return true 如果支持,false 否则 */ boolean supports(String providerType); @@ -49,7 +49,7 @@ public interface VoiceCloneProvider { /** * 获取供应商类型标识 * - * @return 供应商类型,如 "cosyvoice", "siliconflow" + * @return 供应商类型,如 "siliconflow" */ String getProviderType(); } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProviderFactory.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProviderFactory.java index e25e1e2102..0b448c7b6e 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProviderFactory.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProviderFactory.java @@ -75,7 +75,7 @@ public class VoiceCloneProviderFactory { /** * 根据类型获取 Provider * - * @param providerType 供应商类型(如 "cosyvoice", "siliconflow") + * @param providerType 供应商类型(如 "siliconflow") * @return 对应的 Provider 实例 * @throws ServiceException 当 Provider 不存在时抛出 */ diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneRequest.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneRequest.java deleted file mode 100644 index 
30716f18fc..0000000000 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneRequest.java +++ /dev/null @@ -1,36 +0,0 @@ -package cn.iocoder.yudao.module.tik.voice.client.dto; - -import lombok.Data; - -/** - * CosyVoice 语音复刻请求 - */ -@Data -public class CosyVoiceCloneRequest { - - /** - * 复刻模型(cosyvoice-v3-flash 等) - */ - private String targetModel; - - /** - * 音色自定义前缀(仅允许数字和小写字母,长度<10字符) - */ - private String prefix; - - /** - * 音频文件公网URL - */ - private String url; - - /** - * 采样率,默认24000 - */ - private Integer sampleRate; - - /** - * 音频格式,默认wav - */ - private String audioFormat; - -} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneResult.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneResult.java deleted file mode 100644 index d01421a269..0000000000 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneResult.java +++ /dev/null @@ -1,21 +0,0 @@ -package cn.iocoder.yudao.module.tik.voice.client.dto; - -import lombok.Data; - -/** - * CosyVoice 语音复刻结果 - */ -@Data -public class CosyVoiceCloneResult { - - /** - * 生成的 voice_id - */ - private String voiceId; - - /** - * 请求ID - */ - private String requestId; - -} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java deleted file mode 100644 index fec199cbed..0000000000 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java +++ /dev/null @@ -1,69 +0,0 @@ -package cn.iocoder.yudao.module.tik.voice.client.dto; - -import lombok.Builder; -import lombok.Data; - -/** - * CosyVoice TTS 请求 - */ -@Data -@Builder -public class CosyVoiceTtsRequest { - - /** - * 待合成文本 - */ - private String text; - - /** - * 声音 ID(可选,默认使用配置) - */ - private 
String voiceId; - - /** - * 语音文件URL(当使用语音URL合成时使用,替代voiceId) - */ - private String fileUrl; - - /** - * 参考音频文本(当使用fileUrl时,用于提高克隆质量) - */ - private String referenceText; - - /** - * 模型(默认 cosyvoice-v3-flash) - */ - private String model; - - /** - * 语速 - */ - private Float speechRate; - - /** - * 音量,可选 - */ - private Float volume; - - /** - * 指令(用于控制音色风格),可选 - */ - private String instruction; - - /** - * 采样率 - */ - private Integer sampleRate; - - /** - * 音频格式 - */ - private String audioFormat; - - /** - * 是否仅用于试听,方便服务侧做限流 - */ - private boolean preview; -} - - diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsResult.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsResult.java deleted file mode 100644 index 3a100fff4a..0000000000 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsResult.java +++ /dev/null @@ -1,37 +0,0 @@ -package cn.iocoder.yudao.module.tik.voice.client.dto; - -import lombok.Data; - -/** - * CosyVoice TTS 响应 - */ -@Data -public class CosyVoiceTtsResult { - - /** - * 请求ID - */ - private String requestId; - - /** - * 返回的音频格式 - */ - private String format; - - /** - * 采样率 - */ - private Integer sampleRate; - - /** - * 音频二进制内容 - */ - private byte[] audio; - - /** - * 音频所使用的 voiceId - */ - private String voiceId; -} - - diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowReference.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowReference.java new file mode 100644 index 0000000000..d02c5d4e44 --- /dev/null +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowReference.java @@ -0,0 +1,27 @@ +package cn.iocoder.yudao.module.tik.voice.client.dto; + +import lombok.Builder; +import lombok.Data; + +/** + * 硅基流动参考音频配置 + * + *

<p>用于用户动态音色模式,通过 references 传递参考音频实现实时语音克隆。 + * + * @author 芋道源码 + */ +@Data +@Builder +public class SiliconFlowReference { + + /** + * 参考音频 URL(也支持 base64 格式) + */ + private String audio; + + /** + * 参考音频的文字内容 + */ + private String text; + +} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java index 5322bf8f08..1d078223f1 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java @@ -4,6 +4,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Builder; import lombok.Data; +import java.util.List; + /** * 硅基流动文本转语音请求 * @@ -42,4 +44,18 @@ public class SiliconFlowTtsRequest { @JsonProperty("response_format") private String responseFormat; + /** + * 音量增益(-10 到 10,默认 0) + * + *

<p>正值增加音量,负值降低音量 + */ + private Float gain; + + /** + * 参考音频列表(用于用户动态音色模式) + * + *

<p>当 voice 为空时,使用此字段传递参考音频实现实时语音克隆 + */ + private List<SiliconFlowReference> references; + } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneRequest.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneRequest.java index 5d3be6edf6..ad545a3212 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneRequest.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneRequest.java @@ -16,7 +16,6 @@ public class VoiceCloneRequest { /** * 音频文件公网URL * - *

<p>CosyVoice: 对应 {@code url} 字段</p>

*

<p>SiliconFlow: 对应 {@code audio} 字段(需base64编码)</p>

*/ private String audioUrl; @@ -24,7 +23,6 @@ public class VoiceCloneRequest { /** * 模型名称 * - *

<p>CosyVoice: 对应 {@code targetModel},如 {@code cosyvoice-v3-flash}</p>

*

<p>SiliconFlow: 对应 {@code model},如 {@code indextts-2}</p>

*/ private String model; @@ -32,7 +30,6 @@ public class VoiceCloneRequest { /** * 音色自定义前缀(可选) * - *

<p>CosyVoice: 必填,仅允许数字和小写字母,长度<10字符</p>

*

<p>SiliconFlow: 不适用</p>

*/ private String prefix; @@ -53,7 +50,6 @@ public class VoiceCloneRequest { * 转录文本(可选) * *

<p>SiliconFlow: 音频对应的文本内容</p>

- *

<p>CosyVoice: 不适用</p>

*/ private String transcriptionText; } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProviderConfig.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProviderConfig.java deleted file mode 100644 index 310122fed8..0000000000 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProviderConfig.java +++ /dev/null @@ -1,68 +0,0 @@ -package cn.iocoder.yudao.module.tik.voice.config; - -import lombok.Data; -import lombok.EqualsAndHashCode; -import org.springframework.boot.context.properties.ConfigurationProperties; -import org.springframework.stereotype.Component; - -import java.time.Duration; - -/** - * CosyVoice 供应商配置 - * - *

继承通用配置,添加 CosyVoice 特有字段。 - * - * @author 芋道源码 - */ -@Data -@EqualsAndHashCode(callSuper = true) -@Component -@ConfigurationProperties(prefix = "yudao.voice.cosyvoice") -public class CosyVoiceProviderConfig extends VoiceProviderProperties.ProviderConfig { - - /** - * 默认模型 - */ - private String defaultModel = "cosyvoice-v3-flash"; - - /** - * 默认 voiceId(可选) - */ - private String defaultVoiceId; - - /** - * 默认采样率 - */ - private Integer sampleRate = 24000; - - /** - * 默认音频格式 - */ - private String audioFormat = "mp3"; - - /** - * 试听默认示例文本 - */ - private String previewText = "您好,欢迎体验专属音色。"; - - /** - * TTS 接口地址 - */ - private String ttsUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis"; - - /** - * 语音复刻接口地址(声音注册) - */ - private String voiceEnrollmentUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment"; - - /** - * 连接超时时间 - */ - private Duration connectTimeout = Duration.ofSeconds(10); - - /** - * 读取超时时间(3分钟,提升语音合成成功率) - */ - private Duration readTimeout = Duration.ofSeconds(180); - -} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/VoiceAutoConfiguration.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/VoiceAutoConfiguration.java index bff932ddbc..244b431e60 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/VoiceAutoConfiguration.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/VoiceAutoConfiguration.java @@ -1,8 +1,5 @@ package cn.iocoder.yudao.module.tik.voice.config; -import lombok.extern.slf4j.Slf4j; -import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; -import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; /** @@ -11,25 +8,6 @@ import org.springframework.context.annotation.Configuration; * @author 芋道源码 */ @Configuration -@Slf4j public class VoiceAutoConfiguration { - /** - * CosyVoice 
供应商配置 Bean - */ - @Bean - @ConditionalOnProperty(prefix = "yudao.voice.providers.cosyvoice", name = "enabled", havingValue = "true", matchIfMissing = true) - public CosyVoiceProviderConfig cosyVoiceProviderConfig(VoiceProviderProperties properties) { - VoiceProviderProperties.ProviderConfig baseConfig = properties.getProviderConfig("cosyvoice"); - if (baseConfig == null) { - baseConfig = new VoiceProviderProperties.ProviderConfig(); - } - - CosyVoiceProviderConfig config = new CosyVoiceProviderConfig(); - config.setEnabled(baseConfig.isEnabled()); - config.setApiKey(baseConfig.getApiKey()); - config.setPriority(baseConfig.getPriority()); - return config; - } - } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/VoiceProviderProperties.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/VoiceProviderProperties.java index f97ac57b0e..8ba085a74b 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/VoiceProviderProperties.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/VoiceProviderProperties.java @@ -22,14 +22,14 @@ public class VoiceProviderProperties { /** * 默认供应商类型 * - *

可选值: cosyvoice, siliconflow 等 + *

可选值: siliconflow 等 */ - private String defaultProvider = "cosyvoice"; + private String defaultProvider = "siliconflow"; /** * 各供应商配置 * - *

key 为供应商类型(如 cosyvoice, siliconflow) + *

key 为供应商类型(如 siliconflow) */ private Map providers = new HashMap<>(); diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/controller/AppTikUserVoiceController.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/controller/AppTikUserVoiceController.java index 45b5532806..f4fe3d5d95 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/controller/AppTikUserVoiceController.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/controller/AppTikUserVoiceController.java @@ -80,7 +80,7 @@ public class AppTikUserVoiceController { } @PostMapping("/tts") - @Operation(summary = "CosyVoice 文本转语音") + @Operation(summary = "文本转语音") public CommonResult synthesizeVoice(@Valid @RequestBody AppTikVoiceTtsReqVO reqVO) { return success(voiceService.synthesizeVoice(reqVO)); } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikDigitalHumanTaskDO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikDigitalHumanTaskDO.java index 0054210706..1ecd9831a1 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikDigitalHumanTaskDO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikDigitalHumanTaskDO.java @@ -55,9 +55,13 @@ public class TikDigitalHumanTaskDO extends TenantBaseDO { // ========== TTS参数 ========== /** - * 音色ID(CosyVoice voiceId) + * 音色ID(系统音色使用) */ private String voiceId; + /** + * 用户配音ID(tik_user_voice.id,用户配音使用) + */ + private Long voiceConfigId; /** * 输入文本(用于语音合成) */ diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikUserVoiceDO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikUserVoiceDO.java index 6e2a666610..4ab397b217 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikUserVoiceDO.java +++ 
b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikUserVoiceDO.java @@ -54,10 +54,6 @@ public class TikUserVoiceDO extends TenantBaseDO { * 备注信息 */ private String note; - /** - * 复刻音色ID(CosyVoice 语音复刻生成的 voice_id) - */ - private String voiceId; } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/enums/CosyVoiceEmotionEnum.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/enums/CosyVoiceEmotionEnum.java deleted file mode 100644 index afecb99344..0000000000 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/enums/CosyVoiceEmotionEnum.java +++ /dev/null @@ -1,38 +0,0 @@ -package cn.iocoder.yudao.module.tik.voice.enums; - -import cn.hutool.core.util.StrUtil; -import lombok.AllArgsConstructor; -import lombok.Getter; - -/** - * CosyVoice情感枚举 - * 根据阿里云DashScope官方文档定义 - * 参考:https://help.aliyun.com/zh/dashscope/developer-reference/tts-api - */ -@Getter -@AllArgsConstructor -public enum CosyVoiceEmotionEnum { - - NEUTRAL("neutral", "中性"), - HAPPY("happy", "高兴"), - SAD("sad", "悲伤"), - ANGRY("angry", "愤怒"), - SURPRISED("surprised", "惊讶"), - DISGUSTED("disgusted", "厌恶"), - SCARED("scared", "害怕"); - - private final String code; - private final String description; - - public static CosyVoiceEmotionEnum getByCode(String code) { - if (StrUtil.isBlank(code)) { - return NEUTRAL; - } - for (CosyVoiceEmotionEnum emotion : values()) { - if (emotion.getCode().equalsIgnoreCase(code)) { - return emotion; - } - } - return NEUTRAL; - } -} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/DigitalHumanTaskServiceImpl.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/DigitalHumanTaskServiceImpl.java index b6a50da7de..822cae24e2 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/DigitalHumanTaskServiceImpl.java +++ 
b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/DigitalHumanTaskServiceImpl.java @@ -390,8 +390,9 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService { throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_FORBIDDEN, "无权访问该音色"); } - if (StrUtil.isBlank(userVoice.getVoiceId())) { - throw new IllegalArgumentException("该音色配置无效,缺少voiceId"); + // 验证识别文本是否存在(用于动态音色模式) + if (StrUtil.isBlank(userVoice.getTranscription())) { + throw new IllegalArgumentException("该音色配置无效,请先进行语音识别"); } } @@ -399,14 +400,8 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService { * 创建任务记录 */ private TikDigitalHumanTaskDO createTaskRecord(AppTikDigitalHumanCreateReqVO reqVO, Long userId) { - // 如果是用户音色,需要从voiceConfigId获取voiceId + // 直接使用前端传递的 voiceId(系统预置音色),用户音色通过 voiceConfigId 在合成时处理 String voiceId = reqVO.getVoiceId(); - if (voiceId == null && reqVO.getVoiceConfigId() != null) { - TikUserVoiceDO userVoice = userVoiceMapper.selectById(reqVO.getVoiceConfigId()); - if (userVoice != null) { - voiceId = userVoice.getVoiceId(); - } - } // ✅ 预生成音频信息(无需存储时长,前端严格校验) if (reqVO.getPreGeneratedAudio() != null) { @@ -427,6 +422,7 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService { .videoFileId(reqVO.getVideoFileId()) .videoUrl(reqVO.getVideoUrl()) .voiceId(voiceId) + .voiceConfigId(reqVO.getVoiceConfigId()) .inputText(reqVO.getInputText()) .speechRate(reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f) .volume(reqVO.getVolume() != null ? 
reqVO.getVolume() : 0f) @@ -550,7 +546,7 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService { } /** - * 语音合成(使用CosyVoice v3 Flash) + * 语音合成 */ private String synthesizeVoice(TikDigitalHumanTaskDO task) throws Exception { // ✅ 优先使用预生成的音频(前端传递) @@ -561,21 +557,25 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService { } // 如果没有预生成音频,则走正常的TTS流程 - // 参数验证 - if (StrUtil.isBlank(task.getVoiceId())) { - throw new Exception("音色ID不能为空"); + // 参数验证:voiceId(系统音色)和 voiceConfigId(用户配音)二选一 + boolean hasVoiceId = StrUtil.isNotBlank(task.getVoiceId()); + boolean hasVoiceConfigId = task.getVoiceConfigId() != null; + + if (!hasVoiceId && !hasVoiceConfigId) { + throw new Exception("音色ID不能为空(需提供voiceId或voiceConfigId)"); } if (StrUtil.isBlank(task.getInputText())) { throw new Exception("输入文本不能为空"); } - log.info("[synthesizeVoice][任务({})开始语音合成][voiceId={}, textLength={}]", - task.getId(), task.getVoiceId(), task.getInputText().length()); + log.info("[synthesizeVoice][任务({})开始语音合成][voiceId={}, voiceConfigId={}, textLength={}]", + task.getId(), task.getVoiceId(), task.getVoiceConfigId(), task.getInputText().length()); // 构建TTS请求参数 AppTikVoiceTtsReqVO ttsReqVO = new AppTikVoiceTtsReqVO(); ttsReqVO.setInputText(task.getInputText()); - ttsReqVO.setVoiceId(task.getVoiceId()); + ttsReqVO.setVoiceId(task.getVoiceId()); // 系统音色 + ttsReqVO.setVoiceConfigId(task.getVoiceConfigId()); // 用户配音 ttsReqVO.setSpeechRate(task.getSpeechRate() != null ? task.getSpeechRate() : 1.0f); ttsReqVO.setVolume(task.getVolume() != null ? 
task.getVolume() : 0f); ttsReqVO.setInstruction(task.getInstruction()); diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceService.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceService.java index c6ebaf5bbf..987dbaddfa 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceService.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceService.java @@ -63,7 +63,7 @@ public interface TikUserVoiceService { void transcribeVoice(Long id); /** - * CosyVoice 文本转语音 + * 文本转语音 */ AppTikVoiceTtsRespVO synthesizeVoice(AppTikVoiceTtsReqVO reqVO); diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java index c9c6fe1082..3bddba7831 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java @@ -22,8 +22,6 @@ import cn.iocoder.yudao.module.tik.tikhup.service.TikHupService; import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX; import cn.iocoder.yudao.module.tik.voice.client.VoiceCloneProvider; import cn.iocoder.yudao.module.tik.voice.client.VoiceCloneProviderFactory; -import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest; -import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult; import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest; import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult; import cn.iocoder.yudao.module.tik.muye.aimodelconfig.dal.AiModelConfigDO; @@ -89,9 +87,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { @Resource private VoiceCloneProviderFactory voiceProviderFactory; - 
@Resource - private cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig cosyVoiceProviderConfig; - @Resource private StringRedisTemplate stringRedisTemplate; @@ -102,22 +97,16 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { private static final long PREVIEW_CACHE_TTL_SECONDS = 3600; private static final long SYNTH_CACHE_TTL_SECONDS = 24 * 3600; - /** 供应商类型常量 */ - private static final String PROVIDER_COSYVOICE = "cosyvoice"; - private static final String PROVIDER_SILICONFLOW = "siliconflow"; - - /** 模型常量 */ - private static final String MODEL_COSYVOICE = "cosyvoice-v3-flash"; - private static final String MODEL_SILICONFLOW = "IndexTeam/IndexTTS-2"; - /** 积分平台和类型常量 */ private static final String PLATFORM_VOICE = "voice"; private static final String MODEL_CODE_TTS = "tts"; - private static final String MODEL_CODE_CLONE = "clone"; @Resource private PointsService pointsService; + /** SiliconFlow 参考音频最大大小:5MB */ + private static final int MAX_REFERENCE_AUDIO_SIZE = 5 * 1024 * 1024; + @Override @Transactional(rollbackFor = Exception.class) public Long createVoice(AppTikUserVoiceCreateReqVO createReqVO) { @@ -128,7 +117,14 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { if (fileDO == null) { throw exception(VOICE_FILE_NOT_EXISTS); } - + + // 校验文件大小(SiliconFlow API 限制参考音频不超过 5MB) + if (fileDO.getSize() != null && fileDO.getSize() > MAX_REFERENCE_AUDIO_SIZE) { + double sizeMB = fileDO.getSize() / (1024.0 * 1024.0); + throw exception(VOICE_FILE_NOT_EXISTS, + String.format("音频文件过大(%.1fMB),请上传小于5MB的音频文件", sizeMB)); + } + // 验证文件分类是否为voice(通过tik_user_file表查询) TikUserFileDO userFile = userFileMapper.selectOne(new LambdaQueryWrapperX() .eq(TikUserFileDO::getFileId, createReqVO.getFileId()) @@ -158,51 +154,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { .setTranscription(createReqVO.getText()); // 使用前端传入的文本 voiceMapper.insert(voice); - // 4. 
调用语音克隆服务,生成 voice_id - if (StrUtil.isNotBlank(createReqVO.getText())) { - try { - // 4.1 获取积分配置并预检 - AiModelConfigDO config = pointsService.getConfig(PLATFORM_VOICE, MODEL_CODE_CLONE); - pointsService.checkPoints(userId.toString(), config.getConsumePoints()); - - log.info("[createVoice][开始语音复刻,配音编号({}),文件ID({}),供应商({})]", - voice.getId(), fileDO.getId(), createReqVO.getProviderType()); - String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); - - VoiceCloneProvider provider = voiceProviderFactory.getProvider(createReqVO.getProviderType()); - String providerType = getProviderType(createReqVO.getProviderType(), provider); - String model = getModelByProvider(providerType); - - VoiceCloneRequest cloneRequest = new VoiceCloneRequest(); - cloneRequest.setAudioUrl(fileAccessUrl); - cloneRequest.setModel(model); - cloneRequest.setPrefix("voice" + voice.getId()); - cloneRequest.setTranscriptionText(createReqVO.getText()); // 使用前端传入的文本 - - VoiceCloneResult cloneResult = provider.cloneVoice(cloneRequest); - String voiceId = cloneResult.getVoiceId(); - - voice.setVoiceId(voiceId); - voiceMapper.updateById(voice); - - // 4.2 音色克隆成功,扣减积分 - try { - pointsService.deductPoints(userId.toString(), config.getConsumePoints(), "voice_clone", voice.getId().toString()); - log.info("[createVoice][用户 {} 扣减 {} 积分(音色克隆)]", userId, config.getConsumePoints()); - } catch (Exception e) { - log.error("[createVoice][积分扣减失败: {}]", e.getMessage()); - } - - log.info("[createVoice][语音复刻成功,配音编号({}),voice_id({})]", voice.getId(), voiceId); - } catch (Exception e) { - log.error("[createVoice][语音复刻失败,配音编号({}),错误信息: {}]", voice.getId(), e.getMessage(), e); - // 失败不扣费 - } - } else { - log.info("[createVoice][未提供文本,跳过语音复刻,配音编号({})]", voice.getId()); - } - - log.info("[createVoice][用户({})创建配音成功,配音编号({})]", userId, voice.getId()); return voice.getId(); } @@ -300,10 +251,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { 
.collect(Collectors.toList()); if (CollUtil.isNotEmpty(fileIds)) { - List files = fileMapper.selectBatchIds(fileIds); - Map tempFileMap = files.stream() - .collect(Collectors.toMap(FileDO::getId, file -> file)); - fileMap.putAll(tempFileMap); + fileMapper.selectBatchIds(fileIds).forEach(file -> fileMap.put(file.getId(), file)); } } @@ -412,26 +360,18 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户"); } - // 优先使用复刻的 voice_id,如果不存在则使用文件URL(兼容旧数据) - if (StrUtil.isNotBlank(voice.getVoiceId())) { - log.info("[synthesizeVoice][使用复刻音色ID合成,配音编号({}),voice_id({})]", voiceConfigId, voice.getVoiceId()); - voiceId = voice.getVoiceId(); - // 注意:使用 voiceId 时,不依赖 transcriptionText,直接使用前端传入的 inputText - transcriptionText = null; // 清除 transcriptionText,让 determineSynthesisText 只使用 inputText - } else { - log.info("[synthesizeVoice][使用文件URL合成,配音编号({})]", voiceConfigId); - // 获取文件信息,用于获取文件URL - FileDO fileDO = fileMapper.selectById(voice.getFileId()); - if (fileDO == null) { - throw exception(VOICE_FILE_NOT_EXISTS); - } + // 使用动态音色模式(fileUrl + transcriptionText) + log.info("[synthesizeVoice][使用动态音色模式,配音编号({})]", voiceConfigId); + FileDO fileDO = fileMapper.selectById(voice.getFileId()); + if (fileDO == null) { + throw exception(VOICE_FILE_NOT_EXISTS); + } - // 使用文件URL和识别文本进行合成 - fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); - transcriptionText = voice.getTranscription(); - if (StrUtil.isBlank(transcriptionText)) { - throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别"); - } + // 使用文件URL和识别文本进行合成 + fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); + transcriptionText = voice.getTranscription(); + if (StrUtil.isBlank(transcriptionText)) { + throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别"); } } // 2. 
如果没有配置ID,使用voiceId或fileUrl(系统音色或直接URL方式) @@ -555,21 +495,17 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { throw exception(VOICE_NOT_EXISTS, "配音不存在"); } - voiceId = voice.getVoiceId(); - if (StrUtil.isNotBlank(voiceId)) { - fileUrl = null; - referenceText = null; - } else { - FileDO fileDO = fileMapper.selectById(voice.getFileId()); - if (fileDO == null) { - throw exception(VOICE_FILE_NOT_EXISTS); - } - fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); - referenceText = voice.getTranscription(); - if (StrUtil.isBlank(referenceText)) { - throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别"); - } + // 使用动态音色模式 + FileDO fileDO = fileMapper.selectById(voice.getFileId()); + if (fileDO == null) { + throw exception(VOICE_FILE_NOT_EXISTS); } + fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); + referenceText = voice.getTranscription(); + if (StrUtil.isBlank(referenceText)) { + throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别"); + } + voiceId = null; } // 3. 
系统配音 else { @@ -623,21 +559,10 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { return buildPreviewResp(audioBase64, format, voiceId); } - /** - * 获取 CosyVoice 配置 - */ - private cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig getCosyVoiceConfig() { - return cosyVoiceProviderConfig; - } - /** * 获取默认音频格式 */ private String getDefaultFormat() { - var config = getCosyVoiceConfig(); - if (config != null) { - return config.getAudioFormat(); - } return "mp3"; } @@ -645,10 +570,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { * 获取默认采样率 */ private Integer getDefaultSampleRate() { - var config = getCosyVoiceConfig(); - if (config != null) { - return config.getSampleRate(); - } return 24000; } @@ -664,16 +585,14 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { } private String resolveContentType(String format) { - if ("wav".equalsIgnoreCase(format)) { - return "audio/wav"; - } - if ("mp3".equalsIgnoreCase(format)) { + if (format == null) { return "audio/mpeg"; } - if ("flac".equalsIgnoreCase(format)) { - return "audio/flac"; - } - return "audio/mpeg"; + return switch (format.toLowerCase()) { + case "wav" -> "audio/wav"; + case "flac" -> "audio/flac"; + default -> "audio/mpeg"; + }; } private String determineSynthesisText(String transcriptionText, String inputText, boolean allowFallback) { @@ -828,74 +747,25 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { return respVO; } + @lombok.Data + @lombok.NoArgsConstructor + @lombok.AllArgsConstructor private static class PreviewCacheEntry { private String audioBase64; private String format; private Integer sampleRate; private String requestId; - - public PreviewCacheEntry() {} - - public PreviewCacheEntry(String audioBase64, String format, Integer sampleRate, String requestId) { - this.audioBase64 = audioBase64; - this.format = format; - this.sampleRate = sampleRate; - this.requestId = requestId; - } - - public String 
getAudioBase64() { - return audioBase64; - } - - public String getFormat() { - return format; - } - - public Integer getSampleRate() { - return sampleRate; - } - - public String getRequestId() { - return requestId; - } } + @lombok.Data + @lombok.NoArgsConstructor + @lombok.AllArgsConstructor private static class SynthCacheEntry { private String audioBase64; private String format; private Integer sampleRate; private String requestId; private String voiceId; - - public SynthCacheEntry() {} - - public SynthCacheEntry(String audioBase64, String format, Integer sampleRate, String requestId, String voiceId) { - this.audioBase64 = audioBase64; - this.format = format; - this.sampleRate = sampleRate; - this.requestId = requestId; - this.voiceId = voiceId; - } - - public String getAudioBase64() { - return audioBase64; - } - - public String getFormat() { - return format; - } - - public Integer getSampleRate() { - return sampleRate; - } - - public String getRequestId() { - return requestId; - } - - public String getVoiceId() { - return voiceId; - } } /** @@ -1116,10 +986,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { * 获取默认音色ID */ private String getDefaultVoiceId() { - var config = getCosyVoiceConfig(); - if (config != null) { - return config.getDefaultVoiceId(); - } return null; } @@ -1127,32 +993,8 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { * 获取试听文本 */ private String getPreviewText() { - var config = getCosyVoiceConfig(); - if (config != null) { - return config.getPreviewText(); - } return "您好,欢迎体验专属音色。"; } - /** - * 获取供应商类型 - */ - private String getProviderType(String requestProviderType, VoiceCloneProvider provider) { - if (StrUtil.isNotBlank(requestProviderType)) { - return requestProviderType; - } - return provider.getProviderType(); - } - - /** - * 根据供应商类型获取对应的模型 - */ - private String getModelByProvider(String providerType) { - if (PROVIDER_SILICONFLOW.equalsIgnoreCase(providerType)) { - return MODEL_SILICONFLOW; 
- } - return MODEL_COSYVOICE; // 默认使用 CosyVoice 模型 - } - } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanCreateReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanCreateReqVO.java index 63c0d8e70f..bfef0446bf 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanCreateReqVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanCreateReqVO.java @@ -35,7 +35,7 @@ public class AppTikDigitalHumanCreateReqVO { @Size(max = 1024, message = "视频URL不能超过1024个字符") private String videoUrl; - @Schema(description = "音色ID(CosyVoice voiceId,系统音色使用)", example = "cosyvoice-v3-flash-sys-xxx") + @Schema(description = "音色ID(系统音色使用)", example = "alex") private String voiceId; @Schema(description = "用户音色配置ID(tik_user_voice.id,用户音色使用)", example = "123") diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanRespVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanRespVO.java index d53ed86ae7..69f6fb3a1f 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanRespVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanRespVO.java @@ -37,7 +37,7 @@ public class AppTikDigitalHumanRespVO { @Schema(description = "配音配置ID", example = "789") private Long voiceConfigId; - @Schema(description = "voice_id", example = "cosyvoice-v3-flash-xxx") + @Schema(description = "voice_id", example = "voice-xxx") private String voiceId; @Schema(description = "语速", example = "1.0") diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java index 476574c7b5..2f8209bef9 100644 --- 
a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java @@ -39,7 +39,7 @@ public class AppTikUserVoiceCreateReqVO { @Size(max = 4000, message = "音频文本不能超过 4000 个字符") private String text; - @Schema(description = "供应商类型:cosyvoice-阿里云,siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice") + @Schema(description = "供应商类型:siliconflow-硅基流动(不传则使用默认)", example = "siliconflow") private String providerType; } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceRespVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceRespVO.java index 7f4f2cd6e7..ff5a1e4993 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceRespVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceRespVO.java @@ -38,9 +38,6 @@ public class AppTikUserVoiceRespVO { @Schema(description = "备注", example = "这是一个测试配音") private String note; - @Schema(description = "复刻音色ID(CosyVoice 语音复刻生成的 voice_id)") - private String voiceId; - @Schema(description = "创建时间", requiredMode = Schema.RequiredMode.REQUIRED) private LocalDateTime createTime; diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java index 3e6796f5ad..b06d6456a7 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java @@ -14,7 +14,7 @@ public class AppTikVoicePreviewReqVO { @Schema(description = "配音编号(tik_user_voice.id),用户配音必传,系统配音可不传") private Long voiceConfigId; - @Schema(description = "CosyVoice音色ID(系统配音必传,用户配音可不传)") + @Schema(description = 
"音色ID(系统配音必传,用户配音可不传)") private String voiceId; @Schema(description = "语音文件URL(当使用语音URL合成时必传,替代voiceId)") @@ -43,7 +43,7 @@ public class AppTikVoicePreviewReqVO { @Schema(description = "指令(用于控制音色风格)", example = "请用温柔专业的语调朗读") private String instruction; - @Schema(description = "供应商类型:cosyvoice-阿里云,siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice") + @Schema(description = "供应商类型:siliconflow-硅基流动(不传则使用默认)", example = "siliconflow") private String providerType; } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewRespVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewRespVO.java index 61cad75d87..684159fbed 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewRespVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewRespVO.java @@ -20,7 +20,7 @@ public class AppTikVoicePreviewRespVO { @Schema(description = "采样率", example = "24000") private Integer sampleRate; - @Schema(description = "CosyVoice 请求ID") + @Schema(description = "请求ID") private String requestId; @Schema(description = "使用的音色 ID") diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java index 0105cab406..d933629ee7 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java @@ -21,13 +21,13 @@ public class AppTikVoiceTtsReqVO { @Size(max = 4000, message = "识别文本不能超过 4000 个字符") private String transcriptionText; - @Schema(description = "音色 ID(CosyVoice voiceId)", example = "cosyvoice-v3-flash-myvoice-xxx") + @Schema(description = "音色 ID(系统音色)", example = "alex") private String voiceId; @Schema(description = "音色源音频 OSS 地址(当没有 voiceId 时必传)") 
private String fileUrl; - @Schema(description = "模型名称,默认 cosyvoice-v3-flash", example = "cosyvoice-v3-flash") + @Schema(description = "模型名称", example = "IndexTeam/IndexTTS-2") private String model; @Schema(description = "语速,默认 1.0", example = "1.0") @@ -45,7 +45,7 @@ public class AppTikVoiceTtsReqVO { @Schema(description = "音频格式,默认 wav,可选 mp3") private String audioFormat; - @Schema(description = "供应商类型:cosyvoice-阿里云,siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice") + @Schema(description = "供应商类型:siliconflow-硅基流动(不传则使用默认)", example = "siliconflow") private String providerType; } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsRespVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsRespVO.java index 8eef2aea2a..1f94f34151 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsRespVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsRespVO.java @@ -4,7 +4,7 @@ import io.swagger.v3.oas.annotations.media.Schema; import lombok.Data; @Data -@Schema(description = "CosyVoice 文本转语音响应") +@Schema(description = "文本转语音响应") public class AppTikVoiceTtsRespVO { @Schema(description = "用户文件编号", example = "1024") @@ -23,7 +23,7 @@ public class AppTikVoiceTtsRespVO { @Schema(description = "采样率", example = "24000") private Integer sampleRate; - @Schema(description = "CosyVoice 请求ID") + @Schema(description = "请求ID") private String requestId; @Schema(description = "使用的音色 ID") diff --git a/yudao-server/src/main/resources/application-local.yaml b/yudao-server/src/main/resources/application-local.yaml index 07c4a92085..6f9726aaf5 100644 --- a/yudao-server/src/main/resources/application-local.yaml +++ b/yudao-server/src/main/resources/application-local.yaml @@ -225,10 +225,6 @@ wx: yudao: voice: default-provider: siliconflow - cosyvoice: - enabled: true - api-key: sk-10c746f8cb8640738f8d6b71af699003 - default-model: 
cosyvoice-v3-flash siliconflow: enabled: true api-key: sk-kcvifijrafkzxsmnxbgxspnxdvjiaawcbyoiqhmfobykynpx diff --git a/yudao-server/src/main/resources/application.yaml b/yudao-server/src/main/resources/application.yaml index 1cb4b07c17..55bca0c117 100644 --- a/yudao-server/src/main/resources/application.yaml +++ b/yudao-server/src/main/resources/application.yaml @@ -215,15 +215,6 @@ spring: yudao: voice: default-provider: siliconflow - cosyvoice: - enabled: true - api-key: sk-10c746f8cb8640738f8d6b71af699003 - default-model: cosyvoice-v3-flash - sample-rate: 24000 - audio-format: mp3 - preview-text: 您好,欢迎体验专属音色 - tts-url: https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis - voice-enrollment-url: https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment siliconflow: enabled: true api-key: sk-kcvifijrafkzxsmnxbgxspnxdvjiaawcbyoiqhmfobykynpx