diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java index 1d4535298e..66fb1637ab 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java @@ -120,7 +120,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider { .input(request.getText()) .voice(request.getVoiceId()) .speed(request.getSpeechRate() != null ? request.getSpeechRate() : 1.0f) - .sampleRate(request.getSampleRate() != null ? request.getSampleRate() : config.getSampleRate()) .responseFormat(getOrDefault(request.getAudioFormat(), config.getAudioFormat())) .build(); @@ -150,7 +149,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider { VoiceTtsResult result = new VoiceTtsResult(); result.setAudio(Base64.getDecoder().decode(base64Audio)); result.setFormat(sfRequest.getResponseFormat()); - result.setSampleRate(sfRequest.getSampleRate()); result.setVoiceId(request.getVoiceId()); log.info("[SiliconFlowProvider][语音合成成功][format={}, audioSize={}]", diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java index e7f4f662ae..d9add1609e 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java @@ -21,9 +21,9 @@ public class SiliconFlowTtsRequest { private String model; /** - * 待合成文本(API 参数名:text) + * 待合成文本(API 参数名:input) */ - @JsonProperty("text") + @JsonProperty("input") private String input; /** @@ -36,16 +36,12 @@ public class SiliconFlowTtsRequest { */ private Float speed; - /** - * 采样率(如 24000) - */ - @JsonProperty("sample_rate") - private Integer sampleRate; + /** - * 响应格式(mp3, wav, pcm)(API 参数名:format) + * 响应格式(mp3, opus, wav, pcm)(API 参数名:response_format) */ - @JsonProperty("format") + @JsonProperty("response_format") private String responseFormat; } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java index 09e7a0cf8e..2b7db6aaad 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java @@ -427,7 +427,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { transcriptionText, reqVO.getInputText(), false); - // 移除appendEmotion调用,情感通过instruction参数传递 String cacheKey = buildCacheKey(SYNTH_CACHE_PREFIX, voiceId, @@ -493,128 +492,75 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { Long userId = SecurityFrameworkUtils.getLoginUserId(); Long voiceConfigId = reqVO.getVoiceConfigId(); - // 增加请求参数日志 - log.info("[previewVoice][开始试听,请求参数:voiceConfigId={}, voiceId={}, fileUrl={}, userId={}]", - voiceConfigId, reqVO.getVoiceId(), reqVO.getFileUrl(), userId); + log.info("[previewVoice][试听,voiceConfigId={}, voiceId={}, userId={}]", + voiceConfigId, reqVO.getVoiceId(), userId); String voiceId = null; String fileUrl = null; - String transcriptionText = null; - String inputText; + String referenceText = null; - // 1. 如果传入了fileUrl和transcriptionText,直接使用(通过语音URL合成) + // 1. 通过语音URL合成 if (StrUtil.isNotBlank(reqVO.getFileUrl()) && StrUtil.isNotBlank(reqVO.getTranscriptionText())) { - log.info("[previewVoice][使用语音URL合成,用户({})]", userId); - // 如果传入的是预签名URL,提取原始URL(去除查询参数),避免二次签名 String rawFileUrl = extractRawUrl(reqVO.getFileUrl()); - // 如果提取后的URL与原始URL不同,说明是预签名URL,需要重新生成预签名URL - // 否则直接使用(可能是原始URL或公开URL) - if (!rawFileUrl.equals(reqVO.getFileUrl())) { - // 重新生成预签名URL,确保有效期足够长 - fileUrl = fileApi.presignGetUrl(rawFileUrl, PRESIGN_URL_EXPIRATION_SECONDS); - log.info("[previewVoice][检测到预签名URL,已提取原始URL并重新生成预签名URL]"); - } else { - fileUrl = reqVO.getFileUrl(); - } - transcriptionText = reqVO.getTranscriptionText(); - inputText = StrUtil.blankToDefault(reqVO.getInputText(), transcriptionText); + fileUrl = rawFileUrl.equals(reqVO.getFileUrl()) + ? reqVO.getFileUrl() + : fileApi.presignGetUrl(rawFileUrl, PRESIGN_URL_EXPIRATION_SECONDS); + referenceText = reqVO.getTranscriptionText(); } - // 2. 如果有配置ID,根据配置ID查询配音信息(用户配音) + // 2. 用户配音 else if (voiceConfigId != null) { - log.info("[previewVoice][开始试听,配音编号({}),用户({})]", voiceConfigId, userId); - TikUserVoiceDO voice = voiceMapper.selectById(voiceConfigId); - log.info("[previewVoice][查询配音结果:voice={},配音编号={},用户ID={}]", - voice != null ? "存在" : "不存在", voiceConfigId, userId); - - if (voice == null) { - log.warn("[previewVoice][配音不存在,配音编号({}),用户({})]", voiceConfigId, userId); - throw exception(VOICE_NOT_EXISTS, "配音不存在,编号:" + voiceConfigId); - } - if (!voice.getUserId().equals(userId)) { - log.warn("[previewVoice][配音不属于当前用户,配音编号({}),配音用户({}),当前用户({})]", - voiceConfigId, voice.getUserId(), userId); - throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户"); + if (voice == null || !voice.getUserId().equals(userId)) { + throw exception(VOICE_NOT_EXISTS, "配音不存在"); } - // 优先使用复刻的 voice_id,如果不存在则使用文件URL(兼容旧数据) if (StrUtil.isNotBlank(voice.getVoiceId())) { - log.info("[previewVoice][使用复刻音色ID试听,配音编号({}),voice_id({})]", voiceConfigId, voice.getVoiceId()); voiceId = voice.getVoiceId(); - // 注意:使用 voiceId 时,不依赖 transcriptionText,直接使用前端传入的 inputText - transcriptionText = null; // 清除 transcriptionText - inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText()); } else { - log.info("[previewVoice][使用文件URL试听,配音编号({})]", voiceConfigId); - // 获取文件信息,用于获取文件URL FileDO fileDO = fileMapper.selectById(voice.getFileId()); if (fileDO == null) { throw exception(VOICE_FILE_NOT_EXISTS); } - - // 使用文件URL和识别文本进行合成 fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); - transcriptionText = voice.getTranscription(); - if (StrUtil.isBlank(transcriptionText)) { + referenceText = voice.getTranscription(); + if (StrUtil.isBlank(referenceText)) { throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别"); } - inputText = StrUtil.blankToDefault(reqVO.getInputText(), - StrUtil.blankToDefault(transcriptionText, getPreviewText())); } } - // 3. 如果没有配置ID,使用系统配音配置(需要前端传voiceId) + // 3. 系统配音 else { - log.info("[previewVoice][开始试听,使用系统配音配置,用户({})]", userId); voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), getDefaultVoiceId()); if (StrUtil.isBlank(voiceId)) { throw exception(VOICE_NOT_EXISTS, "系统配音音色ID不能为空"); } - inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText()); } - - String finalText = determineSynthesisText( - transcriptionText, - inputText, - true); - - // 使用请求参数或默认值 + + // 统一处理:使用前端传入的 inputText,否则使用默认试听文本 + String finalText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText()); + String instruction = reqVO.getInstruction(); - // 注意:instruction参数现在直接传递给CosyVoice,不再添加到文本中 Float speechRate = reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f; Float volume = reqVO.getVolume() != null ? reqVO.getVolume() : 0f; String audioFormat = StrUtil.blankToDefault(reqVO.getAudioFormat(), "mp3"); - // 构建缓存key(使用fileUrl或voiceId) - String cacheKey = buildCacheKey(PREVIEW_CACHE_PREFIX, - voiceId, - fileUrl, - finalText, - speechRate, - volume, - instruction, - audioFormat, - null); + // 缓存 + String cacheKey = buildCacheKey(PREVIEW_CACHE_PREFIX, voiceId, fileUrl, finalText, + speechRate, volume, instruction, audioFormat, null); PreviewCacheEntry previewCache = getPreviewCache(cacheKey); - if (previewCache != null) { - log.info("[previewVoice][使用缓存,配音编号({}),voiceId({}),cacheKey({})]", - voiceConfigId, voiceId, cacheKey); - // 缓存命中,直接返回缓存的数据(Base64) - String cachedBase64 = previewCache.getAudioBase64(); - return buildPreviewResp(cachedBase64, previewCache.getFormat(), voiceId); + return buildPreviewResp(previewCache.getAudioBase64(), previewCache.getFormat(), voiceId); } - log.info("[previewVoice][调用语音合成服务,配音编号({}),voiceId({}),fileUrl({}),文本长度({}),供应商({})]", - voiceConfigId, voiceId, fileUrl, finalText.length(), reqVO.getProviderType()); - - // 使用 Provider 接口进行 TTS 合成(支持前端选择供应商,不传则使用默认) + // TTS 合成 + log.info("[previewVoice][TTS,voiceId={}, textLen={}]", voiceId, finalText.length()); VoiceCloneProvider provider = voiceProviderFactory.getProvider(reqVO.getProviderType()); VoiceTtsRequest ttsRequest = VoiceTtsRequest.builder() .text(finalText) .voiceId(voiceId) .fileUrl(fileUrl) - .referenceText(transcriptionText) - .model(null) // 使用默认模型 + .referenceText(referenceText) + .model(null) .speechRate(speechRate) .volume(volume) .instruction(instruction) @@ -624,22 +570,13 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { .build(); VoiceTtsResult ttsResult = provider.synthesize(ttsRequest); - String format = defaultFormat(ttsResult.getFormat(), audioFormat); - String identifier = StrUtil.isNotBlank(voiceId) ? voiceId : "voice"; - String objectName = buildFileName(identifier, format); - - // 【安全方案】不暴露OSS链接,直接返回Base64编码的音频数据 - // 这样前端可直接播放,无需额外请求,也不会暴露OSS存储信息 String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio()); - log.info("[previewVoice][合成成功,配音编号({}),voiceId({}),format({}),audioSize={}]", - voiceConfigId, voiceId, format, ttsResult.getAudio().length); - // 缓存Base64数据(用于提升响应速度) - PreviewCacheEntry entry = new PreviewCacheEntry(audioBase64, format, ttsResult.getSampleRate(), ttsResult.getRequestId()); - savePreviewCache(cacheKey, entry); + savePreviewCache(cacheKey, new PreviewCacheEntry(audioBase64, format, + ttsResult.getSampleRate(), ttsResult.getRequestId())); - // 返回Base64数据,前端使用 data:audio/...;base64,... 格式播放 + log.info("[previewVoice][成功,voiceId={}, format={}, size={}]", voiceId, format, ttsResult.getAudio().length); return buildPreviewResp(audioBase64, format, voiceId); } @@ -716,25 +653,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { throw exception(VOICE_TTS_FAILED, "请提供需要合成的文本内容"); } - private String appendEmotion(String text, String emotion) { - if (StrUtil.isBlank(text)) { - return text; - } - if (StrUtil.isBlank(emotion) || "neutral".equalsIgnoreCase(emotion)) { - return text; - } - String emotionLabel = switch (emotion.toLowerCase()) { - case "happy" -> "高兴"; - case "angry" -> "愤怒"; - case "sad" -> "悲伤"; - case "scared" -> "害怕"; - case "disgusted" -> "厌恶"; - case "surprised" -> "惊讶"; - default -> emotion; - }; - return "【情感:" + emotionLabel + "】" + text; - } - /** * 从URL中提取原始URL(去除查询参数和锚点) * diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java index a10bd5955f..3e6796f5ad 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java @@ -24,7 +24,7 @@ public class AppTikVoicePreviewReqVO { @Size(max = 4000, message = "语音文本不能超过 4000 个字符") private String transcriptionText; - @Schema(description = "输入文本(可选,如果不传则使用配音的识别文本或默认文本)") + @Schema(description = "输入文本(可选,不传则使用默认试听文本)") @Size(max = 4000, message = "输入文本不能超过 4000 个字符") private String inputText;