From cc5401e74313b94f813abb8f3bdb2ebb64d6ffca Mon Sep 17 00:00:00 2001 From: sion123 <450702724@qq.com> Date: Wed, 19 Nov 2025 22:52:00 +0800 Subject: [PATCH] =?UTF-8?q?=E8=AF=AD=E9=9F=B3=E5=90=88=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/app/web-gold/src/views/dh/Video.vue | 41 ++++-- .../tik/voice/client/CosyVoiceClient.java | 9 +- .../voice/client/dto/CosyVoiceTtsRequest.java | 5 + .../service/TikUserVoiceServiceImpl.java | 124 +++++++----------- .../voice/vo/AppTikVoicePreviewRespVO.java | 6 +- .../tik/voice/vo/AppTikVoiceTtsRespVO.java | 6 +- 6 files changed, 101 insertions(+), 90 deletions(-) diff --git a/frontend/app/web-gold/src/views/dh/Video.vue b/frontend/app/web-gold/src/views/dh/Video.vue index 3da8d428cc..ae2e56b836 100644 --- a/frontend/app/web-gold/src/views/dh/Video.vue +++ b/frontend/app/web-gold/src/views/dh/Video.vue @@ -137,6 +137,7 @@ const buildPreviewParams = (voice) => { } return { voiceConfigId: configId, + inputText: ttsText.value, // 传递用户输入的文本 emotion: emotion.value || 'neutral', speechRate: speechRate.value || 1.0, audioFormat: 'mp3' @@ -144,6 +145,7 @@ const buildPreviewParams = (voice) => { } else { return { voiceId: voice.voiceId, + inputText: ttsText.value, // 传递用户输入的文本 emotion: emotion.value || 'neutral', speechRate: speechRate.value || 1.0, audioFormat: 'mp3' @@ -209,16 +211,29 @@ const handleSynthesizeVoice = async () => { const playSynthesizedAudio = () => { // 防止重复点击 - if (isPlayingSynthesized.value || !synthesizedAudio.value?.audioUrl) { + if (isPlayingSynthesized.value || !synthesizedAudio.value) { return } - + isPlayingSynthesized.value = true - playAudioPreview(synthesizedAudio.value.audioUrl, { - onEnded: () => { + + // 优先使用Base64数据(安全方案) + if (synthesizedAudio.value.audioBase64) { + playAudioFromBase64(synthesizedAudio.value.audioBase64, synthesizedAudio.value.format, () => { isPlayingSynthesized.value = false - } - }) + }) + } + // 兼容旧的audioUrl方式(已废弃) + else if (synthesizedAudio.value.audioUrl) { + playAudioPreview(synthesizedAudio.value.audioUrl, { + onEnded: () => { + isPlayingSynthesized.value = false + } + }) + } else { + message.warning('暂无可播放的音频') + isPlayingSynthesized.value = false + } } // 视频处理 @@ -329,7 +344,7 @@ const playAudioPreview = (url, options = {}) => { }) } -const playAudioFromBase64 = (audioBase64, format = 'mp3') => { +const playAudioFromBase64 = (audioBase64, format = 'mp3', onEnded = null) => { try { previewObjectUrl && URL.revokeObjectURL(previewObjectUrl) const byteCharacters = window.atob(audioBase64) @@ -340,16 +355,18 @@ const playAudioFromBase64 = (audioBase64, format = 'mp3') => { const mime = format === 'mp3' ? 'audio/mpeg' : `audio/${format}` const blob = new Blob([new Uint8Array(byteNumbers)], { type: mime }) previewObjectUrl = URL.createObjectURL(blob) - playAudioPreview(previewObjectUrl, { + playAudioPreview(previewObjectUrl, { revokeOnEnd: true, onEnded: () => { isPlayingPreview.value = false + onEnded && onEnded() } }) } catch (error) { console.error('Base64播放失败:', error) isPlayingPreview.value = false message.error('音频播放失败') + onEnded && onEnded() } } @@ -395,13 +412,13 @@ let previewObjectUrl = ''
+ -
▶ 试听后可获取准确的说话时长
@@ -514,12 +531,12 @@ let previewObjectUrl = ''
已生成语音
- 文件编号:{{ synthesizedAudio.fileId }} 格式:{{ (synthesizedAudio.format || 'mp3').toUpperCase() }} + Base64编码
- () - .eq(TikUserFileDO::getFileId, infraFileId) - .eq(TikUserFileDO::getUserId, SecurityFrameworkUtils.getLoginUserId()) - .orderByDesc(TikUserFileDO::getId) - .last("LIMIT 1")); - if (userFile == null) { - throw exception(VOICE_FILE_NOT_EXISTS, "文件上传成功但未找到用户文件记录"); - } + // 【安全方案】不暴露OSS链接,直接返回Base64编码的音频数据 + String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio()); + log.info("[synthesizeVoice][合成成功,配音编号({}),voiceId({}),format({}),audioSize={}]", + voiceConfigId, finalVoiceId, format, ttsResult.getAudio().length); AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO(); - respVO.setFileId(infraFileId); // 返回infraFileId,保持与原有逻辑一致 - respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(userFile.getId())); // 使用TikUserFileDO.id获取播放URL + respVO.setFileId(null); // 不返回fileId(避免暴露) + respVO.setAudioBase64(audioBase64); // 返回Base64数据,前端可直接播放 respVO.setFormat(format); respVO.setSampleRate(ttsResult.getSampleRate()); - respVO.setRequestId(ttsResult.getRequestId()); + respVO.setRequestId(""); // 不返回Request ID(避免暴露技术细节) respVO.setVoiceId(finalVoiceId); saveSynthCache(cacheKey, new SynthCacheEntry( @@ -537,9 +526,9 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { if (StrUtil.isNotBlank(voice.getVoiceId())) { log.info("[previewVoice][使用复刻音色ID试听,配音编号({}),voice_id({})]", voiceConfigId, voice.getVoiceId()); voiceId = voice.getVoiceId(); - transcriptionText = voice.getTranscription(); - inputText = StrUtil.blankToDefault(reqVO.getInputText(), - StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText())); + // 注意:使用 voiceId 时,不依赖 transcriptionText,直接使用前端传入的 inputText + transcriptionText = null; // 清除 transcriptionText + inputText = StrUtil.blankToDefault(reqVO.getInputText(), cosyVoiceProperties.getPreviewText()); } else { log.info("[previewVoice][使用文件URL试听,配音编号({})]", voiceConfigId); // 获取文件信息,用于获取文件URL @@ -593,14 +582,14 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { PreviewCacheEntry previewCache = getPreviewCache(cacheKey); if (previewCache != null) { - log.info("[previewVoice][使用缓存,配音编号({}),voiceId({}),fileUrl({}),cacheKey({})]", - voiceConfigId, voiceId, fileUrl, cacheKey); - // 缓存中存储的是原始URL,需要生成预签名URL - String cachedUrl = fileApi.presignGetUrl(previewCache.getFileUrl(), PRESIGN_URL_EXPIRATION_SECONDS); - return buildPreviewResp(previewCache, cachedUrl, voiceId); + log.info("[previewVoice][使用缓存,配音编号({}),voiceId({}),cacheKey({})]", + voiceConfigId, voiceId, cacheKey); + // 缓存命中,直接返回缓存的数据(Base64) + String cachedBase64 = previewCache.getAudioBase64(); + return buildPreviewResp(cachedBase64, previewCache.getFormat(), voiceId); } - log.info("[previewVoice][调用CosyVoice合成,配音编号({}),voiceId({}),fileUrl({}),文本长度({})]", + log.info("[previewVoice][调用CosyVoice合成,配音编号({}),voiceId({}),fileUrl({}),文本长度({})]", voiceConfigId, voiceId, fileUrl, finalText.length()); CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest( finalText, @@ -610,26 +599,28 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { null, // 使用默认模型 speechRate, volume, + emotion, null, audioFormat, true )); String format = defaultFormat(ttsResult.getFormat(), audioFormat); - String identifier = StrUtil.isNotBlank(fileUrl) ? "fileUrl" : (StrUtil.isNotBlank(voiceId) ? voiceId : "voice"); + String identifier = StrUtil.isNotBlank(voiceId) ? voiceId : "voice"; String objectName = buildFileName(identifier, format); - // 上传到OSS,返回原始URL(不是预签名URL) - String resultFileUrl = fileApi.createFile(ttsResult.getAudio(), objectName, "voice/preview", resolveContentType(format)); - log.info("[previewVoice][合成成功,配音编号({}),voiceId({}),fileUrl({}),resultFileUrl({}),format({})]", - voiceConfigId, voiceId, fileUrl, resultFileUrl, format); - - // 生成预签名URL用于返回给前端 - String presignUrl = fileApi.presignGetUrl(resultFileUrl, PRESIGN_URL_EXPIRATION_SECONDS); - - // 缓存中存储原始URL(不是预签名URL),下次使用时再生成预签名URL - PreviewCacheEntry entry = new PreviewCacheEntry(resultFileUrl, format, ttsResult.getSampleRate(), ttsResult.getRequestId()); + + // 【安全方案】不暴露OSS链接,直接返回Base64编码的音频数据 + // 这样前端可直接播放,无需额外请求,也不会暴露OSS存储信息 + String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio()); + log.info("[previewVoice][合成成功,配音编号({}),voiceId({}),format({}),audioSize={}]", + voiceConfigId, voiceId, format, ttsResult.getAudio().length); + + // 缓存Base64数据(用于提升响应速度) + PreviewCacheEntry entry = new PreviewCacheEntry(audioBase64, format, ttsResult.getSampleRate(), ttsResult.getRequestId()); savePreviewCache(cacheKey, entry); - return buildPreviewResp(entry, presignUrl, voiceId); + + // 返回Base64数据,前端使用 data:audio/...;base64,... 格式播放 + return buildPreviewResp(audioBase64, format, voiceId); } private CosyVoiceTtsRequest buildTtsRequest(String text, @@ -639,6 +630,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { String model, Float speechRate, Float volume, + String emotion, Integer sampleRate, String audioFormat, boolean preview) { @@ -650,6 +642,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { .model(model) .speechRate(speechRate) .volume(volume) + .emotion(emotion) .sampleRate(sampleRate) .audioFormat(audioFormat) .preview(preview) @@ -822,65 +815,48 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { } private AppTikVoiceTtsRespVO buildSynthResponseFromCache(AppTikVoiceTtsReqVO reqVO, SynthCacheEntry cache) { - byte[] audioBytes = Base64.getDecoder().decode(cache.getAudioBase64()); + // 直接使用缓存的Base64数据,不上传OSS String format = defaultFormat(cache.getFormat(), reqVO.getAudioFormat()); String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cache.getVoiceId()); - ByteArrayMultipartFile multipartFile = new ByteArrayMultipartFile( - "file", - buildFileName(voiceId, format), - resolveContentType(format), - audioBytes - ); - Long infraFileId = tikUserFileService.uploadFile(multipartFile, "audio", null); - - // 通过infraFileId查询TikUserFileDO,获取用户文件ID - TikUserFileDO userFile = userFileMapper.selectOne( - new LambdaQueryWrapperX() - .eq(TikUserFileDO::getFileId, infraFileId) - .eq(TikUserFileDO::getUserId, SecurityFrameworkUtils.getLoginUserId()) - .orderByDesc(TikUserFileDO::getId) - .last("LIMIT 1")); - if (userFile == null) { - throw exception(VOICE_FILE_NOT_EXISTS, "文件上传成功但未找到用户文件记录"); - } AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO(); - respVO.setFileId(infraFileId); // 返回infraFileId,保持与原有逻辑一致 - respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(userFile.getId())); // 使用TikUserFileDO.id获取播放URL + respVO.setFileId(null); // 不返回fileId(避免暴露) + respVO.setAudioBase64(cache.getAudioBase64()); // 返回Base64数据 respVO.setFormat(format); respVO.setSampleRate(cache.getSampleRate()); - respVO.setRequestId(cache.getRequestId()); + respVO.setRequestId(""); // 不返回Request ID(避免暴露技术细节) respVO.setVoiceId(voiceId); return respVO; } - private AppTikVoicePreviewRespVO buildPreviewResp(PreviewCacheEntry entry, String presignUrl, String voiceId) { + private AppTikVoicePreviewRespVO buildPreviewResp(String audioBase64, String format, String voiceId) { AppTikVoicePreviewRespVO respVO = new AppTikVoicePreviewRespVO(); - respVO.setAudioUrl(presignUrl); - respVO.setFormat(entry.getFormat()); - respVO.setSampleRate(entry.getSampleRate()); - respVO.setRequestId(entry.getRequestId()); + respVO.setAudioBase64(audioBase64); // 返回Base64数据,前端可直接播放 + respVO.setFormat(format); + // 缓存中不存储其他信息,返回默认值 + respVO.setSampleRate(24000); // 默认采样率 + respVO.setRequestId(""); // 不返回Request ID(避免暴露技术细节) respVO.setVoiceId(voiceId); return respVO; } private static class PreviewCacheEntry { - private String fileUrl; + private String audioBase64; private String format; private Integer sampleRate; private String requestId; public PreviewCacheEntry() {} - public PreviewCacheEntry(String fileUrl, String format, Integer sampleRate, String requestId) { - this.fileUrl = fileUrl; + public PreviewCacheEntry(String audioBase64, String format, Integer sampleRate, String requestId) { + this.audioBase64 = audioBase64; this.format = format; this.sampleRate = sampleRate; this.requestId = requestId; } - public String getFileUrl() { - return fileUrl; + public String getAudioBase64() { + return audioBase64; } public String getFormat() { diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewRespVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewRespVO.java index 3d3bf18e7f..61cad75d87 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewRespVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewRespVO.java @@ -7,7 +7,11 @@ import lombok.Data; @Schema(description = "音色试听响应") public class AppTikVoicePreviewRespVO { - @Schema(description = "音频播放地址(预签名 URL)") + @Schema(description = "音频Base64数据(可直接播放,使用 data:audio/...;base64,... 格式)") + private String audioBase64; + + @Schema(description = "音频播放地址(预签名 URL,已废弃,不推荐使用)") + @Deprecated private String audioUrl; @Schema(description = "音频格式", example = "wav") diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsRespVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsRespVO.java index 0b386389c7..ded9882faa 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsRespVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsRespVO.java @@ -10,7 +10,11 @@ public class AppTikVoiceTtsRespVO { @Schema(description = "用户文件编号", example = "1024") private Long fileId; - @Schema(description = "音频播放地址(预签名 URL)") + @Schema(description = "音频Base64数据(可直接播放,使用 data:audio/...;base64,... 格式)") + private String audioBase64; + + @Schema(description = "音频播放地址(预签名 URL,已废弃,不推荐使用)") + @Deprecated private String audioUrl; @Schema(description = "音频格式", example = "mp3")