From cc5401e74313b94f813abb8f3bdb2ebb64d6ffca Mon Sep 17 00:00:00 2001
From: sion123 <450702724@qq.com>
Date: Wed, 19 Nov 2025 22:52:00 +0800
Subject: [PATCH] =?UTF-8?q?=E8=AF=AD=E9=9F=B3=E5=90=88=E6=88=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 frontend/app/web-gold/src/views/dh/Video.vue  |  41 ++++--
 .../tik/voice/client/CosyVoiceClient.java     |   9 +-
 .../voice/client/dto/CosyVoiceTtsRequest.java |   5 +
 .../service/TikUserVoiceServiceImpl.java      | 124 +++++++-----------
 .../voice/vo/AppTikVoicePreviewRespVO.java    |   6 +-
 .../tik/voice/vo/AppTikVoiceTtsRespVO.java    |   6 +-
 6 files changed, 101 insertions(+), 90 deletions(-)
diff --git a/frontend/app/web-gold/src/views/dh/Video.vue b/frontend/app/web-gold/src/views/dh/Video.vue
index 3da8d428cc..ae2e56b836 100644
--- a/frontend/app/web-gold/src/views/dh/Video.vue
+++ b/frontend/app/web-gold/src/views/dh/Video.vue
@@ -137,6 +137,7 @@ const buildPreviewParams = (voice) => {
     }
     return {
       voiceConfigId: configId,
+      inputText: ttsText.value,  // 传递用户输入的文本
       emotion: emotion.value || 'neutral',
       speechRate: speechRate.value || 1.0,
       audioFormat: 'mp3'
@@ -144,6 +145,7 @@ const buildPreviewParams = (voice) => {
   } else {
     return {
       voiceId: voice.voiceId,
+      inputText: ttsText.value,  // 传递用户输入的文本
       emotion: emotion.value || 'neutral',
       speechRate: speechRate.value || 1.0,
       audioFormat: 'mp3'
@@ -209,16 +211,29 @@ const handleSynthesizeVoice = async () => {
 
 const playSynthesizedAudio = () => {
   // 防止重复点击
-  if (isPlayingSynthesized.value || !synthesizedAudio.value?.audioUrl) {
+  if (isPlayingSynthesized.value || !synthesizedAudio.value) {
     return
   }
-  
+
   isPlayingSynthesized.value = true
-  playAudioPreview(synthesizedAudio.value.audioUrl, {
-    onEnded: () => {
+
+  // Prefer the inline Base64 payload (no storage URL is exposed); the callback clears the playing flag
+  if (synthesizedAudio.value.audioBase64) {
+    playAudioFromBase64(synthesizedAudio.value.audioBase64, synthesizedAudio.value.format, () => {
       isPlayingSynthesized.value = false
-    }
-  })
+    })
+  }
+  // Fall back to the legacy audioUrl field (deprecated) when no Base64 payload is present
+  else if (synthesizedAudio.value.audioUrl) {
+    playAudioPreview(synthesizedAudio.value.audioUrl, {
+      onEnded: () => {
+        isPlayingSynthesized.value = false
+      }
+    })
+  } else {
+    message.warning('暂无可播放的音频')
+    isPlayingSynthesized.value = false
+  }
 }
 
 // 视频处理
@@ -329,7 +344,7 @@ const playAudioPreview = (url, options = {}) => {
   })
 }
 
-const playAudioFromBase64 = (audioBase64, format = 'mp3') => {
+const playAudioFromBase64 = (audioBase64, format = 'mp3', onEnded = null) => {
   try {
     previewObjectUrl && URL.revokeObjectURL(previewObjectUrl)
     const byteCharacters = window.atob(audioBase64)
@@ -340,16 +355,18 @@ const playAudioFromBase64 = (audioBase64, format = 'mp3') => {
     const mime = format === 'mp3' ? 'audio/mpeg' : `audio/${format}`
     const blob = new Blob([new Uint8Array(byteNumbers)], { type: mime })
     previewObjectUrl = URL.createObjectURL(blob)
-    playAudioPreview(previewObjectUrl, { 
+    playAudioPreview(previewObjectUrl, {
       revokeOnEnd: true,
       onEnded: () => {
         isPlayingPreview.value = false
+        onEnded && onEnded()
       }
     })
   } catch (error) {
     console.error('Base64播放失败:', error)
     isPlayingPreview.value = false
     message.error('音频播放失败')
+    onEnded && onEnded()
   }
 }
 
@@ -395,13 +412,13 @@ let previewObjectUrl = ''
       <section class="digital-video-left">
         <!-- 文本输入 -->
         <div class="tts-section">
+          <div class="section-label">文案</div>
           <a-textarea
             v-model:value="ttsText"
             placeholder="请输入你想让角色说话的内容"
             :rows="6"
             class="tts-textarea"
           />
-          <div class="tts-hint">▶ 试听后可获取准确的说话时长</div>
 
           <!-- 音色选择 -->
           <div class="voice-selection">
@@ -514,12 +531,12 @@ let previewObjectUrl = ''
           <div v-if="synthesizedAudio" class="synth-audio-card">
             <div class="synth-audio-title">已生成语音</div>
             <div class="synth-audio-meta">
-              <span>文件编号：{{ synthesizedAudio.fileId }}</span>
               <span>格式：{{ (synthesizedAudio.format || 'mp3').toUpperCase() }}</span>
+              <span v-if="synthesizedAudio.audioBase64">Base64编码</span>
             </div>
             <div class="synth-audio-actions">
-              <a-button 
-                size="small" 
+              <a-button
+                size="small"
                 :loading="isPlayingSynthesized"
                 :disabled="isPlayingSynthesized"
                 @click="playSynthesizedAudio"
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java
index 73cd624866..fbd6b3e461 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java
@@ -66,16 +66,21 @@ public class CosyVoiceClient {
 
         SpeechSynthesizer synthesizer = null;
         try {
-            log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}]",
+            log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}, speechRate={}, emotion={}]",
                     request.getVoiceId(),
                     request.getText().length(),
-                    StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()));
+                    StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()),
+                    request.getSpeechRate(),
+                    request.getEmotion());
 
             // 使用 DashScope SDK 构建参数（严格按文档）
+            // speechRate is a float multiplier: pass it through untruncated (intValue() turned 0.8 into 0). volume is an int on a 0-100 scale, so fall back to the SDK default of 50 rather than 0 (silence).
             SpeechSynthesisParam param = SpeechSynthesisParam.builder()
                     .apiKey(properties.getApiKey())
                     .model(StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()))
                     .voice(request.getVoiceId())
+                    .speechRate(request.getSpeechRate() != null ? request.getSpeechRate() : 1.0f)
+                    .volume(request.getVolume() != null ? request.getVolume().intValue() : 50)
                     .build();
 
             // 初始化合成器（同步调用传 null）
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java
index 2f270cc7d1..b09591e714 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java
@@ -45,6 +45,11 @@ public class CosyVoiceTtsRequest {
      */
     private Float volume;
 
+    /**
+     * Emotion hint for the synthesis request; optional. NOTE(review): the visible part of CosyVoiceClient only logs this value and does not set it on SpeechSynthesisParam - confirm it actually reaches the TTS call.
+     */
+    private String emotion;
+
     /**
      * 采样率
      */
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
index 83d7c9eb17..864b66d98c 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
@@ -377,7 +377,8 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
             if (StrUtil.isNotBlank(voice.getVoiceId())) {
                 log.info("[synthesizeVoice][使用复刻音色ID合成，配音编号({})，voice_id({})]", voiceConfigId, voice.getVoiceId());
                 voiceId = voice.getVoiceId();
-                transcriptionText = voice.getTranscription();
+                // 注意：使用 voiceId 时，不依赖 transcriptionText，直接使用前端传入的 inputText
+                transcriptionText = null;  // 清除 transcriptionText，让 determineSynthesisText 只使用 inputText
             } else {
                 log.info("[synthesizeVoice][使用文件URL合成，配音编号({})]", voiceConfigId);
                 // 获取文件信息，用于获取文件URL
@@ -440,6 +441,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                 reqVO.getModel(),
                 reqVO.getSpeechRate(),
                 reqVO.getVolume(),
+                reqVO.getEmotion(),
                 reqVO.getSampleRate(),
                 reqVO.getAudioFormat(),
                 false
@@ -447,31 +449,18 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
 
         String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat());
         String finalVoiceId = StrUtil.blankToDefault(voiceId, cosyVoiceProperties.getDefaultVoiceId());
-        ByteArrayMultipartFile multipartFile = new ByteArrayMultipartFile(
-                "file",
-                buildFileName(finalVoiceId, format),
-                resolveContentType(format),
-                ttsResult.getAudio()
-        );
-        Long infraFileId = tikUserFileService.uploadFile(multipartFile, "audio", null);
 
-        // 通过infraFileId查询TikUserFileDO，获取用户文件ID
-        TikUserFileDO userFile = userFileMapper.selectOne(
-                new LambdaQueryWrapperX<TikUserFileDO>()
-                        .eq(TikUserFileDO::getFileId, infraFileId)
-                        .eq(TikUserFileDO::getUserId, SecurityFrameworkUtils.getLoginUserId())
-                        .orderByDesc(TikUserFileDO::getId)
-                        .last("LIMIT 1"));
-        if (userFile == null) {
-            throw exception(VOICE_FILE_NOT_EXISTS, "文件上传成功但未找到用户文件记录");
-        }
+        // 【安全方案】不暴露OSS链接，直接返回Base64编码的音频数据
+        String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio());
+        log.info("[synthesizeVoice][合成成功，配音编号({})，voiceId({})，format({})，audioSize={}]",
+                voiceConfigId, finalVoiceId, format, ttsResult.getAudio().length);
 
         AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO();
-        respVO.setFileId(infraFileId); // 返回infraFileId，保持与原有逻辑一致
-        respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(userFile.getId())); // 使用TikUserFileDO.id获取播放URL
+        respVO.setFileId(null);  // 不返回fileId（避免暴露）
+        respVO.setAudioBase64(audioBase64);  // 返回Base64数据，前端可直接播放
         respVO.setFormat(format);
         respVO.setSampleRate(ttsResult.getSampleRate());
-        respVO.setRequestId(ttsResult.getRequestId());
+        respVO.setRequestId("");  // 不返回Request ID（避免暴露技术细节）
         respVO.setVoiceId(finalVoiceId);
 
         saveSynthCache(cacheKey, new SynthCacheEntry(
@@ -537,9 +526,9 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
             if (StrUtil.isNotBlank(voice.getVoiceId())) {
                 log.info("[previewVoice][使用复刻音色ID试听，配音编号({})，voice_id({})]", voiceConfigId, voice.getVoiceId());
                 voiceId = voice.getVoiceId();
-                transcriptionText = voice.getTranscription();
-                inputText = StrUtil.blankToDefault(reqVO.getInputText(),
-                        StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText()));
+                // 注意：使用 voiceId 时，不依赖 transcriptionText，直接使用前端传入的 inputText
+                transcriptionText = null;  // 清除 transcriptionText
+                inputText = StrUtil.blankToDefault(reqVO.getInputText(), cosyVoiceProperties.getPreviewText());
             } else {
                 log.info("[previewVoice][使用文件URL试听，配音编号({})]", voiceConfigId);
                 // 获取文件信息，用于获取文件URL
@@ -593,14 +582,14 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
         PreviewCacheEntry previewCache = getPreviewCache(cacheKey);
 
-        if (previewCache != null) {
-            log.info("[previewVoice][使用缓存，配音编号({})，voiceId({})，fileUrl({})，cacheKey({})]", 
-                    voiceConfigId, voiceId, fileUrl, cacheKey);
-            // 缓存中存储的是原始URL，需要生成预签名URL
-            String cachedUrl = fileApi.presignGetUrl(previewCache.getFileUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
-            return buildPreviewResp(previewCache, cachedUrl, voiceId);
+        // Entries written before this change stored a file URL rather than Base64 audio, so an entry without a payload is treated as a miss
+        if (previewCache != null && StrUtil.isNotBlank(previewCache.getAudioBase64())) {
+            log.info("[previewVoice][使用缓存，配音编号({})，voiceId({})，cacheKey({})]",
+                    voiceConfigId, voiceId, cacheKey);
+            // Cache hit: serve the cached Base64 audio together with its recorded format and sample rate
+            return buildPreviewResp(previewCache, voiceId);
         }
 
-        log.info("[previewVoice][调用CosyVoice合成，配音编号({})，voiceId({})，fileUrl({})，文本长度({})]", 
+        log.info("[previewVoice][调用CosyVoice合成，配音编号({})，voiceId({})，fileUrl({})，文本长度({})]",
                 voiceConfigId, voiceId, fileUrl, finalText.length());
         CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
                 finalText,
@@ -610,26 +599,28 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                 null, // 使用默认模型
                 speechRate,
                 volume,
+                emotion,
                 null,
                 audioFormat,
                 true
         ));
 
         String format = defaultFormat(ttsResult.getFormat(), audioFormat);
-        String identifier = StrUtil.isNotBlank(fileUrl) ? "fileUrl" : (StrUtil.isNotBlank(voiceId) ? voiceId : "voice");
-        String objectName = buildFileName(identifier, format);
-        // 上传到OSS，返回原始URL（不是预签名URL）
-        String resultFileUrl = fileApi.createFile(ttsResult.getAudio(), objectName, "voice/preview", resolveContentType(format));
-        log.info("[previewVoice][合成成功，配音编号({})，voiceId({})，fileUrl({})，resultFileUrl({})，format({})]", 
-                voiceConfigId, voiceId, fileUrl, resultFileUrl, format);
-        
-        // 生成预签名URL用于返回给前端
-        String presignUrl = fileApi.presignGetUrl(resultFileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
-        
-        // 缓存中存储原始URL（不是预签名URL），下次使用时再生成预签名URL
-        PreviewCacheEntry entry = new PreviewCacheEntry(resultFileUrl, format, ttsResult.getSampleRate(), ttsResult.getRequestId());
+
+        // [Security] Do not expose the OSS link: return the audio itself, Base64-encoded, so the
+        // client can play it without a follow-up request. Nothing is uploaded here any more, so
+        // the object-name locals that only served the upload are gone as well.
+        String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio());
+        log.info("[previewVoice][合成成功，配音编号({})，voiceId({})，format({})，audioSize={}]",
+                voiceConfigId, voiceId, format, ttsResult.getAudio().length);
+
+        // Cache the Base64 payload with its format and sample rate to speed up repeat previews
+        PreviewCacheEntry entry = new PreviewCacheEntry(audioBase64, format, ttsResult.getSampleRate(), ttsResult.getRequestId());
         savePreviewCache(cacheKey, entry);
-        return buildPreviewResp(entry, presignUrl, voiceId);
+
+        // Build the response from the same entry that was cached, so a fresh synthesis and a
+        // later cache hit report identical metadata
+        return buildPreviewResp(entry, voiceId);
     }
 
     private CosyVoiceTtsRequest buildTtsRequest(String text,
@@ -639,6 +630,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                                                 String model,
                                                 Float speechRate,
                                                 Float volume,
+                                                String emotion,
                                                 Integer sampleRate,
                                                 String audioFormat,
                                                 boolean preview) {
@@ -650,6 +642,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                 .model(model)
                 .speechRate(speechRate)
                 .volume(volume)
+                .emotion(emotion)
                 .sampleRate(sampleRate)
                 .audioFormat(audioFormat)
                 .preview(preview)
@@ -822,65 +815,48 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
     }
 
     private AppTikVoiceTtsRespVO buildSynthResponseFromCache(AppTikVoiceTtsReqVO reqVO, SynthCacheEntry cache) {
-        byte[] audioBytes = Base64.getDecoder().decode(cache.getAudioBase64());
+        // Serve the cached Base64 audio as-is; nothing is uploaded to OSS
         String format = defaultFormat(cache.getFormat(), reqVO.getAudioFormat());
         String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cache.getVoiceId());
-        ByteArrayMultipartFile multipartFile = new ByteArrayMultipartFile(
-                "file",
-                buildFileName(voiceId, format),
-                resolveContentType(format),
-                audioBytes
-        );
-        Long infraFileId = tikUserFileService.uploadFile(multipartFile, "audio", null);
-        
-        // 通过infraFileId查询TikUserFileDO，获取用户文件ID
-        TikUserFileDO userFile = userFileMapper.selectOne(
-                new LambdaQueryWrapperX<TikUserFileDO>()
-                        .eq(TikUserFileDO::getFileId, infraFileId)
-                        .eq(TikUserFileDO::getUserId, SecurityFrameworkUtils.getLoginUserId())
-                        .orderByDesc(TikUserFileDO::getId)
-                        .last("LIMIT 1"));
-        if (userFile == null) {
-            throw exception(VOICE_FILE_NOT_EXISTS, "文件上传成功但未找到用户文件记录");
-        }
 
         AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO();
-        respVO.setFileId(infraFileId); // 返回infraFileId，保持与原有逻辑一致
-        respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(userFile.getId())); // 使用TikUserFileDO.id获取播放URL
+        respVO.setFileId(null);  // no fileId is returned (avoid exposing it)
+        respVO.setAudioBase64(cache.getAudioBase64());  // return the Base64 payload
         respVO.setFormat(format);
         respVO.setSampleRate(cache.getSampleRate());
-        respVO.setRequestId(cache.getRequestId());
+        respVO.setRequestId("");  // request ID is withheld (avoid exposing technical details)
         respVO.setVoiceId(voiceId);
         return respVO;
     }
 
-    private AppTikVoicePreviewRespVO buildPreviewResp(PreviewCacheEntry entry, String presignUrl, String voiceId) {
+    private AppTikVoicePreviewRespVO buildPreviewResp(PreviewCacheEntry entry, String voiceId) {
         AppTikVoicePreviewRespVO respVO = new AppTikVoicePreviewRespVO();
-        respVO.setAudioUrl(presignUrl);
+        respVO.setAudioBase64(entry.getAudioBase64());  // Base64 payload the client decodes and plays
+        // Format and sample rate come from the entry itself rather than hard-coded defaults
         respVO.setFormat(entry.getFormat());
         respVO.setSampleRate(entry.getSampleRate());
-        respVO.setRequestId(entry.getRequestId());
+        respVO.setRequestId("");  // request ID is withheld (avoid exposing technical details)
         respVO.setVoiceId(voiceId);
         return respVO;
     }
 
     private static class PreviewCacheEntry {
-        private String fileUrl;
+        private String audioBase64;
         private String format;
         private Integer sampleRate;
         private String requestId;
 
         public PreviewCacheEntry() {}
 
-        public PreviewCacheEntry(String fileUrl, String format, Integer sampleRate, String requestId) {
-            this.fileUrl = fileUrl;
+        public PreviewCacheEntry(String audioBase64, String format, Integer sampleRate, String requestId) {
+            this.audioBase64 = audioBase64;
             this.format = format;
             this.sampleRate = sampleRate;
             this.requestId = requestId;
         }
 
-        public String getFileUrl() {
-            return fileUrl;
+        public String getAudioBase64() {
+            return audioBase64;
         }
 
         public String getFormat() {
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewRespVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewRespVO.java
index 3d3bf18e7f..61cad75d87 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewRespVO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewRespVO.java
@@ -7,7 +7,11 @@ import lombok.Data;
 @Schema(description = "音色试听响应")
 public class AppTikVoicePreviewRespVO {
 
-    @Schema(description = "音频播放地址（预签名 URL）")
+    @Schema(description = "音频Base64数据（可直接播放，使用 data:audio/...;base64,... 格式）")
+    private String audioBase64;
+
+    @Schema(description = "音频播放地址（预签名 URL，已废弃，不推荐使用）")
+    @Deprecated
     private String audioUrl;
 
     @Schema(description = "音频格式", example = "wav")
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsRespVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsRespVO.java
index 0b386389c7..ded9882faa 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsRespVO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsRespVO.java
@@ -10,7 +10,11 @@ public class AppTikVoiceTtsRespVO {
     @Schema(description = "用户文件编号", example = "1024")
     private Long fileId;
 
-    @Schema(description = "音频播放地址（预签名 URL）")
+    @Schema(description = "音频Base64数据（可直接播放，使用 data:audio/...;base64,... 格式）")
+    private String audioBase64;
+
+    @Schema(description = "音频播放地址（预签名 URL，已废弃，不推荐使用）")
+    @Deprecated
     private String audioUrl;
 
     @Schema(description = "音频格式", example = "mp3")