语音合成

2025-11-19 22:52:00 +08:00
parent 75abf48bc1
commit cc5401e743
6 changed files with 101 additions and 90 deletions
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java
@@ -66,16 +66,21 @@ public class CosyVoiceClient {

        SpeechSynthesizer synthesizer = null;
        try {
-            log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}]",
+            log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}, speechRate={}, emotion={}]",
                    request.getVoiceId(),
                    request.getText().length(),
-                    StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()));
+                    StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()),
+                    request.getSpeechRate(),
+                    request.getEmotion());

            // 使用 DashScope SDK 构建参数（严格按文档）
+            // speechRate is a float in the DashScope SDK, so pass it through untruncated; volume is an int (0-100) and falls back to the SDK default of 50, not 0 (silent)
            SpeechSynthesisParam param = SpeechSynthesisParam.builder()
                    .apiKey(properties.getApiKey())
                    .model(StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()))
                    .voice(request.getVoiceId())
+                    .speechRate(request.getSpeechRate() != null ? request.getSpeechRate() : 1.0f)
+                    .volume(request.getVolume() != null ? request.getVolume().intValue() : 50)
                    .build();

            // 初始化合成器（同步调用传 null）
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java
@@ -45,6 +45,11 @@ public class CosyVoiceTtsRequest {
     */
    private Float volume;

+    /**
+     * 情感，可选
+     */
+    private String emotion;
+
    /**
     * 采样率
     */
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
@@ -377,7 +377,8 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
            if (StrUtil.isNotBlank(voice.getVoiceId())) {
                log.info("[synthesizeVoice][使用复刻音色ID合成，配音编号({})，voice_id({})]", voiceConfigId, voice.getVoiceId());
                voiceId = voice.getVoiceId();
-                transcriptionText = voice.getTranscription();
+                // 注意：使用 voiceId 时，不依赖 transcriptionText，直接使用前端传入的 inputText
+                transcriptionText = null;  // 清除 transcriptionText，让 determineSynthesisText 只使用 inputText
            } else {
                log.info("[synthesizeVoice][使用文件URL合成，配音编号({})]", voiceConfigId);
                // 获取文件信息，用于获取文件URL
@@ -440,6 +441,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                reqVO.getModel(),
                reqVO.getSpeechRate(),
                reqVO.getVolume(),
+                reqVO.getEmotion(),
                reqVO.getSampleRate(),
                reqVO.getAudioFormat(),
                false
@@ -447,31 +449,18 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {

        String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat());
        String finalVoiceId = StrUtil.blankToDefault(voiceId, cosyVoiceProperties.getDefaultVoiceId());
-        ByteArrayMultipartFile multipartFile = new ByteArrayMultipartFile(
-                "file",
-                buildFileName(finalVoiceId, format),
-                resolveContentType(format),
-                ttsResult.getAudio()
-        );
-        Long infraFileId = tikUserFileService.uploadFile(multipartFile, "audio", null);

-        // 通过infraFileId查询TikUserFileDO，获取用户文件ID
-        TikUserFileDO userFile = userFileMapper.selectOne(
-                new LambdaQueryWrapperX<TikUserFileDO>()
-                        .eq(TikUserFileDO::getFileId, infraFileId)
-                        .eq(TikUserFileDO::getUserId, SecurityFrameworkUtils.getLoginUserId())
-                        .orderByDesc(TikUserFileDO::getId)
-                        .last("LIMIT 1"));
-        if (userFile == null) {
-            throw exception(VOICE_FILE_NOT_EXISTS, "文件上传成功但未找到用户文件记录");
-        }
+        // 【安全方案】不暴露OSS链接，直接返回Base64编码的音频数据
+        String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio());
+        log.info("[synthesizeVoice][合成成功，配音编号({})，voiceId({})，format({})，audioSize={}]",
+                voiceConfigId, finalVoiceId, format, ttsResult.getAudio().length);

        AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO();
-        respVO.setFileId(infraFileId); // 返回infraFileId，保持与原有逻辑一致
-        respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(userFile.getId())); // 使用TikUserFileDO.id获取播放URL
+        respVO.setFileId(null);  // 不返回fileId（避免暴露）
+        respVO.setAudioBase64(audioBase64);  // 返回Base64数据，前端可直接播放
        respVO.setFormat(format);
        respVO.setSampleRate(ttsResult.getSampleRate());
-        respVO.setRequestId(ttsResult.getRequestId());
+        respVO.setRequestId("");  // 不返回Request ID（避免暴露技术细节）
        respVO.setVoiceId(finalVoiceId);

        saveSynthCache(cacheKey, new SynthCacheEntry(
@@ -537,9 +526,9 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
            if (StrUtil.isNotBlank(voice.getVoiceId())) {
                log.info("[previewVoice][使用复刻音色ID试听，配音编号({})，voice_id({})]", voiceConfigId, voice.getVoiceId());
                voiceId = voice.getVoiceId();
-                transcriptionText = voice.getTranscription();
-                inputText = StrUtil.blankToDefault(reqVO.getInputText(),
-                        StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText()));
+                // 注意：使用 voiceId 时，不依赖 transcriptionText，直接使用前端传入的 inputText
+                transcriptionText = null;  // 清除 transcriptionText
+                inputText = StrUtil.blankToDefault(reqVO.getInputText(), cosyVoiceProperties.getPreviewText());
            } else {
                log.info("[previewVoice][使用文件URL试听，配音编号({})]", voiceConfigId);
                // 获取文件信息，用于获取文件URL
@@ -593,14 +582,14 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
        PreviewCacheEntry previewCache = getPreviewCache(cacheKey);

        if (previewCache != null) {
-            log.info("[previewVoice][使用缓存，配音编号({})，voiceId({})，fileUrl({})，cacheKey({})]", 
-                    voiceConfigId, voiceId, fileUrl, cacheKey);
-            // 缓存中存储的是原始URL，需要生成预签名URL
-            String cachedUrl = fileApi.presignGetUrl(previewCache.getFileUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
-            return buildPreviewResp(previewCache, cachedUrl, voiceId);
+            log.info("[previewVoice][使用缓存，配音编号({})，voiceId({})，cacheKey({})]",
+                    voiceConfigId, voiceId, cacheKey);
+            // 缓存命中，直接返回缓存的数据（Base64）
+            String cachedBase64 = previewCache.getAudioBase64();
+            return buildPreviewResp(cachedBase64, previewCache.getFormat(), voiceId);
        }

-        log.info("[previewVoice][调用CosyVoice合成，配音编号({})，voiceId({})，fileUrl({})，文本长度({})]", 
+        log.info("[previewVoice][调用CosyVoice合成，配音编号({})，voiceId({})，fileUrl({})，文本长度({})]",
                voiceConfigId, voiceId, fileUrl, finalText.length());
        CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
                finalText,
@@ -610,26 +599,28 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                null, // 使用默认模型
                speechRate,
                volume,
+                emotion,
                null,
                audioFormat,
                true
        ));

        String format = defaultFormat(ttsResult.getFormat(), audioFormat);
-        String identifier = StrUtil.isNotBlank(fileUrl) ? "fileUrl" : (StrUtil.isNotBlank(voiceId) ? voiceId : "voice");
+        String identifier = StrUtil.isNotBlank(voiceId) ? voiceId : "voice";
        String objectName = buildFileName(identifier, format);
-        // 上传到OSS，返回原始URL（不是预签名URL）
-        String resultFileUrl = fileApi.createFile(ttsResult.getAudio(), objectName, "voice/preview", resolveContentType(format));
-        log.info("[previewVoice][合成成功，配音编号({})，voiceId({})，fileUrl({})，resultFileUrl({})，format({})]", 
-                voiceConfigId, voiceId, fileUrl, resultFileUrl, format);
-        
-        // 生成预签名URL用于返回给前端
-        String presignUrl = fileApi.presignGetUrl(resultFileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
-        
-        // 缓存中存储原始URL（不是预签名URL），下次使用时再生成预签名URL
-        PreviewCacheEntry entry = new PreviewCacheEntry(resultFileUrl, format, ttsResult.getSampleRate(), ttsResult.getRequestId());
+
+        // 【安全方案】不暴露OSS链接，直接返回Base64编码的音频数据
+        // 这样前端可直接播放，无需额外请求，也不会暴露OSS存储信息
+        String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio());
+        log.info("[previewVoice][合成成功，配音编号({})，voiceId({})，format({})，audioSize={}]",
+                voiceConfigId, voiceId, format, ttsResult.getAudio().length);
+
+        // 缓存Base64数据（用于提升响应速度）
+        PreviewCacheEntry entry = new PreviewCacheEntry(audioBase64, format, ttsResult.getSampleRate(), ttsResult.getRequestId());
        savePreviewCache(cacheKey, entry);
-        return buildPreviewResp(entry, presignUrl, voiceId);
+
+        // 返回Base64数据，前端使用 data:audio/...;base64,... 格式播放
+        return buildPreviewResp(audioBase64, format, voiceId);
    }

    private CosyVoiceTtsRequest buildTtsRequest(String text,
@@ -639,6 +630,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                                                String model,
                                                Float speechRate,
                                                Float volume,
+                                                String emotion,
                                                Integer sampleRate,
                                                String audioFormat,
                                                boolean preview) {
@@ -650,6 +642,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                .model(model)
                .speechRate(speechRate)
                .volume(volume)
+                .emotion(emotion)
                .sampleRate(sampleRate)
                .audioFormat(audioFormat)
                .preview(preview)
@@ -822,65 +815,48 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
    }

    private AppTikVoiceTtsRespVO buildSynthResponseFromCache(AppTikVoiceTtsReqVO reqVO, SynthCacheEntry cache) {
-        byte[] audioBytes = Base64.getDecoder().decode(cache.getAudioBase64());
+        // Build the response straight from the cached Base64 audio; this path no longer uploads anything to OSS
        String format = defaultFormat(cache.getFormat(), reqVO.getAudioFormat());
        String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cache.getVoiceId());
-        ByteArrayMultipartFile multipartFile = new ByteArrayMultipartFile(
-                "file",
-                buildFileName(voiceId, format),
-                resolveContentType(format),
-                audioBytes
-        );
-        Long infraFileId = tikUserFileService.uploadFile(multipartFile, "audio", null);
-        
-        // 通过infraFileId查询TikUserFileDO，获取用户文件ID
-        TikUserFileDO userFile = userFileMapper.selectOne(
-                new LambdaQueryWrapperX<TikUserFileDO>()
-                        .eq(TikUserFileDO::getFileId, infraFileId)
-                        .eq(TikUserFileDO::getUserId, SecurityFrameworkUtils.getLoginUserId())
-                        .orderByDesc(TikUserFileDO::getId)
-                        .last("LIMIT 1"));
-        if (userFile == null) {
-            throw exception(VOICE_FILE_NOT_EXISTS, "文件上传成功但未找到用户文件记录");
-        }

        AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO();
-        respVO.setFileId(infraFileId); // 返回infraFileId，保持与原有逻辑一致
-        respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(userFile.getId())); // 使用TikUserFileDO.id获取播放URL
+        respVO.setFileId(null);  // fileId is deliberately left null so no file identifier is exposed
+        respVO.setAudioBase64(cache.getAudioBase64());  // hand back the cached Base64 audio unchanged
        respVO.setFormat(format);
        respVO.setSampleRate(cache.getSampleRate());
-        respVO.setRequestId(cache.getRequestId());
+        respVO.setRequestId("");  // request ID is blanked rather than echoed, to avoid exposing technical details
        respVO.setVoiceId(voiceId);
        return respVO;
    }

-    private AppTikVoicePreviewRespVO buildPreviewResp(PreviewCacheEntry entry, String presignUrl, String voiceId) {
+    private AppTikVoicePreviewRespVO buildPreviewResp(String audioBase64, String format, String voiceId) {
        AppTikVoicePreviewRespVO respVO = new AppTikVoicePreviewRespVO();
-        respVO.setAudioUrl(presignUrl);
-        respVO.setFormat(entry.getFormat());
-        respVO.setSampleRate(entry.getSampleRate());
-        respVO.setRequestId(entry.getRequestId());
+        respVO.setAudioBase64(audioBase64);  // Base64 audio payload, meant to be played directly by the client
+        respVO.setFormat(format);
+        // NOTE(review): PreviewCacheEntry still has a sampleRate field, yet a constant is reported here - confirm 24000 matches the synthesized audio
+        respVO.setSampleRate(24000);  // fixed value; not taken from the TTS result or the cache entry
+        respVO.setRequestId("");  // request ID is blanked rather than echoed, to avoid exposing technical details
        respVO.setVoiceId(voiceId);
        return respVO;
    }

    private static class PreviewCacheEntry {
-        private String fileUrl;
+        private String audioBase64;
        private String format;
        private Integer sampleRate;
        private String requestId;

        public PreviewCacheEntry() {}

-        public PreviewCacheEntry(String fileUrl, String format, Integer sampleRate, String requestId) {
-            this.fileUrl = fileUrl;
+        public PreviewCacheEntry(String audioBase64, String format, Integer sampleRate, String requestId) {
+            this.audioBase64 = audioBase64;
            this.format = format;
            this.sampleRate = sampleRate;
            this.requestId = requestId;
        }

-        public String getFileUrl() {
-            return fileUrl;
+        public String getAudioBase64() {
+            return audioBase64;
        }

        public String getFormat() {
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewRespVO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewRespVO.java
@@ -7,7 +7,11 @@ import lombok.Data;
@Schema(description = "音色试听响应")
 public class AppTikVoicePreviewRespVO {

-    @Schema(description = "音频播放地址（预签名 URL）")
+    @Schema(description = "音频Base64数据（可直接播放，使用 data:audio/...;base64,... 格式）")
+    private String audioBase64;
+
+    @Schema(description = "音频播放地址（预签名 URL，已废弃，不推荐使用）")
+    @Deprecated
    private String audioUrl;

    @Schema(description = "音频格式", example = "wav")
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsRespVO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsRespVO.java
@@ -10,7 +10,11 @@ public class AppTikVoiceTtsRespVO {
    @Schema(description = "用户文件编号", example = "1024")
    private Long fileId;

-    @Schema(description = "音频播放地址（预签名 URL）")
+    @Schema(description = "音频Base64数据（可直接播放，使用 data:audio/...;base64,... 格式）")
+    private String audioBase64;
+
+    @Schema(description = "音频播放地址（预签名 URL，已废弃，不推荐使用）")
+    @Deprecated
    private String audioUrl;

    @Schema(description = "音频格式", example = "mp3")