Merge branch 'master' into 'main'

Master See merge request root/sionrui!2
2026-02-01 15:06:08 +00:00
parent faa1cbd178 49c660a6e3
commit fc52be834b
11 changed files with 63 additions and 159 deletions
--- a/frontend/app/web-gold/src/components/VoiceSelector.vue
+++ b/frontend/app/web-gold/src/components/VoiceSelector.vue
@@ -50,7 +50,7 @@ const {
  setSpeechRate,
  resetPreviewState
 } = useTTS({
-  provider: TTS_PROVIDERS.QWEN
+  provider: TTS_PROVIDERS.SILICONFLOW
 })

 // 当前选中的音色ID
--- a/frontend/app/web-gold/src/composables/useTTS.js
+++ b/frontend/app/web-gold/src/composables/useTTS.js
@@ -10,28 +10,15 @@ import { normalizeProviderType, VOICE_PROVIDER_TYPES } from '@/config/voiceConfi
 // 兼容旧代码的导出
 const TTS_PROVIDERS = VOICE_PROVIDER_TYPES

-// 供应商默认配置（使用标准化后的键名）
-const DEFAULT_CONFIG = {
-  cosyvoice: {
+const DEFAULT_CONFIG =  {
    apiEndpoint: '/api/tik/voice/tts',
    audioFormat: 'mp3',
    supportedFormats: ['mp3', 'wav']
-  },
-  azure: {
-    apiEndpoint: '/api/tik/voice/azure/tts',
-    audioFormat: 'mp3',
-    supportedFormats: ['mp3', 'wav', 'ogg']
-  },
-  aws: {
-    apiEndpoint: '/api/tik/voice/aws/tts',
-    audioFormat: 'mp3',
-    supportedFormats: ['mp3', 'wav', 'ogg']
-  }
 }

 export function useTTS(options = {}) {
  const {
-    provider = VOICE_PROVIDER_TYPES.COSYVOICE,
+    provider = VOICE_PROVIDER_TYPES.SILICONFLOW,
    customConfig = {}
  } = options

@@ -49,9 +36,7 @@ export function useTTS(options = {}) {

  // 获取当前供应商配置
  const getProviderConfig = () => {
-    const normalizedProvider = normalizeProviderType(provider)
-    const config = DEFAULT_CONFIG[normalizedProvider] || DEFAULT_CONFIG.cosyvoice
-    return { ...config, ...customConfig }
+    return { ...DEFAULT_CONFIG, ...customConfig } // fresh copy; caller-supplied customConfig overrides the defaults
  }

  /**
--- a/frontend/app/web-gold/src/config/voiceConfig.js
+++ b/frontend/app/web-gold/src/config/voiceConfig.js
@@ -20,17 +20,14 @@ export const VOICE_PROVIDER_OPTIONS = [
  { label: '硅基流动 SiliconFlow', value: VOICE_PROVIDER_TYPES.SILICONFLOW }
 ]

-// 供应商别名映射（兼容旧名称）
-export const PROVIDER_ALIAS_MAP = {
-  [VOICE_PROVIDER_TYPES.QWEN]: VOICE_PROVIDER_TYPES.COSYVOICE
-}
+

 /**
- * 标准化供应商类型（处理别名映射）
+ * 标准化供应商类型
 */
 export function normalizeProviderType(providerType) {
  if (!providerType) return DEFAULT_VOICE_PROVIDER
-  return PROVIDER_ALIAS_MAP[providerType] || providerType
+  return VOICE_PROVIDER_TYPES[providerType] || providerType
 }

 /**
@@ -41,21 +38,13 @@ export function getProviderLabel(providerType) {
  return option?.label || providerType
 }

-/**
- * 检查供应商是否支持
- */
-export function isProviderSupported(providerType) {
-  const normalized = normalizeProviderType(providerType)
-  return Object.values(VOICE_PROVIDER_TYPES).includes(normalized)
-}
+

 // 默认导出配置对象
 export default {
  VOICE_PROVIDER_TYPES,
  DEFAULT_VOICE_PROVIDER,
  VOICE_PROVIDER_OPTIONS,
-  PROVIDER_ALIAS_MAP,
  normalizeProviderType,
  getProviderLabel,
-  isProviderSupported
 }
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java
@@ -57,6 +57,15 @@ public class CosyVoiceClient {
        if (!config.isEnabled()) {
            throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key");
        }
+
+        // Diagnostic dump of the incoming TTS request; logged at DEBUG so routine calls stay out of the error log
+        String text = request != null ? request.getText() : null;
+        log.debug("[CosyVoice][TTS参数检查][request={}, text={}, voiceId={}, model={}]",
+                request != null ? "存在" : "为null",
+                text != null ? "'" + text + "' (长度:" + text.length() + ")" : "为null",
+                request != null ? request.getVoiceId() : null,
+                request != null ? request.getModel() : null);
+
        if (request == null || StrUtil.isBlank(request.getText())) {
            throw exception0(VOICE_TTS_FAILED.getCode(), "TTS 文本不能为空");
        }
@@ -86,7 +95,9 @@ public class CosyVoiceClient {
            if (StrUtil.isNotBlank(request.getInstruction())) {
               param.setInstruction(request.getInstruction());
            }
- 
+
+            log.debug("[CosyVoice][SDK参数][param={}, text='{}']", param, request.getText());
+
            // 初始化合成器（同步调用传 null）
            synthesizer = new SpeechSynthesizer(param, null);

--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceProvider.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceProvider.java
@@ -92,6 +92,9 @@ public class CosyVoiceProvider implements VoiceCloneProvider {
                .preview(request.isPreview())
                .build();

+        log.debug("[CosyVoiceProvider][构建的cosyRequest][text='{}', voiceId={}, fileUrl={}]",
+                cosyRequest.getText(), cosyRequest.getVoiceId(), cosyRequest.getFileUrl());
+
        // 调用底层 Client
        cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult cosyResult =
            cosyVoiceClient.synthesize(cosyRequest);
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java
@@ -120,7 +120,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
                    .input(request.getText())
                    .voice(request.getVoiceId())
                    .speed(request.getSpeechRate() != null ? request.getSpeechRate() : 1.0f)
-                    .sampleRate(request.getSampleRate() != null ? request.getSampleRate() : config.getSampleRate())
                    .responseFormat(getOrDefault(request.getAudioFormat(), config.getAudioFormat()))
                    .build();

@@ -150,7 +149,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
            VoiceTtsResult result = new VoiceTtsResult();
            result.setAudio(Base64.getDecoder().decode(base64Audio));
            result.setFormat(sfRequest.getResponseFormat());
-            result.setSampleRate(sfRequest.getSampleRate());
            result.setVoiceId(request.getVoiceId());

            log.info("[SiliconFlowProvider][语音合成成功][format={}, audioSize={}]",
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java
@@ -1,5 +1,6 @@
 package cn.iocoder.yudao.module.tik.voice.client.dto;

+import com.fasterxml.jackson.annotation.JsonProperty;
 import lombok.Builder;
 import lombok.Data;

@@ -20,8 +21,9 @@ public class SiliconFlowTtsRequest {
    private String model;

    /**
-     * 待合成文本
+     * 待合成文本（API 参数名：input）
     */
+    @JsonProperty("input")
    private String input;

    /**
@@ -34,14 +36,12 @@ public class SiliconFlowTtsRequest {
     */
    private Float speed;

-    /**
-     * 采样率（如 24000）
-     */
-    private Integer sampleRate;
+

    /**
-     * 响应格式（mp3, wav, pcm）
+     * 响应格式（mp3, opus, wav, pcm）（API 参数名：response_format）
     */
+    @JsonProperty("response_format")
    private String responseFormat;

 }
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
@@ -427,7 +427,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                transcriptionText,
                reqVO.getInputText(),
                false);
-        // 移除appendEmotion调用，情感通过instruction参数传递

        String cacheKey = buildCacheKey(SYNTH_CACHE_PREFIX,
                voiceId,
@@ -493,128 +492,75 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
        Long userId = SecurityFrameworkUtils.getLoginUserId();
        Long voiceConfigId = reqVO.getVoiceConfigId();

-        // 增加请求参数日志
-        log.info("[previewVoice][开始试听，请求参数：voiceConfigId={}, voiceId={}, fileUrl={}, userId={}]",
-                voiceConfigId, reqVO.getVoiceId(), reqVO.getFileUrl(), userId);
+        log.info("[previewVoice][试听，voiceConfigId={}, voiceId={}, userId={}]",
+                voiceConfigId, reqVO.getVoiceId(), userId);

        String voiceId = null;
        String fileUrl = null;
-        String transcriptionText = null;
-        String inputText;
+        String referenceText = null;

-        // 1. 如果传入了fileUrl和transcriptionText，直接使用（通过语音URL合成）
+        // 1. 通过语音URL合成
        if (StrUtil.isNotBlank(reqVO.getFileUrl()) && StrUtil.isNotBlank(reqVO.getTranscriptionText())) {
-            log.info("[previewVoice][使用语音URL合成，用户({})]", userId);
-            // 如果传入的是预签名URL，提取原始URL（去除查询参数），避免二次签名
            String rawFileUrl = extractRawUrl(reqVO.getFileUrl());
-            // 如果提取后的URL与原始URL不同，说明是预签名URL，需要重新生成预签名URL
-            // 否则直接使用（可能是原始URL或公开URL）
-            if (!rawFileUrl.equals(reqVO.getFileUrl())) {
-                // 重新生成预签名URL，确保有效期足够长
-                fileUrl = fileApi.presignGetUrl(rawFileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
-                log.info("[previewVoice][检测到预签名URL，已提取原始URL并重新生成预签名URL]");
-            } else {
-                fileUrl = reqVO.getFileUrl();
-            }
-            transcriptionText = reqVO.getTranscriptionText();
-            inputText = StrUtil.blankToDefault(reqVO.getInputText(), transcriptionText);
+            fileUrl = rawFileUrl.equals(reqVO.getFileUrl())
+                    ? reqVO.getFileUrl()
+                    : fileApi.presignGetUrl(rawFileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
+            referenceText = reqVO.getTranscriptionText();
        }
-        // 2. 如果有配置ID，根据配置ID查询配音信息（用户配音）
+        // 2. 用户配音
        else if (voiceConfigId != null) {
-            log.info("[previewVoice][开始试听，配音编号({})，用户({})]", voiceConfigId, userId);
-            
            TikUserVoiceDO voice = voiceMapper.selectById(voiceConfigId);
-            log.info("[previewVoice][查询配音结果：voice={},配音编号={},用户ID={}]",
-                    voice != null ? "存在" : "不存在", voiceConfigId, userId);
-
-            if (voice == null) {
-                log.warn("[previewVoice][配音不存在，配音编号({})，用户({})]", voiceConfigId, userId);
-                throw exception(VOICE_NOT_EXISTS, "配音不存在，编号：" + voiceConfigId);
-            }
-            if (!voice.getUserId().equals(userId)) {
-                log.warn("[previewVoice][配音不属于当前用户，配音编号({})，配音用户({})，当前用户({})]", 
-                        voiceConfigId, voice.getUserId(), userId);
-                throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户");
+            if (voice == null || !voice.getUserId().equals(userId)) {
+                throw exception(VOICE_NOT_EXISTS, "配音不存在");
            }

-            // 优先使用复刻的 voice_id，如果不存在则使用文件URL（兼容旧数据）
            if (StrUtil.isNotBlank(voice.getVoiceId())) {
-                log.info("[previewVoice][使用复刻音色ID试听，配音编号({})，voice_id({})]", voiceConfigId, voice.getVoiceId());
                voiceId = voice.getVoiceId();
-                // 注意：使用 voiceId 时，不依赖 transcriptionText，直接使用前端传入的 inputText
-                transcriptionText = null;  // 清除 transcriptionText
-                inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
            } else {
-                log.info("[previewVoice][使用文件URL试听，配音编号({})]", voiceConfigId);
-                // 获取文件信息，用于获取文件URL
                FileDO fileDO = fileMapper.selectById(voice.getFileId());
                if (fileDO == null) {
                    throw exception(VOICE_FILE_NOT_EXISTS);
                }
-
-                // 使用文件URL和识别文本进行合成
                fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
-                transcriptionText = voice.getTranscription();
-                if (StrUtil.isBlank(transcriptionText)) {
+                referenceText = voice.getTranscription();
+                if (StrUtil.isBlank(referenceText)) {
                    throw exception(VOICE_NOT_EXISTS, "配音识别文本为空，请先进行语音识别");
                }
-                inputText = StrUtil.blankToDefault(reqVO.getInputText(),
-                        StrUtil.blankToDefault(transcriptionText, getPreviewText()));
            }
        }
-        // 3. 如果没有配置ID，使用系统配音配置（需要前端传voiceId）
+        // 3. 系统配音
        else {
-            log.info("[previewVoice][开始试听，使用系统配音配置，用户({})]", userId);
            voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), getDefaultVoiceId());
            if (StrUtil.isBlank(voiceId)) {
                throw exception(VOICE_NOT_EXISTS, "系统配音音色ID不能为空");
            }
-            inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
        }
-        
-        String finalText = determineSynthesisText(
-                transcriptionText,
-                inputText,
-                true);
-        
-        // 使用请求参数或默认值
+
+        // 统一处理：使用前端传入的 inputText，否则使用默认试听文本
+        String finalText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
+
        String instruction = reqVO.getInstruction();
-        // 注意：instruction参数现在直接传递给CosyVoice，不再添加到文本中
        Float speechRate = reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f;
        Float volume = reqVO.getVolume() != null ? reqVO.getVolume() : 0f;
        String audioFormat = StrUtil.blankToDefault(reqVO.getAudioFormat(), "mp3");

-        // 构建缓存key（使用fileUrl或voiceId）
-        String cacheKey = buildCacheKey(PREVIEW_CACHE_PREFIX,
-                voiceId,
-                fileUrl,
-                finalText,
-                speechRate,
-                volume,
-                instruction,
-                audioFormat,
-                null);
+        // 缓存
+        String cacheKey = buildCacheKey(PREVIEW_CACHE_PREFIX, voiceId, fileUrl, finalText,
+                speechRate, volume, instruction, audioFormat, null);
        PreviewCacheEntry previewCache = getPreviewCache(cacheKey);
-
        if (previewCache != null) {
-            log.info("[previewVoice][使用缓存，配音编号({})，voiceId({})，cacheKey({})]",
-                    voiceConfigId, voiceId, cacheKey);
-            // 缓存命中，直接返回缓存的数据（Base64）
-            String cachedBase64 = previewCache.getAudioBase64();
-            return buildPreviewResp(cachedBase64, previewCache.getFormat(), voiceId);
+            return buildPreviewResp(previewCache.getAudioBase64(), previewCache.getFormat(), voiceId);
        }

-        log.info("[previewVoice][调用语音合成服务，配音编号({})，voiceId({})，fileUrl({})，文本长度({})，供应商({})]",
-                voiceConfigId, voiceId, fileUrl, finalText.length(), reqVO.getProviderType());
-
-        // 使用 Provider 接口进行 TTS 合成（支持前端选择供应商，不传则使用默认）
+        // TTS 合成
+        log.info("[previewVoice][TTS，voiceId={}, textLen={}]", voiceId, finalText.length());
        VoiceCloneProvider provider = voiceProviderFactory.getProvider(reqVO.getProviderType());
        VoiceTtsRequest ttsRequest = VoiceTtsRequest.builder()
                .text(finalText)
                .voiceId(voiceId)
                .fileUrl(fileUrl)
-                .referenceText(transcriptionText)
-                .model(null) // 使用默认模型
+                .referenceText(referenceText)
+                .model(null)
                .speechRate(speechRate)
                .volume(volume)
                .instruction(instruction)
@@ -624,22 +570,13 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                .build();

        VoiceTtsResult ttsResult = provider.synthesize(ttsRequest);
-
        String format = defaultFormat(ttsResult.getFormat(), audioFormat);
-        String identifier = StrUtil.isNotBlank(voiceId) ? voiceId : "voice";
-        String objectName = buildFileName(identifier, format);
-
-        // 【安全方案】不暴露OSS链接，直接返回Base64编码的音频数据
-        // 这样前端可直接播放，无需额外请求，也不会暴露OSS存储信息
        String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio());
-        log.info("[previewVoice][合成成功，配音编号({})，voiceId({})，format({})，audioSize={}]",
-                voiceConfigId, voiceId, format, ttsResult.getAudio().length);

-        // 缓存Base64数据（用于提升响应速度）
-        PreviewCacheEntry entry = new PreviewCacheEntry(audioBase64, format, ttsResult.getSampleRate(), ttsResult.getRequestId());
-        savePreviewCache(cacheKey, entry);
+        savePreviewCache(cacheKey, new PreviewCacheEntry(audioBase64, format,
+                ttsResult.getSampleRate(), ttsResult.getRequestId()));

-        // 返回Base64数据，前端使用 data:audio/...;base64,... 格式播放
+        log.info("[previewVoice][成功，voiceId={}, format={}, size={}]", voiceId, format, ttsResult.getAudio().length);
        return buildPreviewResp(audioBase64, format, voiceId);
    }

@@ -716,25 +653,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
        throw exception(VOICE_TTS_FAILED, "请提供需要合成的文本内容");
    }

-    private String appendEmotion(String text, String emotion) {
-        if (StrUtil.isBlank(text)) {
-            return text;
-        }
-        if (StrUtil.isBlank(emotion) || "neutral".equalsIgnoreCase(emotion)) {
-            return text;
-        }
-        String emotionLabel = switch (emotion.toLowerCase()) {
-            case "happy" -> "高兴";
-            case "angry" -> "愤怒";
-            case "sad" -> "悲伤";
-            case "scared" -> "害怕";
-            case "disgusted" -> "厌恶";
-            case "surprised" -> "惊讶";
-            default -> emotion;
-        };
-        return "【情感：" + emotionLabel + "】" + text;
-    }
-
    /**
     * 从URL中提取原始URL（去除查询参数和锚点）
     * 
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java
@@ -24,7 +24,7 @@ public class AppTikVoicePreviewReqVO {
    @Size(max = 4000, message = "语音文本不能超过 4000 个字符")
    private String transcriptionText;

-    @Schema(description = "输入文本（可选，如果不传则使用配音的识别文本或默认文本）")
+    @Schema(description = "输入文本（可选，不传则使用默认试听文本）")
    @Size(max = 4000, message = "输入文本不能超过 4000 个字符")
    private String inputText;

--- a/yudao-server/src/main/resources/application-local.yaml
+++ b/yudao-server/src/main/resources/application-local.yaml
@@ -231,7 +231,7 @@ yudao:
      default-model: cosyvoice-v3-flash
    siliconflow:
      enabled: true
-      api-key: sk-epsakfenqnyzoxhmbucsxlhkdqlcbnimslqoivkshalvdozz
+      api-key: ${SILICONFLOW_API_KEY:} # read from the environment; do not commit provider keys
      base-url: https://api.siliconflow.cn
      default-model: IndexTeam/IndexTTS-2
  ice:
--- a/yudao-server/src/main/resources/application.yaml
+++ b/yudao-server/src/main/resources/application.yaml
@@ -214,7 +214,7 @@ spring:

 yudao:
  voice:
-    default-provider: cosyvoice
+    default-provider: siliconflow
    cosyvoice:
      enabled: true
      api-key: sk-10c746f8cb8640738f8d6b71af699003
@@ -225,8 +225,8 @@ yudao:
      tts-url: https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis
      voice-enrollment-url: https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment
    siliconflow:
-      enabled: false
-      api-key: ${SILICONFLOW_API_KEY:}
+      enabled: true
+      api-key: ${SILICONFLOW_API_KEY:} # read from the environment; do not commit provider keys
      base-url: https://api.siliconflow.cn
      default-model: IndexTeam/IndexTTS-2
      sample-rate: 24000