feat: 功能优化

2026-01-27 01:39:08 +08:00
parent bf12e70339
commit 24f66c8e81
24 changed files with 1570 additions and 133 deletions
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceProvider.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceProvider.java
@@ -0,0 +1,160 @@
+package cn.iocoder.yudao.module.tik.voice.client;
+
+import cn.hutool.core.util.StrUtil;
+import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest;
+import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
+import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
+import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
+import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
+import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig;
+import cn.iocoder.yudao.module.tik.voice.config.VoiceProviderProperties;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Component;
+
+/**
+ * CosyVoice implementation of {@link VoiceCloneProvider}.
+ *
+ * <p>Provider for the Alibaba Cloud CosyVoice speech service. It adapts the unified
+ * request/result DTOs and delegates the actual API calls to {@link CosyVoiceClient}.
+ *
+ * @author 芋道源码
+ */
+@Slf4j
+@Component
+@RequiredArgsConstructor
+public class CosyVoiceProvider implements VoiceCloneProvider {
+
+    private final CosyVoiceClient cosyVoiceClient;
+
+    /**
+     * New-style configuration (multi-provider aware).
+     */
+    private final VoiceProviderProperties voiceProviderProperties;
+
+    /**
+     * Legacy configuration, kept for backward compatibility.
+     */
+    private final CosyVoiceProperties cosyVoiceProperties;
+
+    /**
+     * Resolves the effective CosyVoice configuration.
+     * The new-style entry wins; otherwise the enabled legacy properties are converted.
+     */
+    private CosyVoiceProviderConfig getConfig() {
+        // New-style entry, usable only when it is the CosyVoice-specific type.
+        var candidate = voiceProviderProperties.getProviderConfig("cosyvoice");
+        if (candidate instanceof CosyVoiceProviderConfig typed) {
+            return typed;
+        }
+
+        // No usable new-style entry: convert the legacy properties if enabled,
+        // otherwise hand back a configuration holding only the built-in defaults.
+        boolean legacyUsable = cosyVoiceProperties != null
+                && cosyVoiceProperties.isEnabled();
+        return legacyUsable
+                ? migrateFromLegacyConfig(cosyVoiceProperties)
+                : new CosyVoiceProviderConfig();
+    }
+
+    /**
+     * Copies every legacy property into a new-style configuration object.
+     */
+    private CosyVoiceProviderConfig migrateFromLegacyConfig(CosyVoiceProperties legacy) {
+        var migrated = new CosyVoiceProviderConfig();
+        migrated.setEnabled(true);
+        migrated.setApiKey(legacy.getApiKey());
+        migrated.setDefaultModel(legacy.getDefaultModel());
+        migrated.setDefaultVoiceId(legacy.getDefaultVoiceId());
+        migrated.setSampleRate(legacy.getSampleRate());
+        migrated.setAudioFormat(legacy.getAudioFormat());
+        migrated.setPreviewText(legacy.getPreviewText());
+        migrated.setTtsUrl(legacy.getTtsUrl());
+        migrated.setVoiceEnrollmentUrl(legacy.getVoiceEnrollmentUrl());
+        migrated.setConnectTimeout(legacy.getConnectTimeout());
+        migrated.setReadTimeout(legacy.getReadTimeout());
+        return migrated;
+    }
+
+    @Override
+    public VoiceCloneResult cloneVoice(VoiceCloneRequest request) {
+        log.info("[CosyVoiceProvider][语音克隆][audioUrl={}, model={}]",
+                request.getAudioUrl(), request.getModel());
+
+        // Translate the provider-neutral request into the CosyVoice request.
+        var cosyRequest = new cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest();
+        cosyRequest.setUrl(request.getAudioUrl());
+        cosyRequest.setTargetModel(request.getModel());
+        cosyRequest.setPrefix(request.getPrefix());
+
+        // Optional fields are copied only when set, so the CosyVoice request
+        // keeps whatever value it already holds otherwise.
+        Integer sampleRate = request.getSampleRate();
+        if (sampleRate != null) {
+            cosyRequest.setSampleRate(sampleRate);
+        }
+        String audioFormat = request.getAudioFormat();
+        if (audioFormat != null) {
+            cosyRequest.setAudioFormat(audioFormat);
+        }
+
+        // Delegate to the underlying client, then map back to the unified result.
+        var cosyResult = cosyVoiceClient.cloneVoice(cosyRequest);
+        var result = new VoiceCloneResult();
+        result.setVoiceId(cosyResult.getVoiceId());
+        result.setRequestId(cosyResult.getRequestId());
+
+        log.info("[CosyVoiceProvider][语音克隆成功][voiceId={}]", result.getVoiceId());
+        return result;
+    }
+
+    @Override
+    public VoiceTtsResult synthesize(VoiceTtsRequest request) {
+        String text = request.getText();
+        int textLength = text != null ? text.length() : 0;
+        log.info("[CosyVoiceProvider][语音合成][voiceId={}, textLength={}, model={}]",
+                request.getVoiceId(), textLength, request.getModel());
+
+        // Translate the provider-neutral request into the CosyVoice request.
+        var cosyRequest = cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest.builder()
+                .text(text)
+                .voiceId(request.getVoiceId())
+                .fileUrl(request.getFileUrl())
+                .referenceText(request.getReferenceText())
+                .model(request.getModel())
+                .speechRate(request.getSpeechRate())
+                .volume(request.getVolume())
+                .instruction(request.getInstruction())
+                .sampleRate(request.getSampleRate())
+                .audioFormat(request.getAudioFormat())
+                .preview(request.isPreview())
+                .build();
+
+        // Delegate to the underlying client.
+        var cosyResult = cosyVoiceClient.synthesize(cosyRequest);
+
+        // Map the CosyVoice result back to the unified result.
+        var result = new VoiceTtsResult();
+        result.setRequestId(cosyResult.getRequestId());
+        result.setFormat(cosyResult.getFormat());
+        result.setSampleRate(cosyResult.getSampleRate());
+        result.setAudio(cosyResult.getAudio());
+        result.setVoiceId(cosyResult.getVoiceId());
+
+        byte[] audio = result.getAudio();
+        int audioSize = audio != null ? audio.length : 0;
+        log.info("[CosyVoiceProvider][语音合成成功][format={}, audioSize={}]",
+                result.getFormat(), audioSize);
+        return result;
+    }
+
+    @Override
+    public boolean supports(String providerType) {
+        return providerType != null && providerType.equalsIgnoreCase("cosyvoice");
+    }
+
+    @Override
+    public String getProviderType() {
+        return "cosyvoice";
+    }
+}
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProvider.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProvider.java
@@ -0,0 +1,55 @@
+package cn.iocoder.yudao.module.tik.voice.client;
+
+import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest;
+import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
+import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
+import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
+
+/**
+ * Unified interface for voice-clone providers.
+ *
+ * <p>Covers voice cloning and speech synthesis across multiple provider implementations.
+ * Obtain a concrete implementation through {@link VoiceCloneProviderFactory}.
+ *
+ * @author 芋道源码
+ */
+public interface VoiceCloneProvider {
+
+    /**
+     * Clones a voice.
+     *
+     * <p>Clones the target voice from the audio file URL carried by the request.
+     * Provider-specific details stay hidden behind this interface.
+     *
+     * @param request voice-clone request
+     * @return voice-clone result carrying the generated voiceId
+     * @throws RuntimeException when cloning fails
+     */
+    VoiceCloneResult cloneVoice(VoiceCloneRequest request);
+
+    /**
+     * Synthesizes speech from text.
+     *
+     * <p>Converts text to speech using either a previously cloned voice or a system voice.
+     *
+     * @param request speech-synthesis request
+     * @return synthesis result carrying the audio data
+     * @throws RuntimeException when synthesis fails
+     */
+    VoiceTtsResult synthesize(VoiceTtsRequest request);
+
+    /**
+     * Checks whether this provider handles the given provider type.
+     *
+     * @param providerType provider type (e.g. "cosyvoice", "siliconflow")
+     * @return true when supported, false otherwise
+     */
+    boolean supports(String providerType);
+
+    /**
+     * Returns this provider's type identifier.
+     *
+     * @return provider type, e.g. "cosyvoice", "siliconflow"
+     */
+    String getProviderType();
+}
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProviderFactory.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/VoiceCloneProviderFactory.java
@@ -0,0 +1,104 @@
+package cn.iocoder.yudao.module.tik.voice.client;
+
+import cn.iocoder.yudao.framework.common.exception.ServiceException;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
+import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception0;
+import static cn.iocoder.yudao.module.tik.enums.ErrorCodeConstants.VOICE_TTS_FAILED;
+
+/**
+ * Factory for voice-clone providers.
+ *
+ * <p>Registers every {@link VoiceCloneProvider} implementation and resolves one by type;
+ * the first provider registered serves as the default.
+ *
+ * @author 芋道源码
+ */
+@Slf4j
+@Component
+public class VoiceCloneProviderFactory {
+
+    private final Map<String, VoiceCloneProvider> providers = new ConcurrentHashMap<>();
+    /** First registered provider; ConcurrentHashMap iteration order cannot identify it. */
+    private volatile VoiceCloneProvider defaultProvider;
+
+    @Autowired
+    public VoiceCloneProviderFactory(List<VoiceCloneProvider> providerList) {
+        // Register every provider implementation that was injected.
+        for (VoiceCloneProvider provider : providerList) {
+            registerProvider(provider);
+            log.info("[VoiceCloneProviderFactory][注册Provider][type={}]", provider.getProviderType());
+        }
+    }
+
+    /** Registers a provider under its type, replacing any earlier provider of that type. */
+    public synchronized void registerProvider(VoiceCloneProvider provider) {
+        String type = provider.getProviderType();
+        VoiceCloneProvider replaced = providers.put(type, provider);
+        if (replaced != null) {
+            log.warn("[VoiceCloneProviderFactory][Provider已存在，覆盖][type={}]", type);
+        }
+        if (defaultProvider == null || defaultProvider == replaced) {
+            defaultProvider = provider; // never keep a replaced instance as the default
+        }
+    }
+
+    /**
+     * Returns the default provider, i.e. the first one registered.
+     *
+     * @return the default provider instance
+     * @throws ServiceException when no provider has been registered
+     */
+    public VoiceCloneProvider getDefaultProvider() {
+        VoiceCloneProvider provider = defaultProvider;
+        if (provider == null) {
+            throw exception0(VOICE_TTS_FAILED.getCode(), "未配置任何语音克隆 Provider");
+        }
+        return provider;
+    }
+
+    /**
+     * Resolves a provider by type; a blank type yields the default provider.
+     * @param providerType provider type, e.g. "cosyvoice" or "siliconflow"
+     * @return the matching provider instance
+     * @throws ServiceException when no registered provider matches
+     */
+    public VoiceCloneProvider getProvider(String providerType) {
+        if (providerType == null || providerType.trim().isEmpty()) {
+            return getDefaultProvider();
+        }
+        VoiceCloneProvider provider = lookup(providerType);
+        if (provider == null) {
+            throw exception0(VOICE_TTS_FAILED.getCode(), "不支持的语音克隆供应商: " + providerType);
+        }
+        return provider;
+    }
+
+    /** Tells whether some registered provider matches the given type (null-safe). */
+    public boolean hasProvider(String providerType) {
+        return lookup(providerType) != null;
+    }
+
+    /** Returns the types of all registered providers as an unmodifiable snapshot. */
+    public List<String> getAvailableProviderTypes() {
+        return List.copyOf(providers.keySet());
+    }
+
+    /** Exact key match first, then each provider's own {@code supports} check; null if none. */
+    private VoiceCloneProvider lookup(String providerType) {
+        if (providerType == null || providerType.trim().isEmpty()) {
+            return null;
+        }
+        String type = providerType.trim();
+        VoiceCloneProvider exact = providers.get(type);
+        return exact != null ? exact
+                : providers.values().stream().filter(p -> p.supports(type)).findFirst().orElse(null);
+    }
+}
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneRequest.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneRequest.java
@@ -0,0 +1,51 @@
+package cn.iocoder.yudao.module.tik.voice.client.dto;
+
+import lombok.Data;
+
+/**
+ * Voice-clone request (unified DTO).
+ *
+ * <p>Hides the API differences between providers behind one request shape.
+ * Each provider implementation converts this DTO into its provider-specific format.
+ *
+ * @author 芋道源码
+ */
+@Data
+public class VoiceCloneRequest {
+
+    /**
+     * Public URL of the audio file.
+     *
+     * <p>CosyVoice: maps to the {@code url} field</p>
+     * <p>SiliconFlow: maps to the {@code audio} field (must be base64-encoded)</p>
+     */
+    private String audioUrl;
+
+    /**
+     * Model name.
+     *
+     * <p>CosyVoice: maps to {@code targetModel}, e.g. {@code cosyvoice-v3-flash}</p>
+     * <p>SiliconFlow: maps to {@code model}, e.g. {@code indextts-2}</p>
+     */
+    private String model;
+
+    /**
+     * Custom voice prefix (optional).
+     *
+     * <p>CosyVoice: required; digits and lowercase letters only, fewer than 10 characters</p>
+     * <p>SiliconFlow: not applicable</p>
+     */
+    private String prefix;
+
+    /**
+     * Sample rate; no value is set here, the documented default is 24000.
+     */
+    private Integer sampleRate;
+
+    /**
+     * Audio format; no value is set here, the documented default is mp3.
+     *
+     * <p>Allowed values: mp3, wav, flac</p>
+     */
+    private String audioFormat;
+}
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneResult.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceCloneResult.java
@@ -0,0 +1,24 @@
+package cn.iocoder.yudao.module.tik.voice.client.dto;
+
+import lombok.Data;
+
+/**
+ * Voice-clone result (unified DTO).
+ *
+ * @author 芋道源码
+ */
+@Data
+public class VoiceCloneResult {
+
+    /**
+     * ID of the generated voice.
+     *
+     * <p>Used by subsequent TTS synthesis calls.</p>
+     */
+    private String voiceId;
+
+    /**
+     * Request ID (for tracing).
+     */
+    private String requestId;
+}
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceTtsRequest.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceTtsRequest.java
@@ -0,0 +1,77 @@
+package cn.iocoder.yudao.module.tik.voice.client.dto;
+
+import lombok.Builder;
+import lombok.Data;
+
+/**
+ * Text-to-speech request (unified DTO).
+ *
+ * <p>Hides the API differences between providers behind one request shape.
+ *
+ * @author 芋道源码
+ */
+@Data
+@Builder
+public class VoiceTtsRequest {
+
+    /**
+     * Text to synthesize.
+     */
+    private String text;
+
+    /**
+     * Voice ID (optional; the configured default is used when absent).
+     *
+     * <p>The voiceId produced by voice cloning.</p>
+     */
+    private String voiceId;
+
+    /**
+     * URL of a voice audio file (used instead of voiceId when synthesizing from a URL).
+     *
+     * <p>Enables on-the-fly voice cloning without cloning in advance.</p>
+     */
+    private String fileUrl;
+
+    /**
+     * Transcript of the reference audio (improves clone quality when fileUrl is used).
+     */
+    private String referenceText;
+
+    /**
+     * Model (the provider's default model is used when absent).
+     */
+    private String model;
+
+    /**
+     * Speech rate (documented range 0.5 - 2.0, default 1.0).
+     */
+    private Float speechRate;
+
+    /**
+     * Volume (documented range -100 - 100, default 0).
+     */
+    private Float volume;
+
+    /**
+     * Instruction used to steer the voice style; optional.
+     */
+    private String instruction;
+
+    /**
+     * Sample rate (documented default 24000).
+     */
+    private Integer sampleRate;
+
+    /**
+     * Audio format (documented default mp3).
+     *
+     * <p>Allowed values: mp3, wav, flac</p>
+     */
+    private String audioFormat;
+
+    /**
+     * Whether the request is only a preview (lets the service side apply rate limiting).
+     */
+    private boolean preview;
+}
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceTtsResult.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/VoiceTtsResult.java
@@ -0,0 +1,39 @@
+package cn.iocoder.yudao.module.tik.voice.client.dto;
+
+import lombok.Data;
+
+/**
+ * Text-to-speech result (unified DTO).
+ *
+ * @author 芋道源码
+ */
+@Data
+public class VoiceTtsResult {
+
+    /**
+     * Request ID (for tracing).
+     */
+    private String requestId;
+
+    /**
+     * Format of the returned audio.
+     *
+     * <p>mp3, wav, flac, etc.</p>
+     */
+    private String format;
+
+    /**
+     * Sample rate.
+     */
+    private Integer sampleRate;
+
+    /**
+     * Binary audio content.
+     */
+    private byte[] audio;
+
+    /**
+     * The voiceId used to produce the audio.
+     */
+    private String voiceId;
+}
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProviderConfig.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProviderConfig.java
@@ -0,0 +1,64 @@
+package cn.iocoder.yudao.module.tik.voice.config;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+
+import java.time.Duration;
+
+/**
+ * CosyVoice provider configuration.
+ *
+ * <p>Extends the common provider configuration with CosyVoice-specific fields.
+ *
+ * @author 芋道源码
+ */
+@Data
+@EqualsAndHashCode(callSuper = true)
+public class CosyVoiceProviderConfig extends VoiceProviderProperties.ProviderConfig {
+
+    /**
+     * Default model.
+     */
+    private String defaultModel = "cosyvoice-v3-flash";
+
+    /**
+     * Default voiceId (optional).
+     */
+    private String defaultVoiceId;
+
+    /**
+     * Default sample rate.
+     */
+    private Integer sampleRate = 24000;
+
+    /**
+     * Default audio format.
+     */
+    private String audioFormat = "mp3";
+
+    /**
+     * Default sample text used for previews.
+     */
+    private String previewText = "您好，欢迎体验专属音色。";
+
+    /**
+     * TTS endpoint URL.
+     */
+    private String ttsUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis";
+
+    /**
+     * Voice-cloning endpoint URL (voice enrollment).
+     */
+    private String voiceEnrollmentUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment";
+
+    /**
+     * Connection timeout.
+     */
+    private Duration connectTimeout = Duration.ofSeconds(10);
+
+    /**
+     * Read timeout (3 minutes, to improve the speech-synthesis success rate).
+     */
+    private Duration readTimeout = Duration.ofSeconds(180);
+
+}
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/VoiceProviderProperties.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/VoiceProviderProperties.java
@@ -0,0 +1,78 @@
+package cn.iocoder.yudao.module.tik.voice.config;
+
+import lombok.Data;
+import org.springframework.boot.context.properties.ConfigurationProperties;
+import org.springframework.stereotype.Component;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Unified configuration for voice providers.
+ *
+ * <p>Holds per-provider settings and the choice of default provider.
+ *
+ * @author 芋道源码
+ */
+@Data
+@Component
+@ConfigurationProperties(prefix = "yudao.voice")
+public class VoiceProviderProperties {
+
+    /**
+     * Default provider type; possible values include cosyvoice, siliconflow.
+     */
+    private String defaultProvider = "cosyvoice";
+
+    /**
+     * Per-provider configuration, keyed by provider type (e.g. cosyvoice, siliconflow).
+     *
+     * <p>NOTE(review): values are declared as {@link ProviderConfig}; confirm that binding can
+     * yield subclasses such as CosyVoiceProviderConfig, which callers test for via instanceof.
+     */
+    private Map<String, ProviderConfig> providers = new HashMap<>();
+
+    /**
+     * Base class holding the settings common to all providers.
+     */
+    @Data
+    public static class ProviderConfig {
+        /**
+         * Whether the provider is enabled.
+         */
+        private boolean enabled = true;
+
+        /**
+         * API key.
+         */
+        private String apiKey;
+
+        /**
+         * Priority (a smaller number means higher priority; intended for failover).
+         */
+        private Integer priority = 100;
+    }
+
+    /**
+     * Looks up the configuration of one provider.
+     *
+     * @param providerType provider type
+     * @return the configuration object, or null when none exists
+     */
+    public ProviderConfig getProviderConfig(String providerType) {
+        return providers.get(providerType);
+    }
+
+    /**
+     * Checks whether a provider is enabled.
+     *
+     * @param providerType provider type
+     * @return true when the configuration exists and is enabled
+     */
+    public boolean isProviderEnabled(String providerType) {
+        ProviderConfig found = getProviderConfig(providerType);
+        return found != null ? found.isEnabled() : false;
+    }
+
+}
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
@@ -19,12 +19,14 @@ import cn.iocoder.yudao.module.tik.file.dal.mysql.TikUserFileMapper;
 import cn.iocoder.yudao.module.tik.file.service.TikUserFileService;
 import cn.iocoder.yudao.module.tik.tikhup.service.TikHupService;
 import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
-import cn.iocoder.yudao.module.tik.voice.client.CosyVoiceClient;
-import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest;
-import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult;
-import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
-import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
+import cn.iocoder.yudao.module.tik.voice.client.VoiceCloneProvider;
+import cn.iocoder.yudao.module.tik.voice.client.VoiceCloneProviderFactory;
+import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest;
+import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
+import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
+import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
 import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
+import cn.iocoder.yudao.module.tik.voice.config.VoiceProviderProperties;
 import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikUserVoiceDO;
 import cn.iocoder.yudao.module.tik.voice.dal.mysql.TikUserVoiceMapper;
 import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceCreateReqVO;
@@ -84,11 +86,14 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
    private TikHupService tikHupService;

    @Resource
-    private CosyVoiceClient cosyVoiceClient;
+    private VoiceCloneProviderFactory voiceProviderFactory;

    @Resource
    private CosyVoiceProperties cosyVoiceProperties;

+    @Resource
+    private VoiceProviderProperties voiceProviderProperties;
+
    @Resource
    private StringRedisTemplate stringRedisTemplate;

@@ -139,17 +144,20 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                .setTranscription(null); // 初始为空，表示未识别
        voiceMapper.insert(voice);

-        // 4. 调用阿里云语音复刻服务，生成 voice_id
+        // 4. 调用语音克隆服务，生成 voice_id
        try {
-            log.info("[createVoice][开始语音复刻，配音编号({})，文件ID({})]", voice.getId(), fileDO.getId());
+            log.info("[createVoice][开始语音复刻，配音编号({})，文件ID({})，供应商({})]",
+                    voice.getId(), fileDO.getId(), createReqVO.getProviderType());
            String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);

-            CosyVoiceCloneRequest cloneRequest = new CosyVoiceCloneRequest();
-            cloneRequest.setTargetModel("cosyvoice-v3-flash"); // 使用v3-flash模型
+            // 使用 Provider 接口（支持前端选择供应商，不传则使用默认）
+            VoiceCloneProvider provider = voiceProviderFactory.getProvider(createReqVO.getProviderType());
+            VoiceCloneRequest cloneRequest = new VoiceCloneRequest();
+            cloneRequest.setAudioUrl(fileAccessUrl);
+            cloneRequest.setModel("cosyvoice-v3-flash"); // 使用v3-flash模型
            cloneRequest.setPrefix("voice" + voice.getId()); // 音色前缀，格式要求
-            cloneRequest.setUrl(fileAccessUrl);

-            CosyVoiceCloneResult cloneResult = cosyVoiceClient.cloneVoice(cloneRequest);
+            VoiceCloneResult cloneResult = provider.cloneVoice(cloneRequest);
            String voiceId = cloneResult.getVoiceId();

            // 更新配音记录，保存 voice_id
@@ -432,22 +440,26 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
            return buildSynthResponseFromCache(reqVO, synthCache);
        }

-        CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
-                finalText,
-                voiceId,
-                fileUrl,
-                transcriptionText,
-                reqVO.getModel(),
-                reqVO.getSpeechRate(),
-                reqVO.getVolume(),
-                reqVO.getInstruction(),
-                reqVO.getSampleRate(),
-                reqVO.getAudioFormat(),
-                false
-        ));
+        // 使用 Provider 接口进行 TTS 合成（支持前端选择供应商，不传则使用默认）
+        VoiceCloneProvider provider = voiceProviderFactory.getProvider(reqVO.getProviderType());
+        VoiceTtsRequest ttsRequest = VoiceTtsRequest.builder()
+                .text(finalText)
+                .voiceId(voiceId)
+                .fileUrl(fileUrl)
+                .referenceText(transcriptionText)
+                .model(reqVO.getModel())
+                .speechRate(reqVO.getSpeechRate())
+                .volume(reqVO.getVolume())
+                .instruction(reqVO.getInstruction())
+                .sampleRate(reqVO.getSampleRate())
+                .audioFormat(reqVO.getAudioFormat())
+                .preview(false)
+                .build();
+
+        VoiceTtsResult ttsResult = provider.synthesize(ttsRequest);

        String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat());
-        String finalVoiceId = StrUtil.blankToDefault(voiceId, cosyVoiceProperties.getDefaultVoiceId());
+        String finalVoiceId = StrUtil.blankToDefault(voiceId, getDefaultVoiceId());

        // 【安全方案】不暴露OSS链接，直接返回Base64编码的音频数据
        String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio());
@@ -527,7 +539,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                voiceId = voice.getVoiceId();
                // 注意：使用 voiceId 时，不依赖 transcriptionText，直接使用前端传入的 inputText
                transcriptionText = null;  // 清除 transcriptionText
-                inputText = StrUtil.blankToDefault(reqVO.getInputText(), cosyVoiceProperties.getPreviewText());
+                inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
            } else {
                log.info("[previewVoice][使用文件URL试听，配音编号({})]", voiceConfigId);
                // 获取文件信息，用于获取文件URL
@@ -543,17 +555,17 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                    throw exception(VOICE_NOT_EXISTS, "配音识别文本为空，请先进行语音识别");
                }
                inputText = StrUtil.blankToDefault(reqVO.getInputText(),
-                        StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText()));
+                        StrUtil.blankToDefault(transcriptionText, getPreviewText()));
            }
        }
        // 3. 如果没有配置ID，使用系统配音配置（需要前端传voiceId）
        else {
            log.info("[previewVoice][开始试听，使用系统配音配置，用户({})]", userId);
-            voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
+            voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), getDefaultVoiceId());
            if (StrUtil.isBlank(voiceId)) {
                throw exception(VOICE_NOT_EXISTS, "系统配音音色ID不能为空");
            }
-            inputText = StrUtil.blankToDefault(reqVO.getInputText(), cosyVoiceProperties.getPreviewText());
+            inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
        }
        
        String finalText = determineSynthesisText(
@@ -588,21 +600,26 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
            return buildPreviewResp(cachedBase64, previewCache.getFormat(), voiceId);
        }

-        log.info("[previewVoice][调用CosyVoice合成，配音编号({})，voiceId({})，fileUrl({})，文本长度({})]",
-                voiceConfigId, voiceId, fileUrl, finalText.length());
-        CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
-                finalText,
-                voiceId,
-                fileUrl,
-                transcriptionText, // 参考音频文本，用于提高克隆质量
-                null, // 使用默认模型
-                speechRate,
-                volume,
-                instruction,
-                null,
-                audioFormat,
-                true
-        ));
+        log.info("[previewVoice][调用语音合成服务，配音编号({})，voiceId({})，fileUrl({})，文本长度({})，供应商({})]",
+                voiceConfigId, voiceId, fileUrl, finalText.length(), reqVO.getProviderType());
+
+        // 使用 Provider 接口进行 TTS 合成（支持前端选择供应商，不传则使用默认）
+        VoiceCloneProvider provider = voiceProviderFactory.getProvider(reqVO.getProviderType());
+        VoiceTtsRequest ttsRequest = VoiceTtsRequest.builder()
+                .text(finalText)
+                .voiceId(voiceId)
+                .fileUrl(fileUrl)
+                .referenceText(transcriptionText)
+                .model(null) // 使用默认模型
+                .speechRate(speechRate)
+                .volume(volume)
+                .instruction(instruction)
+                .sampleRate(null)
+                .audioFormat(audioFormat)
+                .preview(true)
+                .build();
+
+        VoiceTtsResult ttsResult = provider.synthesize(ttsRequest);

        String format = defaultFormat(ttsResult.getFormat(), audioFormat);
        String identifier = StrUtil.isNotBlank(voiceId) ? voiceId : "voice";
@@ -622,35 +639,53 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
        return buildPreviewResp(audioBase64, format, voiceId);
    }

-    private CosyVoiceTtsRequest buildTtsRequest(String text,
-                                                String voiceId,
-                                                String fileUrl,
-                                                String referenceText,
-                                                String model,
-                                                Float speechRate,
-                                                Float volume,
-                                                String instruction,
-                                                Integer sampleRate,
-                                                String audioFormat,
-                                                boolean preview) {
-        return CosyVoiceTtsRequest.builder()
-                .text(text)
-                .voiceId(voiceId)
-                .fileUrl(fileUrl)
-                .referenceText(referenceText)
-                .model(model)
-                .speechRate(speechRate)
-                .volume(volume)
-                .instruction(instruction)
-                .sampleRate(sampleRate)
-                .audioFormat(audioFormat)
-                .preview(preview)
-                .build();
+    /**
+     * Looks up the new-style CosyVoice provider configuration.
+     * Returns null when it is absent or not CosyVoice-typed; callers then use the legacy properties.
+     */
+    private cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig getCosyVoiceConfig() {
+        if (voiceProviderProperties == null) {
+            return null;
+        }
+        var entry = voiceProviderProperties.getProviderConfig("cosyvoice");
+        return entry instanceof cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig typed
+                ? typed
+                : null;
+    }
+
+    /**
+     * Resolves the default audio format.
+     * The new-style provider config wins, then the legacy properties, then "mp3".
+     */
+    private String getDefaultFormat() {
+        var providerConfig = getCosyVoiceConfig();
+        if (providerConfig == null && cosyVoiceProperties == null) {
+            return "mp3";
+        }
+        // At least one source exists; the provider config takes precedence.
+        return providerConfig != null
+                ? providerConfig.getAudioFormat()
+                : cosyVoiceProperties.getAudioFormat();
+    }
+
+    /**
+     * Resolves the default sample rate.
+     * The new-style provider config wins, then the legacy properties, then 24000.
+     */
+    private Integer getDefaultSampleRate() {
+        var providerConfig = getCosyVoiceConfig();
+        if (providerConfig == null && cosyVoiceProperties == null) {
+            return 24000;
+        }
+        if (providerConfig == null) {
+            return cosyVoiceProperties.getSampleRate();
+        }
+        return providerConfig.getSampleRate();
    }

    private String defaultFormat(String responseFormat, String requestFormat) {
        return StrUtil.blankToDefault(responseFormat,
-                StrUtil.blankToDefault(requestFormat, cosyVoiceProperties.getAudioFormat()));
+                StrUtil.blankToDefault(requestFormat, getDefaultFormat()));
    }

    private String buildFileName(String voiceId, String format) {
@@ -687,7 +722,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
            return builder.toString();
        }
        if (allowFallback) {
-            return cosyVoiceProperties.getPreviewText();
+            return getPreviewText();
        }
        throw exception(VOICE_TTS_FAILED, "请提供需要合成的文本内容");
    }
@@ -750,15 +785,19 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
        } else {
            identifier = "no-voice";
        }
-        
+
+        // 获取默认配置
+        String defaultFormat = getDefaultFormat();
+        Integer defaultSampleRate = getDefaultSampleRate();
+
        String payload = StrUtil.join("|",
                identifier,
                text,
                speechRate != null ? speechRate : "1.0",
                volume != null ? volume : "0",
                instruction,
-                StrUtil.blankToDefault(audioFormat, cosyVoiceProperties.getAudioFormat()),
-                sampleRate != null ? sampleRate : cosyVoiceProperties.getSampleRate());
+                StrUtil.blankToDefault(audioFormat, defaultFormat),
+                sampleRate != null ? sampleRate : defaultSampleRate);
        String hash = cn.hutool.crypto.SecureUtil.sha256(payload);
        return prefix + hash;
    }
@@ -1123,5 +1162,35 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
        builder.append(normalized);
    }

+    /**
+     * Resolves the default voice ID.
+     * The new-style provider config wins, then the legacy properties; null when neither exists.
+     */
+    private String getDefaultVoiceId() {
+        var providerConfig = getCosyVoiceConfig();
+        if (providerConfig != null) {
+            return providerConfig.getDefaultVoiceId();
+        }
+        // No new-style config: the legacy properties are the only remaining source.
+        return cosyVoiceProperties == null
+                ? null
+                : cosyVoiceProperties.getDefaultVoiceId();
+    }
+
+    /**
+     * Resolves the preview text.
+     * The new-style provider config wins, then the legacy properties, then a built-in sentence.
+     */
+    private String getPreviewText() {
+        var providerConfig = getCosyVoiceConfig();
+        if (providerConfig != null) {
+            return providerConfig.getPreviewText();
+        }
+        // No new-style config: use the legacy properties, else the built-in sentence.
+        return cosyVoiceProperties != null
+                ? cosyVoiceProperties.getPreviewText()
+                : "您好，欢迎体验专属音色。";
+    }
+
 }

--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java
@@ -34,5 +34,7 @@ public class AppTikUserVoiceCreateReqVO {
    @Schema(description = "备注", example = "这是一个测试配音")
    private String note;

-}
+    @Schema(description = "供应商类型：cosyvoice-阿里云，siliconflow-硅基流动（不传则使用默认）", example = "cosyvoice")
+    private String providerType;

+}
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java
@@ -42,6 +42,8 @@ public class AppTikVoicePreviewReqVO {

    @Schema(description = "指令（用于控制音色风格）", example = "请用温柔专业的语调朗读")
    private String instruction;
+
+    @Schema(description = "供应商类型：cosyvoice-阿里云，siliconflow-硅基流动（不传则使用默认）", example = "cosyvoice")
+    private String providerType;
+
 }
-
-
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java
@@ -44,6 +44,8 @@ public class AppTikVoiceTtsReqVO {

    @Schema(description = "音频格式，默认 wav，可选 mp3")
    private String audioFormat;
+
+    @Schema(description = "供应商类型：cosyvoice-阿里云，siliconflow-硅基流动（不传则使用默认）", example = "cosyvoice")
+    private String providerType;
+
 }
-
-