feat: 功能优化
This commit is contained in:
@@ -0,0 +1,160 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.VoiceProviderProperties;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* CosyVoice Provider 实现
|
||||
*
|
||||
* <p>阿里云 CosyVoice 语音服务的 Provider 实现。
|
||||
* 内部委托给 {@link CosyVoiceClient} 进行实际的API调用。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class CosyVoiceProvider implements VoiceCloneProvider {
|
||||
|
||||
private final CosyVoiceClient cosyVoiceClient;
|
||||
|
||||
/**
|
||||
* 新配置(支持多供应商)
|
||||
*/
|
||||
private final VoiceProviderProperties voiceProviderProperties;
|
||||
|
||||
/**
|
||||
* 旧配置(向后兼容)
|
||||
*/
|
||||
private final CosyVoiceProperties cosyVoiceProperties;
|
||||
|
||||
/**
|
||||
* 获取 CosyVoice 配置
|
||||
* 优先使用新配置,如果不存在则使用旧配置(向后兼容)
|
||||
*/
|
||||
private CosyVoiceProviderConfig getConfig() {
|
||||
// 尝试从新配置获取
|
||||
var baseConfig = voiceProviderProperties.getProviderConfig("cosyvoice");
|
||||
if (baseConfig instanceof CosyVoiceProviderConfig cosyConfig) {
|
||||
return cosyConfig;
|
||||
}
|
||||
|
||||
// 回退到旧配置(向后兼容)
|
||||
if (cosyVoiceProperties != null && cosyVoiceProperties.isEnabled()) {
|
||||
return migrateFromLegacyConfig(cosyVoiceProperties);
|
||||
}
|
||||
|
||||
// 返回空配置
|
||||
return new CosyVoiceProviderConfig();
|
||||
}
|
||||
|
||||
/**
|
||||
* 从旧配置迁移到新配置格式
|
||||
*/
|
||||
private CosyVoiceProviderConfig migrateFromLegacyConfig(CosyVoiceProperties legacy) {
|
||||
var config = new CosyVoiceProviderConfig();
|
||||
config.setEnabled(true);
|
||||
config.setApiKey(legacy.getApiKey());
|
||||
config.setDefaultModel(legacy.getDefaultModel());
|
||||
config.setDefaultVoiceId(legacy.getDefaultVoiceId());
|
||||
config.setSampleRate(legacy.getSampleRate());
|
||||
config.setAudioFormat(legacy.getAudioFormat());
|
||||
config.setPreviewText(legacy.getPreviewText());
|
||||
config.setTtsUrl(legacy.getTtsUrl());
|
||||
config.setVoiceEnrollmentUrl(legacy.getVoiceEnrollmentUrl());
|
||||
config.setConnectTimeout(legacy.getConnectTimeout());
|
||||
config.setReadTimeout(legacy.getReadTimeout());
|
||||
return config;
|
||||
}
|
||||
|
||||
@Override
|
||||
public VoiceCloneResult cloneVoice(VoiceCloneRequest request) {
|
||||
log.info("[CosyVoiceProvider][语音克隆][audioUrl={}, model={}]",
|
||||
request.getAudioUrl(), request.getModel());
|
||||
|
||||
// 适配到 CosyVoiceCloneRequest
|
||||
cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest cosyRequest =
|
||||
new cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest();
|
||||
|
||||
cosyRequest.setUrl(request.getAudioUrl());
|
||||
cosyRequest.setTargetModel(request.getModel());
|
||||
cosyRequest.setPrefix(request.getPrefix());
|
||||
if (request.getSampleRate() != null) {
|
||||
cosyRequest.setSampleRate(request.getSampleRate());
|
||||
}
|
||||
if (request.getAudioFormat() != null) {
|
||||
cosyRequest.setAudioFormat(request.getAudioFormat());
|
||||
}
|
||||
|
||||
// 调用底层 Client
|
||||
cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult cosyResult =
|
||||
cosyVoiceClient.cloneVoice(cosyRequest);
|
||||
|
||||
// 适配到统一 Result
|
||||
VoiceCloneResult result = new VoiceCloneResult();
|
||||
result.setVoiceId(cosyResult.getVoiceId());
|
||||
result.setRequestId(cosyResult.getRequestId());
|
||||
|
||||
log.info("[CosyVoiceProvider][语音克隆成功][voiceId={}]", result.getVoiceId());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public VoiceTtsResult synthesize(VoiceTtsRequest request) {
|
||||
log.info("[CosyVoiceProvider][语音合成][voiceId={}, textLength={}, model={}]",
|
||||
request.getVoiceId(),
|
||||
request.getText() != null ? request.getText().length() : 0,
|
||||
request.getModel());
|
||||
|
||||
// 适配到 CosyVoiceTtsRequest
|
||||
cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest cosyRequest =
|
||||
cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest.builder()
|
||||
.text(request.getText())
|
||||
.voiceId(request.getVoiceId())
|
||||
.fileUrl(request.getFileUrl())
|
||||
.referenceText(request.getReferenceText())
|
||||
.model(request.getModel())
|
||||
.speechRate(request.getSpeechRate())
|
||||
.volume(request.getVolume())
|
||||
.instruction(request.getInstruction())
|
||||
.sampleRate(request.getSampleRate())
|
||||
.audioFormat(request.getAudioFormat())
|
||||
.preview(request.isPreview())
|
||||
.build();
|
||||
|
||||
// 调用底层 Client
|
||||
cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult cosyResult =
|
||||
cosyVoiceClient.synthesize(cosyRequest);
|
||||
|
||||
// 适配到统一 Result
|
||||
VoiceTtsResult result = new VoiceTtsResult();
|
||||
result.setRequestId(cosyResult.getRequestId());
|
||||
result.setFormat(cosyResult.getFormat());
|
||||
result.setSampleRate(cosyResult.getSampleRate());
|
||||
result.setAudio(cosyResult.getAudio());
|
||||
result.setVoiceId(cosyResult.getVoiceId());
|
||||
|
||||
log.info("[CosyVoiceProvider][语音合成成功][format={}, audioSize={}]",
|
||||
result.getFormat(), result.getAudio() != null ? result.getAudio().length : 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean supports(String providerType) {
|
||||
return "cosyvoice".equalsIgnoreCase(providerType);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getProviderType() {
|
||||
return "cosyvoice";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,55 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client;
|
||||
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
|
||||
|
||||
/**
|
||||
* 语音克隆 Provider 统一接口
|
||||
*
|
||||
* <p>支持多供应商实现的语音克隆和语音合成服务。
|
||||
* 通过工厂类 {@link VoiceCloneProviderFactory} 获取具体实现。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
public interface VoiceCloneProvider {
|
||||
|
||||
/**
|
||||
* 语音克隆
|
||||
*
|
||||
* <p>根据提供的音频文件URL,克隆目标音色。
|
||||
* 不同供应商的实现细节被此接口屏蔽。
|
||||
*
|
||||
* @param request 语音克隆请求
|
||||
* @return 语音克隆结果,包含生成的 voiceId
|
||||
* @throws RuntimeException 当克隆失败时抛出
|
||||
*/
|
||||
VoiceCloneResult cloneVoice(VoiceCloneRequest request);
|
||||
|
||||
/**
|
||||
* 文本转语音合成
|
||||
*
|
||||
* <p>将文本转换为语音,支持使用已克隆的音色或系统音色。
|
||||
*
|
||||
* @param request 语音合成请求
|
||||
* @return 语音合成结果,包含音频数据
|
||||
* @throws RuntimeException 当合成失败时抛出
|
||||
*/
|
||||
VoiceTtsResult synthesize(VoiceTtsRequest request);
|
||||
|
||||
/**
|
||||
* 检查是否支持指定的供应商类型
|
||||
*
|
||||
* @param providerType 供应商类型(如 "cosyvoice", "siliconflow")
|
||||
* @return true 如果支持,false 否则
|
||||
*/
|
||||
boolean supports(String providerType);
|
||||
|
||||
/**
|
||||
* 获取供应商类型标识
|
||||
*
|
||||
* @return 供应商类型,如 "cosyvoice", "siliconflow"
|
||||
*/
|
||||
String getProviderType();
|
||||
}
|
||||
@@ -0,0 +1,104 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client;
|
||||
|
||||
import cn.iocoder.yudao.framework.common.exception.ServiceException;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
|
||||
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception0;
|
||||
import static cn.iocoder.yudao.module.tik.enums.ErrorCodeConstants.VOICE_TTS_FAILED;
|
||||
|
||||
/**
|
||||
* 语音克隆 Provider 工厂
|
||||
*
|
||||
* <p>负责管理和获取不同的语音克隆 Provider 实现。
|
||||
* 支持多供应商配置和动态切换。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
public class VoiceCloneProviderFactory {
|
||||
|
||||
private final Map<String, VoiceCloneProvider> providers = new ConcurrentHashMap<>();
|
||||
|
||||
@Autowired
|
||||
public VoiceCloneProviderFactory(List<VoiceCloneProvider> providerList) {
|
||||
// 自动注册所有 Provider 实现类
|
||||
for (VoiceCloneProvider provider : providerList) {
|
||||
registerProvider(provider);
|
||||
log.info("[VoiceCloneProviderFactory][注册Provider][type={}]", provider.getProviderType());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 注册 Provider
|
||||
*
|
||||
* @param provider Provider 实例
|
||||
*/
|
||||
public void registerProvider(VoiceCloneProvider provider) {
|
||||
String type = provider.getProviderType();
|
||||
if (providers.containsKey(type)) {
|
||||
log.warn("[VoiceCloneProviderFactory][Provider已存在,覆盖][type={}]", type);
|
||||
}
|
||||
providers.put(type, provider);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取默认 Provider
|
||||
*
|
||||
* @return 默认的 Provider 实例
|
||||
* @throws ServiceException 当没有可用的 Provider 时抛出
|
||||
*/
|
||||
public VoiceCloneProvider getDefaultProvider() {
|
||||
if (providers.isEmpty()) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "未配置任何语音克隆 Provider");
|
||||
}
|
||||
// 返回第一个注册的 Provider 作为默认
|
||||
return providers.values().iterator().next();
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据类型获取 Provider
|
||||
*
|
||||
* @param providerType 供应商类型(如 "cosyvoice", "siliconflow")
|
||||
* @return 对应的 Provider 实例
|
||||
* @throws ServiceException 当 Provider 不存在时抛出
|
||||
*/
|
||||
public VoiceCloneProvider getProvider(String providerType) {
|
||||
if (providerType == null || providerType.trim().isEmpty()) {
|
||||
return getDefaultProvider();
|
||||
}
|
||||
|
||||
VoiceCloneProvider provider = providers.get(providerType);
|
||||
if (provider == null) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "不支持的语音克隆供应商: " + providerType);
|
||||
}
|
||||
|
||||
return provider;
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查是否支持指定的供应商类型
|
||||
*
|
||||
* @param providerType 供应商类型
|
||||
* @return true 如果支持,false 否则
|
||||
*/
|
||||
public boolean hasProvider(String providerType) {
|
||||
return providerType != null && providers.containsKey(providerType);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取所有已注册的 Provider 类型
|
||||
*
|
||||
* @return 供应商类型列表
|
||||
*/
|
||||
public List<String> getAvailableProviderTypes() {
|
||||
return List.copyOf(providers.keySet());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 语音克隆请求(统一DTO)
|
||||
*
|
||||
* <p>屏蔽不同供应商API差异,提供统一的请求结构。
|
||||
* 各Provider实现负责将此DTO转换为供应商特定格式。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Data
|
||||
public class VoiceCloneRequest {
|
||||
|
||||
/**
|
||||
* 音频文件公网URL
|
||||
*
|
||||
* <p>CosyVoice: 对应 {@code url} 字段</p>
|
||||
* <p>SiliconFlow: 对应 {@code audio} 字段(需base64编码)</p>
|
||||
*/
|
||||
private String audioUrl;
|
||||
|
||||
/**
|
||||
* 模型名称
|
||||
*
|
||||
* <p>CosyVoice: 对应 {@code targetModel},如 {@code cosyvoice-v3-flash}</p>
|
||||
* <p>SiliconFlow: 对应 {@code model},如 {@code indextts-2}</p>
|
||||
*/
|
||||
private String model;
|
||||
|
||||
/**
|
||||
* 音色自定义前缀(可选)
|
||||
*
|
||||
* <p>CosyVoice: 必填,仅允许数字和小写字母,长度<10字符</p>
|
||||
* <p>SiliconFlow: 不适用</p>
|
||||
*/
|
||||
private String prefix;
|
||||
|
||||
/**
|
||||
* 采样率,默认24000
|
||||
*/
|
||||
private Integer sampleRate;
|
||||
|
||||
/**
|
||||
* 音频格式,默认mp3
|
||||
*
|
||||
* <p>可选值: mp3, wav, flac</p>
|
||||
*/
|
||||
private String audioFormat;
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 语音克隆结果(统一DTO)
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Data
|
||||
public class VoiceCloneResult {
|
||||
|
||||
/**
|
||||
* 生成的音色ID
|
||||
*
|
||||
* <p>后续TTS合成时使用此ID</p>
|
||||
*/
|
||||
private String voiceId;
|
||||
|
||||
/**
|
||||
* 请求ID(用于追踪)
|
||||
*/
|
||||
private String requestId;
|
||||
}
|
||||
@@ -0,0 +1,77 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 文本转语音请求(统一DTO)
|
||||
*
|
||||
* <p>屏蔽不同供应商API差异,提供统一的请求结构。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
public class VoiceTtsRequest {
|
||||
|
||||
/**
|
||||
* 待合成文本
|
||||
*/
|
||||
private String text;
|
||||
|
||||
/**
|
||||
* 音色ID(可选,默认使用配置)
|
||||
*
|
||||
* <p>使用语音克隆生成的voiceId</p>
|
||||
*/
|
||||
private String voiceId;
|
||||
|
||||
/**
|
||||
* 语音文件URL(当使用语音URL合成时使用,替代voiceId)
|
||||
*
|
||||
* <p>用于实时语音克隆,无需提前克隆</p>
|
||||
*/
|
||||
private String fileUrl;
|
||||
|
||||
/**
|
||||
* 参考音频文本(当使用fileUrl时,用于提高克隆质量)
|
||||
*/
|
||||
private String referenceText;
|
||||
|
||||
/**
|
||||
* 模型(默认使用供应商默认模型)
|
||||
*/
|
||||
private String model;
|
||||
|
||||
/**
|
||||
* 语速(0.5 - 2.0,默认1.0)
|
||||
*/
|
||||
private Float speechRate;
|
||||
|
||||
/**
|
||||
* 音量(-100 - 100,默认0)
|
||||
*/
|
||||
private Float volume;
|
||||
|
||||
/**
|
||||
* 指令(用于控制音色风格),可选
|
||||
*/
|
||||
private String instruction;
|
||||
|
||||
/**
|
||||
* 采样率(默认24000)
|
||||
*/
|
||||
private Integer sampleRate;
|
||||
|
||||
/**
|
||||
* 音频格式(默认mp3)
|
||||
*
|
||||
* <p>可选值: mp3, wav, flac</p>
|
||||
*/
|
||||
private String audioFormat;
|
||||
|
||||
/**
|
||||
* 是否仅用于试听(方便服务侧做限流)
|
||||
*/
|
||||
private boolean preview;
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 文本转语音结果(统一DTO)
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Data
|
||||
public class VoiceTtsResult {
|
||||
|
||||
/**
|
||||
* 请求ID(用于追踪)
|
||||
*/
|
||||
private String requestId;
|
||||
|
||||
/**
|
||||
* 返回的音频格式
|
||||
*
|
||||
* <p>mp3, wav, flac 等</p>
|
||||
*/
|
||||
private String format;
|
||||
|
||||
/**
|
||||
* 采样率
|
||||
*/
|
||||
private Integer sampleRate;
|
||||
|
||||
/**
|
||||
* 音频二进制内容
|
||||
*/
|
||||
private byte[] audio;
|
||||
|
||||
/**
|
||||
* 音频所使用的 voiceId
|
||||
*/
|
||||
private String voiceId;
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.config;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
/**
|
||||
* CosyVoice 供应商配置
|
||||
*
|
||||
* <p>继承通用配置,添加 CosyVoice 特有字段。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
public class CosyVoiceProviderConfig extends VoiceProviderProperties.ProviderConfig {
|
||||
|
||||
/**
|
||||
* 默认模型
|
||||
*/
|
||||
private String defaultModel = "cosyvoice-v3-flash";
|
||||
|
||||
/**
|
||||
* 默认 voiceId(可选)
|
||||
*/
|
||||
private String defaultVoiceId;
|
||||
|
||||
/**
|
||||
* 默认采样率
|
||||
*/
|
||||
private Integer sampleRate = 24000;
|
||||
|
||||
/**
|
||||
* 默认音频格式
|
||||
*/
|
||||
private String audioFormat = "mp3";
|
||||
|
||||
/**
|
||||
* 试听默认示例文本
|
||||
*/
|
||||
private String previewText = "您好,欢迎体验专属音色。";
|
||||
|
||||
/**
|
||||
* TTS 接口地址
|
||||
*/
|
||||
private String ttsUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis";
|
||||
|
||||
/**
|
||||
* 语音复刻接口地址(声音注册)
|
||||
*/
|
||||
private String voiceEnrollmentUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment";
|
||||
|
||||
/**
|
||||
* 连接超时时间
|
||||
*/
|
||||
private Duration connectTimeout = Duration.ofSeconds(10);
|
||||
|
||||
/**
|
||||
* 读取超时时间(3分钟,提升语音合成成功率)
|
||||
*/
|
||||
private Duration readTimeout = Duration.ofSeconds(180);
|
||||
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.config;
|
||||
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 语音 Provider 统一配置
|
||||
*
|
||||
* <p>支持多供应商配置,默认供应商选择。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Data
|
||||
@Component
|
||||
@ConfigurationProperties(prefix = "yudao.voice")
|
||||
public class VoiceProviderProperties {
|
||||
|
||||
/**
|
||||
* 默认供应商类型
|
||||
*
|
||||
* <p>可选值: cosyvoice, siliconflow 等
|
||||
*/
|
||||
private String defaultProvider = "cosyvoice";
|
||||
|
||||
/**
|
||||
* 各供应商配置
|
||||
*
|
||||
* <p>key 为供应商类型(如 cosyvoice, siliconflow)
|
||||
*/
|
||||
private Map<String, ProviderConfig> providers = new HashMap<>();
|
||||
|
||||
/**
|
||||
* 供应商通用配置基类
|
||||
*/
|
||||
@Data
|
||||
public static class ProviderConfig {
|
||||
/**
|
||||
* 是否启用
|
||||
*/
|
||||
private boolean enabled = true;
|
||||
|
||||
/**
|
||||
* API Key
|
||||
*/
|
||||
private String apiKey;
|
||||
|
||||
/**
|
||||
* 优先级(数字越小优先级越高,用于故障转移)
|
||||
*/
|
||||
private Integer priority = 100;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取指定供应商配置
|
||||
*
|
||||
* @param providerType 供应商类型
|
||||
* @return 配置对象,不存在返回 null
|
||||
*/
|
||||
public ProviderConfig getProviderConfig(String providerType) {
|
||||
return providers.get(providerType);
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查供应商是否启用
|
||||
*
|
||||
* @param providerType 供应商类型
|
||||
* @return true 如果启用且配置存在
|
||||
*/
|
||||
public boolean isProviderEnabled(String providerType) {
|
||||
ProviderConfig config = getProviderConfig(providerType);
|
||||
return config != null && config.isEnabled();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -19,12 +19,14 @@ import cn.iocoder.yudao.module.tik.file.dal.mysql.TikUserFileMapper;
|
||||
import cn.iocoder.yudao.module.tik.file.service.TikUserFileService;
|
||||
import cn.iocoder.yudao.module.tik.tikhup.service.TikHupService;
|
||||
import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.CosyVoiceClient;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.VoiceCloneProvider;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.VoiceCloneProviderFactory;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.VoiceProviderProperties;
|
||||
import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikUserVoiceDO;
|
||||
import cn.iocoder.yudao.module.tik.voice.dal.mysql.TikUserVoiceMapper;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceCreateReqVO;
|
||||
@@ -84,11 +86,14 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
private TikHupService tikHupService;
|
||||
|
||||
@Resource
|
||||
private CosyVoiceClient cosyVoiceClient;
|
||||
private VoiceCloneProviderFactory voiceProviderFactory;
|
||||
|
||||
@Resource
|
||||
private CosyVoiceProperties cosyVoiceProperties;
|
||||
|
||||
@Resource
|
||||
private VoiceProviderProperties voiceProviderProperties;
|
||||
|
||||
@Resource
|
||||
private StringRedisTemplate stringRedisTemplate;
|
||||
|
||||
@@ -139,17 +144,20 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
.setTranscription(null); // 初始为空,表示未识别
|
||||
voiceMapper.insert(voice);
|
||||
|
||||
// 4. 调用阿里云语音复刻服务,生成 voice_id
|
||||
// 4. 调用语音克隆服务,生成 voice_id
|
||||
try {
|
||||
log.info("[createVoice][开始语音复刻,配音编号({}),文件ID({})]", voice.getId(), fileDO.getId());
|
||||
log.info("[createVoice][开始语音复刻,配音编号({}),文件ID({}),供应商({})]",
|
||||
voice.getId(), fileDO.getId(), createReqVO.getProviderType());
|
||||
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
|
||||
CosyVoiceCloneRequest cloneRequest = new CosyVoiceCloneRequest();
|
||||
cloneRequest.setTargetModel("cosyvoice-v3-flash"); // 使用v3-flash模型
|
||||
// 使用 Provider 接口(支持前端选择供应商,不传则使用默认)
|
||||
VoiceCloneProvider provider = voiceProviderFactory.getProvider(createReqVO.getProviderType());
|
||||
VoiceCloneRequest cloneRequest = new VoiceCloneRequest();
|
||||
cloneRequest.setAudioUrl(fileAccessUrl);
|
||||
cloneRequest.setModel("cosyvoice-v3-flash"); // 使用v3-flash模型
|
||||
cloneRequest.setPrefix("voice" + voice.getId()); // 音色前缀,格式要求
|
||||
cloneRequest.setUrl(fileAccessUrl);
|
||||
|
||||
CosyVoiceCloneResult cloneResult = cosyVoiceClient.cloneVoice(cloneRequest);
|
||||
VoiceCloneResult cloneResult = provider.cloneVoice(cloneRequest);
|
||||
String voiceId = cloneResult.getVoiceId();
|
||||
|
||||
// 更新配音记录,保存 voice_id
|
||||
@@ -432,22 +440,26 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
return buildSynthResponseFromCache(reqVO, synthCache);
|
||||
}
|
||||
|
||||
CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
|
||||
finalText,
|
||||
voiceId,
|
||||
fileUrl,
|
||||
transcriptionText,
|
||||
reqVO.getModel(),
|
||||
reqVO.getSpeechRate(),
|
||||
reqVO.getVolume(),
|
||||
reqVO.getInstruction(),
|
||||
reqVO.getSampleRate(),
|
||||
reqVO.getAudioFormat(),
|
||||
false
|
||||
));
|
||||
// 使用 Provider 接口进行 TTS 合成(支持前端选择供应商,不传则使用默认)
|
||||
VoiceCloneProvider provider = voiceProviderFactory.getProvider(reqVO.getProviderType());
|
||||
VoiceTtsRequest ttsRequest = VoiceTtsRequest.builder()
|
||||
.text(finalText)
|
||||
.voiceId(voiceId)
|
||||
.fileUrl(fileUrl)
|
||||
.referenceText(transcriptionText)
|
||||
.model(reqVO.getModel())
|
||||
.speechRate(reqVO.getSpeechRate())
|
||||
.volume(reqVO.getVolume())
|
||||
.instruction(reqVO.getInstruction())
|
||||
.sampleRate(reqVO.getSampleRate())
|
||||
.audioFormat(reqVO.getAudioFormat())
|
||||
.preview(false)
|
||||
.build();
|
||||
|
||||
VoiceTtsResult ttsResult = provider.synthesize(ttsRequest);
|
||||
|
||||
String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat());
|
||||
String finalVoiceId = StrUtil.blankToDefault(voiceId, cosyVoiceProperties.getDefaultVoiceId());
|
||||
String finalVoiceId = StrUtil.blankToDefault(voiceId, getDefaultVoiceId());
|
||||
|
||||
// 【安全方案】不暴露OSS链接,直接返回Base64编码的音频数据
|
||||
String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio());
|
||||
@@ -527,7 +539,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
voiceId = voice.getVoiceId();
|
||||
// 注意:使用 voiceId 时,不依赖 transcriptionText,直接使用前端传入的 inputText
|
||||
transcriptionText = null; // 清除 transcriptionText
|
||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(), cosyVoiceProperties.getPreviewText());
|
||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
|
||||
} else {
|
||||
log.info("[previewVoice][使用文件URL试听,配音编号({})]", voiceConfigId);
|
||||
// 获取文件信息,用于获取文件URL
|
||||
@@ -543,17 +555,17 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
|
||||
}
|
||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(),
|
||||
StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText()));
|
||||
StrUtil.blankToDefault(transcriptionText, getPreviewText()));
|
||||
}
|
||||
}
|
||||
// 3. 如果没有配置ID,使用系统配音配置(需要前端传voiceId)
|
||||
else {
|
||||
log.info("[previewVoice][开始试听,使用系统配音配置,用户({})]", userId);
|
||||
voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
|
||||
voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), getDefaultVoiceId());
|
||||
if (StrUtil.isBlank(voiceId)) {
|
||||
throw exception(VOICE_NOT_EXISTS, "系统配音音色ID不能为空");
|
||||
}
|
||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(), cosyVoiceProperties.getPreviewText());
|
||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
|
||||
}
|
||||
|
||||
String finalText = determineSynthesisText(
|
||||
@@ -588,21 +600,26 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
return buildPreviewResp(cachedBase64, previewCache.getFormat(), voiceId);
|
||||
}
|
||||
|
||||
log.info("[previewVoice][调用CosyVoice合成,配音编号({}),voiceId({}),fileUrl({}),文本长度({})]",
|
||||
voiceConfigId, voiceId, fileUrl, finalText.length());
|
||||
CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
|
||||
finalText,
|
||||
voiceId,
|
||||
fileUrl,
|
||||
transcriptionText, // 参考音频文本,用于提高克隆质量
|
||||
null, // 使用默认模型
|
||||
speechRate,
|
||||
volume,
|
||||
instruction,
|
||||
null,
|
||||
audioFormat,
|
||||
true
|
||||
));
|
||||
log.info("[previewVoice][调用语音合成服务,配音编号({}),voiceId({}),fileUrl({}),文本长度({}),供应商({})]",
|
||||
voiceConfigId, voiceId, fileUrl, finalText.length(), reqVO.getProviderType());
|
||||
|
||||
// 使用 Provider 接口进行 TTS 合成(支持前端选择供应商,不传则使用默认)
|
||||
VoiceCloneProvider provider = voiceProviderFactory.getProvider(reqVO.getProviderType());
|
||||
VoiceTtsRequest ttsRequest = VoiceTtsRequest.builder()
|
||||
.text(finalText)
|
||||
.voiceId(voiceId)
|
||||
.fileUrl(fileUrl)
|
||||
.referenceText(transcriptionText)
|
||||
.model(null) // 使用默认模型
|
||||
.speechRate(speechRate)
|
||||
.volume(volume)
|
||||
.instruction(instruction)
|
||||
.sampleRate(null)
|
||||
.audioFormat(audioFormat)
|
||||
.preview(true)
|
||||
.build();
|
||||
|
||||
VoiceTtsResult ttsResult = provider.synthesize(ttsRequest);
|
||||
|
||||
String format = defaultFormat(ttsResult.getFormat(), audioFormat);
|
||||
String identifier = StrUtil.isNotBlank(voiceId) ? voiceId : "voice";
|
||||
@@ -622,35 +639,53 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
return buildPreviewResp(audioBase64, format, voiceId);
|
||||
}
|
||||
|
||||
private CosyVoiceTtsRequest buildTtsRequest(String text,
|
||||
String voiceId,
|
||||
String fileUrl,
|
||||
String referenceText,
|
||||
String model,
|
||||
Float speechRate,
|
||||
Float volume,
|
||||
String instruction,
|
||||
Integer sampleRate,
|
||||
String audioFormat,
|
||||
boolean preview) {
|
||||
return CosyVoiceTtsRequest.builder()
|
||||
.text(text)
|
||||
.voiceId(voiceId)
|
||||
.fileUrl(fileUrl)
|
||||
.referenceText(referenceText)
|
||||
.model(model)
|
||||
.speechRate(speechRate)
|
||||
.volume(volume)
|
||||
.instruction(instruction)
|
||||
.sampleRate(sampleRate)
|
||||
.audioFormat(audioFormat)
|
||||
.preview(preview)
|
||||
.build();
|
||||
/**
|
||||
* 获取 CosyVoice 配置(统一入口)
|
||||
* 优先使用新配置,回退到旧配置
|
||||
*/
|
||||
private cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig getCosyVoiceConfig() {
|
||||
if (voiceProviderProperties != null) {
|
||||
var config = voiceProviderProperties.getProviderConfig("cosyvoice");
|
||||
if (config instanceof cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig cosyConfig) {
|
||||
return cosyConfig;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取默认音频格式
|
||||
* 优先使用新配置,回退到旧配置
|
||||
*/
|
||||
private String getDefaultFormat() {
|
||||
var config = getCosyVoiceConfig();
|
||||
if (config != null) {
|
||||
return config.getAudioFormat();
|
||||
}
|
||||
if (cosyVoiceProperties != null) {
|
||||
return cosyVoiceProperties.getAudioFormat();
|
||||
}
|
||||
return "mp3";
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取默认采样率
|
||||
* 优先使用新配置,回退到旧配置
|
||||
*/
|
||||
private Integer getDefaultSampleRate() {
|
||||
var config = getCosyVoiceConfig();
|
||||
if (config != null) {
|
||||
return config.getSampleRate();
|
||||
}
|
||||
if (cosyVoiceProperties != null) {
|
||||
return cosyVoiceProperties.getSampleRate();
|
||||
}
|
||||
return 24000;
|
||||
}
|
||||
|
||||
private String defaultFormat(String responseFormat, String requestFormat) {
|
||||
return StrUtil.blankToDefault(responseFormat,
|
||||
StrUtil.blankToDefault(requestFormat, cosyVoiceProperties.getAudioFormat()));
|
||||
StrUtil.blankToDefault(requestFormat, getDefaultFormat()));
|
||||
}
|
||||
|
||||
private String buildFileName(String voiceId, String format) {
|
||||
@@ -687,7 +722,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
return builder.toString();
|
||||
}
|
||||
if (allowFallback) {
|
||||
return cosyVoiceProperties.getPreviewText();
|
||||
return getPreviewText();
|
||||
}
|
||||
throw exception(VOICE_TTS_FAILED, "请提供需要合成的文本内容");
|
||||
}
|
||||
@@ -750,15 +785,19 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
} else {
|
||||
identifier = "no-voice";
|
||||
}
|
||||
|
||||
|
||||
// 获取默认配置
|
||||
String defaultFormat = getDefaultFormat();
|
||||
Integer defaultSampleRate = getDefaultSampleRate();
|
||||
|
||||
String payload = StrUtil.join("|",
|
||||
identifier,
|
||||
text,
|
||||
speechRate != null ? speechRate : "1.0",
|
||||
volume != null ? volume : "0",
|
||||
instruction,
|
||||
StrUtil.blankToDefault(audioFormat, cosyVoiceProperties.getAudioFormat()),
|
||||
sampleRate != null ? sampleRate : cosyVoiceProperties.getSampleRate());
|
||||
StrUtil.blankToDefault(audioFormat, defaultFormat),
|
||||
sampleRate != null ? sampleRate : defaultSampleRate);
|
||||
String hash = cn.hutool.crypto.SecureUtil.sha256(payload);
|
||||
return prefix + hash;
|
||||
}
|
||||
@@ -1123,5 +1162,35 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
builder.append(normalized);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取默认音色ID
|
||||
* 优先使用新配置,回退到旧配置
|
||||
*/
|
||||
private String getDefaultVoiceId() {
|
||||
var config = getCosyVoiceConfig();
|
||||
if (config != null) {
|
||||
return config.getDefaultVoiceId();
|
||||
}
|
||||
if (cosyVoiceProperties != null) {
|
||||
return cosyVoiceProperties.getDefaultVoiceId();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取试听文本
|
||||
* 优先使用新配置,回退到旧配置
|
||||
*/
|
||||
private String getPreviewText() {
|
||||
var config = getCosyVoiceConfig();
|
||||
if (config != null) {
|
||||
return config.getPreviewText();
|
||||
}
|
||||
if (cosyVoiceProperties != null) {
|
||||
return cosyVoiceProperties.getPreviewText();
|
||||
}
|
||||
return "您好,欢迎体验专属音色。";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -34,5 +34,7 @@ public class AppTikUserVoiceCreateReqVO {
|
||||
@Schema(description = "备注", example = "这是一个测试配音")
|
||||
private String note;
|
||||
|
||||
}
|
||||
@Schema(description = "供应商类型:cosyvoice-阿里云,siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice")
|
||||
private String providerType;
|
||||
|
||||
}
|
||||
|
||||
@@ -42,6 +42,8 @@ public class AppTikVoicePreviewReqVO {
|
||||
|
||||
@Schema(description = "指令(用于控制音色风格)", example = "请用温柔专业的语调朗读")
|
||||
private String instruction;
|
||||
|
||||
@Schema(description = "供应商类型:cosyvoice-阿里云,siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice")
|
||||
private String providerType;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -44,6 +44,8 @@ public class AppTikVoiceTtsReqVO {
|
||||
|
||||
@Schema(description = "音频格式,默认 wav,可选 mp3")
|
||||
private String audioFormat;
|
||||
|
||||
@Schema(description = "供应商类型:cosyvoice-阿里云,siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice")
|
||||
private String providerType;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user