feat: 功能优化

This commit is contained in:
2026-01-27 01:39:08 +08:00
parent bf12e70339
commit 24f66c8e81
24 changed files with 1570 additions and 133 deletions

View File

@@ -0,0 +1,160 @@
package cn.iocoder.yudao.module.tik.voice.client;
import cn.hutool.core.util.StrUtil;
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig;
import cn.iocoder.yudao.module.tik.voice.config.VoiceProviderProperties;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
/**
 * {@link VoiceCloneProvider} adapter for the Alibaba Cloud CosyVoice speech service.
 *
 * <p>Converts the provider-neutral request/result DTOs to and from their CosyVoice-specific
 * counterparts and delegates the actual API calls to {@link CosyVoiceClient}.
 *
 * @author 芋道源码
 */
@Slf4j
@Component
@RequiredArgsConstructor
public class CosyVoiceProvider implements VoiceCloneProvider {

    /** Type key used both for provider lookup and for the configuration map entry. */
    private static final String PROVIDER_TYPE = "cosyvoice";

    /** Low-level client that performs the CosyVoice calls. */
    private final CosyVoiceClient cosyVoiceClient;

    /** New-style configuration (multi-provider). */
    private final VoiceProviderProperties voiceProviderProperties;

    /** Legacy configuration, kept for backward compatibility. */
    private final CosyVoiceProperties cosyVoiceProperties;

    /**
     * Resolves the CosyVoice configuration.
     *
     * <p>Resolution order: the new-style entry if it is a {@link CosyVoiceProviderConfig};
     * otherwise a copy of the legacy properties when they are present and enabled; otherwise a
     * freshly constructed config carrying only its declared defaults.
     *
     * <p>Not called by any other method of this class as shown.
     *
     * @return the resolved configuration, never {@code null}
     */
    private CosyVoiceProviderConfig getConfig() {
        if (voiceProviderProperties.getProviderConfig(PROVIDER_TYPE) instanceof CosyVoiceProviderConfig typed) {
            return typed;
        }
        boolean legacyUsable = cosyVoiceProperties != null && cosyVoiceProperties.isEnabled();
        return legacyUsable
                ? migrateFromLegacyConfig(cosyVoiceProperties)
                : new CosyVoiceProviderConfig();
    }

    /**
     * Copies every setting of the legacy properties into a new-style config object.
     *
     * @param legacy legacy CosyVoice properties to copy from
     * @return a new config marked as enabled and populated from {@code legacy}
     */
    private CosyVoiceProviderConfig migrateFromLegacyConfig(CosyVoiceProperties legacy) {
        var migrated = new CosyVoiceProviderConfig();
        migrated.setEnabled(true);
        migrated.setApiKey(legacy.getApiKey());
        // Model / voice defaults
        migrated.setDefaultModel(legacy.getDefaultModel());
        migrated.setDefaultVoiceId(legacy.getDefaultVoiceId());
        // Audio output defaults
        migrated.setSampleRate(legacy.getSampleRate());
        migrated.setAudioFormat(legacy.getAudioFormat());
        migrated.setPreviewText(legacy.getPreviewText());
        // Endpoints and timeouts
        migrated.setTtsUrl(legacy.getTtsUrl());
        migrated.setVoiceEnrollmentUrl(legacy.getVoiceEnrollmentUrl());
        migrated.setConnectTimeout(legacy.getConnectTimeout());
        migrated.setReadTimeout(legacy.getReadTimeout());
        return migrated;
    }

    /**
     * Clones a voice through CosyVoice.
     *
     * @param request provider-neutral clone request
     * @return provider-neutral result carrying the voice id and request id reported by the client
     */
    @Override
    public VoiceCloneResult cloneVoice(VoiceCloneRequest request) {
        log.info("[CosyVoiceProvider][语音克隆][audioUrl={}, model={}]",
                request.getAudioUrl(), request.getModel());

        var vendorResult = cosyVoiceClient.cloneVoice(toVendorCloneRequest(request));

        var unified = new VoiceCloneResult();
        unified.setVoiceId(vendorResult.getVoiceId());
        unified.setRequestId(vendorResult.getRequestId());
        log.info("[CosyVoiceProvider][语音克隆成功][voiceId={}]", unified.getVoiceId());
        return unified;
    }

    /**
     * Maps the provider-neutral clone request onto the CosyVoice request type.
     * Sample rate and audio format are only set when the caller supplied them, so the
     * CosyVoice request keeps whatever values it starts with otherwise.
     */
    private cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest toVendorCloneRequest(
            VoiceCloneRequest request) {
        var vendorRequest = new cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest();
        vendorRequest.setUrl(request.getAudioUrl());
        vendorRequest.setTargetModel(request.getModel());
        vendorRequest.setPrefix(request.getPrefix());
        if (request.getSampleRate() != null) {
            vendorRequest.setSampleRate(request.getSampleRate());
        }
        if (request.getAudioFormat() != null) {
            vendorRequest.setAudioFormat(request.getAudioFormat());
        }
        return vendorRequest;
    }

    /**
     * Synthesizes speech through CosyVoice.
     *
     * @param request provider-neutral TTS request
     * @return provider-neutral result copied field by field from the client's result
     */
    @Override
    public VoiceTtsResult synthesize(VoiceTtsRequest request) {
        int textLength = request.getText() != null ? request.getText().length() : 0;
        log.info("[CosyVoiceProvider][语音合成][voiceId={}, textLength={}, model={}]",
                request.getVoiceId(),
                textLength,
                request.getModel());

        var vendorResult = cosyVoiceClient.synthesize(toVendorTtsRequest(request));

        var unified = new VoiceTtsResult();
        unified.setRequestId(vendorResult.getRequestId());
        unified.setFormat(vendorResult.getFormat());
        unified.setSampleRate(vendorResult.getSampleRate());
        unified.setAudio(vendorResult.getAudio());
        unified.setVoiceId(vendorResult.getVoiceId());

        int audioSize = unified.getAudio() != null ? unified.getAudio().length : 0;
        log.info("[CosyVoiceProvider][语音合成成功][format={}, audioSize={}]",
                unified.getFormat(), audioSize);
        return unified;
    }

    /** Maps the provider-neutral TTS request onto the CosyVoice request type, one field to one field. */
    private cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest toVendorTtsRequest(
            VoiceTtsRequest request) {
        return cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest.builder()
                .text(request.getText())
                .voiceId(request.getVoiceId())
                .fileUrl(request.getFileUrl())
                .referenceText(request.getReferenceText())
                .model(request.getModel())
                .speechRate(request.getSpeechRate())
                .volume(request.getVolume())
                .instruction(request.getInstruction())
                .sampleRate(request.getSampleRate())
                .audioFormat(request.getAudioFormat())
                .preview(request.isPreview())
                .build();
    }

    /**
     * Reports whether this provider handles the given type.
     *
     * @param providerType provider type to test; may be {@code null}
     * @return {@code true} when {@code providerType} equals {@code "cosyvoice"} ignoring case
     */
    @Override
    public boolean supports(String providerType) {
        return PROVIDER_TYPE.equalsIgnoreCase(providerType);
    }

    /**
     * @return the fixed type key {@code "cosyvoice"}
     */
    @Override
    public String getProviderType() {
        return PROVIDER_TYPE;
    }
}

View File

@@ -0,0 +1,55 @@
package cn.iocoder.yudao.module.tik.voice.client;
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
/**
 * Unified provider interface for voice cloning and speech synthesis.
 *
 * <p>Allows several vendor implementations to sit behind one contract.
 * Concrete implementations are obtained through {@link VoiceCloneProviderFactory}.
 *
 * @author 芋道源码
 */
public interface VoiceCloneProvider {
/**
 * Clones a voice.
 *
 * <p>Creates a target voice from the audio file URL carried by the request.
 * Vendor-specific details are hidden behind this method.
 *
 * @param request voice clone request
 * @return clone result containing the generated voiceId
 * @throws RuntimeException when cloning fails
 */
VoiceCloneResult cloneVoice(VoiceCloneRequest request);
/**
 * Synthesizes speech from text.
 *
 * <p>Per the original contract note, either a previously cloned voice or a system voice
 * may be used.
 *
 * @param request speech synthesis request
 * @return synthesis result containing the audio data
 * @throws RuntimeException when synthesis fails
 */
VoiceTtsResult synthesize(VoiceTtsRequest request);
/**
 * Checks whether this provider handles the given provider type.
 *
 * @param providerType provider type (e.g. "cosyvoice", "siliconflow")
 * @return {@code true} if supported, {@code false} otherwise
 */
boolean supports(String providerType);
/**
 * Returns the identifier of this provider's type.
 *
 * @return provider type, e.g. "cosyvoice", "siliconflow"
 */
String getProviderType();
}

View File

@@ -0,0 +1,104 @@
package cn.iocoder.yudao.module.tik.voice.client;
import cn.iocoder.yudao.framework.common.exception.ServiceException;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception0;
import static cn.iocoder.yudao.module.tik.enums.ErrorCodeConstants.VOICE_TTS_FAILED;
/**
 * Registry and lookup point for {@link VoiceCloneProvider} implementations.
 *
 * <p>Every provider handed to the constructor is registered under its
 * {@link VoiceCloneProvider#getProviderType()} key. Lookups first try that key and then fall
 * back to asking each provider whether it {@link VoiceCloneProvider#supports(String) supports}
 * the requested type, so the matching rules of a provider (for example case-insensitivity) are
 * honoured.
 *
 * @author 芋道源码
 */
@Slf4j
@Component
public class VoiceCloneProviderFactory {

    /** Registered providers keyed by their type identifier. */
    private final Map<String, VoiceCloneProvider> providers = new ConcurrentHashMap<>();

    /**
     * Type key of the first provider that was registered.
     *
     * <p>Tracked separately because {@link ConcurrentHashMap} has no defined iteration order,
     * so "the first value of the map" is not "the first registered provider".
     */
    private volatile String defaultProviderType;

    /**
     * Registers every supplied provider, in list order.
     *
     * @param providerList providers to register; the first element becomes the default provider
     */
    @Autowired
    public VoiceCloneProviderFactory(List<VoiceCloneProvider> providerList) {
        for (VoiceCloneProvider provider : providerList) {
            registerProvider(provider);
            log.info("[VoiceCloneProviderFactory][注册Provider][type={}]", provider.getProviderType());
        }
    }

    /**
     * Registers a provider under its type key, replacing (with a warning) any provider already
     * registered under the same key.
     *
     * @param provider provider instance
     */
    public synchronized void registerProvider(VoiceCloneProvider provider) {
        String type = provider.getProviderType();
        // put() reports the replaced value, so detection and replacement happen in one step.
        VoiceCloneProvider previous = providers.put(type, provider);
        if (previous != null) {
            log.warn("[VoiceCloneProviderFactory][Provider已存在覆盖][type={}]", type);
        }
        if (defaultProviderType == null) {
            defaultProviderType = type;
        }
    }

    /**
     * Returns the default provider, i.e. the one currently registered under the type key that
     * was registered first.
     *
     * @return the default provider instance
     * @throws ServiceException when no provider has been registered
     */
    public VoiceCloneProvider getDefaultProvider() {
        String type = defaultProviderType;
        VoiceCloneProvider provider = type != null ? providers.get(type) : null;
        if (provider == null) {
            throw exception0(VOICE_TTS_FAILED.getCode(), "未配置任何语音克隆 Provider");
        }
        return provider;
    }

    /**
     * Returns the provider for the given type.
     *
     * @param providerType provider type (e.g. "cosyvoice", "siliconflow"); {@code null} or
     *                     blank selects the default provider; surrounding whitespace is ignored
     * @return the matching provider instance
     * @throws ServiceException when no registered provider matches, or when the default is
     *                          requested and nothing is registered
     */
    public VoiceCloneProvider getProvider(String providerType) {
        if (providerType == null || providerType.trim().isEmpty()) {
            return getDefaultProvider();
        }
        VoiceCloneProvider provider = findProvider(providerType.trim());
        if (provider == null) {
            throw exception0(VOICE_TTS_FAILED.getCode(), "不支持的语音克隆供应商: " + providerType);
        }
        return provider;
    }

    /**
     * Checks whether a provider is available for the given type, using the same matching rules
     * as {@link #getProvider(String)} except that {@code null}/blank never matches.
     *
     * @param providerType provider type
     * @return {@code true} if a registered provider matches, {@code false} otherwise
     */
    public boolean hasProvider(String providerType) {
        if (providerType == null || providerType.trim().isEmpty()) {
            return false;
        }
        return findProvider(providerType.trim()) != null;
    }

    /**
     * Returns a snapshot of all registered provider type keys.
     *
     * @return immutable list of type keys, in no particular order
     */
    public List<String> getAvailableProviderTypes() {
        return List.copyOf(providers.keySet());
    }

    /**
     * Resolves a non-blank, trimmed type to a provider: exact key first, then the first provider
     * whose {@code supports} accepts it.
     *
     * @return the matching provider, or {@code null} when none matches
     */
    private VoiceCloneProvider findProvider(String type) {
        VoiceCloneProvider exact = providers.get(type);
        if (exact != null) {
            return exact;
        }
        for (VoiceCloneProvider candidate : providers.values()) {
            if (candidate.supports(type)) {
                return candidate;
            }
        }
        return null;
    }
}

View File

@@ -0,0 +1,51 @@
package cn.iocoder.yudao.module.tik.voice.client.dto;
import lombok.Data;
/**
 * Provider-neutral request DTO for voice cloning.
 *
 * <p>Hides the differences between vendor APIs behind one request structure.
 * Each provider implementation is responsible for converting this DTO into its
 * vendor-specific format.
 *
 * @author 芋道源码
 */
@Data
public class VoiceCloneRequest {
/**
 * Publicly reachable URL of the source audio file.
 *
 * <p>CosyVoice: maps to the {@code url} field.</p>
 * <p>SiliconFlow: maps to the {@code audio} field, which requires base64 encoding.</p>
 */
private String audioUrl;
/**
 * Model name.
 *
 * <p>CosyVoice: maps to {@code targetModel}, e.g. {@code cosyvoice-v3-flash}.</p>
 * <p>SiliconFlow: maps to {@code model}, e.g. {@code indextts-2}.</p>
 */
private String model;
/**
 * Custom prefix for the generated voice (optional in general).
 *
 * <p>CosyVoice: required; digits and lowercase letters only, fewer than 10 characters.
 * This class does not validate that constraint.</p>
 * <p>SiliconFlow: not applicable.</p>
 */
private String prefix;
/**
 * Sample rate; documented default is 24000.
 * No default is assigned here, so a {@code null} value presumably leaves the choice to the
 * provider — confirm.
 */
private Integer sampleRate;
/**
 * Audio format; documented default is mp3.
 * No default is assigned here, so a {@code null} value presumably leaves the choice to the
 * provider — confirm.
 *
 * <p>Allowed values: mp3, wav, flac.</p>
 */
private String audioFormat;
}

View File

@@ -0,0 +1,24 @@
package cn.iocoder.yudao.module.tik.voice.client.dto;
import lombok.Data;
/**
 * Provider-neutral result DTO for voice cloning.
 *
 * @author 芋道源码
 */
@Data
public class VoiceCloneResult {
/**
 * ID of the generated voice.
 *
 * <p>Used for subsequent TTS synthesis requests.</p>
 */
private String voiceId;
/**
 * Request ID, kept for tracing.
 */
private String requestId;
}

View File

@@ -0,0 +1,77 @@
package cn.iocoder.yudao.module.tik.voice.client.dto;
import lombok.Builder;
import lombok.Data;
/**
 * Provider-neutral request DTO for text-to-speech synthesis.
 *
 * <p>Hides the differences between vendor APIs behind one request structure.
 * Instances are created through the Lombok-generated builder.
 *
 * @author 芋道源码
 */
@Data
@Builder
public class VoiceTtsRequest {
/**
 * Text to synthesize.
 */
private String text;
/**
 * Voice ID (optional; the configured default is documented as the fallback).
 *
 * <p>Typically the voiceId produced by voice cloning.</p>
 */
private String voiceId;
/**
 * URL of a voice sample file, used instead of {@link #voiceId} when synthesizing directly
 * from an audio URL.
 *
 * <p>Intended for on-the-fly cloning without a prior clone step.</p>
 */
private String fileUrl;
/**
 * Transcript of the reference audio; used together with {@link #fileUrl} to improve
 * cloning quality.
 */
private String referenceText;
/**
 * Model name; when absent the provider's default model is documented as the fallback.
 */
private String model;
/**
 * Speech rate; documented range 0.5 - 2.0, default 1.0 (not validated by this class).
 */
private Float speechRate;
/**
 * Volume; documented range -100 - 100, default 0 (not validated by this class).
 */
private Float volume;
/**
 * Instruction used to steer the voice style (optional).
 */
private String instruction;
/**
 * Sample rate; documented default is 24000 (no default is assigned here).
 */
private Integer sampleRate;
/**
 * Audio format; documented default is mp3 (no default is assigned here).
 *
 * <p>Allowed values: mp3, wav, flac.</p>
 */
private String audioFormat;
/**
 * Whether the request is only a preview, so the service side can apply rate limiting.
 */
private boolean preview;
}

View File

@@ -0,0 +1,39 @@
package cn.iocoder.yudao.module.tik.voice.client.dto;
import lombok.Data;
/**
 * Provider-neutral result DTO for text-to-speech synthesis.
 *
 * @author 芋道源码
 */
@Data
public class VoiceTtsResult {
/**
 * Request ID, kept for tracing.
 */
private String requestId;
/**
 * Format of the returned audio.
 *
 * <p>For example mp3, wav or flac.</p>
 */
private String format;
/**
 * Sample rate of the returned audio.
 */
private Integer sampleRate;
/**
 * Raw binary audio content.
 */
private byte[] audio;
/**
 * The voiceId that was used to produce the audio.
 */
private String voiceId;
}

View File

@@ -0,0 +1,64 @@
package cn.iocoder.yudao.module.tik.voice.config;
import lombok.Data;
import lombok.EqualsAndHashCode;
import java.time.Duration;
/**
 * Configuration for the CosyVoice provider.
 *
 * <p>Extends the common provider configuration with CosyVoice-specific settings.
 * Each field's initializer is the value used when nothing overrides it.
 *
 * @author 芋道源码
 */
@Data
@EqualsAndHashCode(callSuper = true)
public class CosyVoiceProviderConfig extends VoiceProviderProperties.ProviderConfig {
/**
 * Default model.
 */
private String defaultModel = "cosyvoice-v3-flash";
/**
 * Default voiceId (optional; {@code null} unless configured).
 */
private String defaultVoiceId;
/**
 * Default sample rate.
 */
private Integer sampleRate = 24000;
/**
 * Default audio format.
 */
private String audioFormat = "mp3";
/**
 * Default sample text used for previews.
 */
private String previewText = "您好,欢迎体验专属音色。";
/**
 * TTS endpoint URL.
 */
private String ttsUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis";
/**
 * Voice cloning (voice enrollment) endpoint URL.
 */
private String voiceEnrollmentUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment";
/**
 * Connection timeout.
 */
private Duration connectTimeout = Duration.ofSeconds(10);
/**
 * Read timeout; 3 minutes, chosen per the original note to improve the synthesis success rate.
 */
private Duration readTimeout = Duration.ofSeconds(180);
}

View File

@@ -0,0 +1,78 @@
package cn.iocoder.yudao.module.tik.voice.config;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
import java.util.HashMap;
import java.util.Map;
/**
 * Unified configuration for voice providers, bound from the {@code yudao.voice} prefix.
 *
 * <p>Holds the default provider type and a per-provider configuration map.
 *
 * @author 芋道源码
 */
@Data
@Component
@ConfigurationProperties(prefix = "yudao.voice")
public class VoiceProviderProperties {

    /**
     * Default provider type, e.g. {@code cosyvoice} or {@code siliconflow}.
     *
     * <p>NOTE(review): {@code VoiceCloneProviderFactory} does not appear to read this value —
     * confirm where it is applied.
     */
    private String defaultProvider = "cosyvoice";

    /**
     * Per-provider configuration, keyed by provider type (e.g. {@code cosyvoice}).
     *
     * <p>NOTE(review): the declared value type is {@link ProviderConfig}; property binding
     * presumably instantiates that base type, so subclass-only settings (such as those of
     * {@code CosyVoiceProviderConfig}) may never be populated from configuration — confirm.
     */
    private Map<String, ProviderConfig> providers = new HashMap<>();

    /**
     * Settings shared by every provider; vendor-specific configs extend this class.
     */
    @Data
    public static class ProviderConfig {

        /** Whether the provider is enabled. */
        private boolean enabled = true;

        /** API key. */
        private String apiKey;

        /** Priority; per the original note a lower number means higher priority (for failover). */
        private Integer priority = 100;
    }

    /**
     * Looks up the configuration registered for a provider type.
     *
     * @param providerType provider type
     * @return the configuration object, or {@code null} when none exists
     */
    public ProviderConfig getProviderConfig(String providerType) {
        ProviderConfig found = providers.get(providerType);
        return found;
    }

    /**
     * Tells whether a provider is both configured and enabled.
     *
     * @param providerType provider type
     * @return {@code true} only when a configuration exists and its enabled flag is set
     */
    public boolean isProviderEnabled(String providerType) {
        ProviderConfig found = getProviderConfig(providerType);
        if (found == null) {
            return false;
        }
        return found.isEnabled();
    }
}

View File

@@ -19,12 +19,14 @@ import cn.iocoder.yudao.module.tik.file.dal.mysql.TikUserFileMapper;
import cn.iocoder.yudao.module.tik.file.service.TikUserFileService;
import cn.iocoder.yudao.module.tik.tikhup.service.TikHupService;
import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
import cn.iocoder.yudao.module.tik.voice.client.CosyVoiceClient;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
import cn.iocoder.yudao.module.tik.voice.client.VoiceCloneProvider;
import cn.iocoder.yudao.module.tik.voice.client.VoiceCloneProviderFactory;
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
import cn.iocoder.yudao.module.tik.voice.config.VoiceProviderProperties;
import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikUserVoiceDO;
import cn.iocoder.yudao.module.tik.voice.dal.mysql.TikUserVoiceMapper;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceCreateReqVO;
@@ -84,11 +86,14 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
private TikHupService tikHupService;
@Resource
private CosyVoiceClient cosyVoiceClient;
private VoiceCloneProviderFactory voiceProviderFactory;
@Resource
private CosyVoiceProperties cosyVoiceProperties;
@Resource
private VoiceProviderProperties voiceProviderProperties;
@Resource
private StringRedisTemplate stringRedisTemplate;
@@ -139,17 +144,20 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
.setTranscription(null); // 初始为空,表示未识别
voiceMapper.insert(voice);
// 4. 调用阿里云语音复刻服务,生成 voice_id
// 4. 调用语音克隆服务,生成 voice_id
try {
log.info("[createVoice][开始语音复刻,配音编号({})文件ID({})]", voice.getId(), fileDO.getId());
log.info("[createVoice][开始语音复刻,配音编号({})文件ID({}),供应商({})]",
voice.getId(), fileDO.getId(), createReqVO.getProviderType());
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
CosyVoiceCloneRequest cloneRequest = new CosyVoiceCloneRequest();
cloneRequest.setTargetModel("cosyvoice-v3-flash"); // 使用v3-flash模型
// 使用 Provider 接口(支持前端选择供应商,不传则使用默认)
VoiceCloneProvider provider = voiceProviderFactory.getProvider(createReqVO.getProviderType());
VoiceCloneRequest cloneRequest = new VoiceCloneRequest();
cloneRequest.setAudioUrl(fileAccessUrl);
cloneRequest.setModel("cosyvoice-v3-flash"); // 使用v3-flash模型
cloneRequest.setPrefix("voice" + voice.getId()); // 音色前缀,格式要求
cloneRequest.setUrl(fileAccessUrl);
CosyVoiceCloneResult cloneResult = cosyVoiceClient.cloneVoice(cloneRequest);
VoiceCloneResult cloneResult = provider.cloneVoice(cloneRequest);
String voiceId = cloneResult.getVoiceId();
// 更新配音记录,保存 voice_id
@@ -432,22 +440,26 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
return buildSynthResponseFromCache(reqVO, synthCache);
}
CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
finalText,
voiceId,
fileUrl,
transcriptionText,
reqVO.getModel(),
reqVO.getSpeechRate(),
reqVO.getVolume(),
reqVO.getInstruction(),
reqVO.getSampleRate(),
reqVO.getAudioFormat(),
false
));
// 使用 Provider 接口进行 TTS 合成(支持前端选择供应商,不传则使用默认)
VoiceCloneProvider provider = voiceProviderFactory.getProvider(reqVO.getProviderType());
VoiceTtsRequest ttsRequest = VoiceTtsRequest.builder()
.text(finalText)
.voiceId(voiceId)
.fileUrl(fileUrl)
.referenceText(transcriptionText)
.model(reqVO.getModel())
.speechRate(reqVO.getSpeechRate())
.volume(reqVO.getVolume())
.instruction(reqVO.getInstruction())
.sampleRate(reqVO.getSampleRate())
.audioFormat(reqVO.getAudioFormat())
.preview(false)
.build();
VoiceTtsResult ttsResult = provider.synthesize(ttsRequest);
String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat());
String finalVoiceId = StrUtil.blankToDefault(voiceId, cosyVoiceProperties.getDefaultVoiceId());
String finalVoiceId = StrUtil.blankToDefault(voiceId, getDefaultVoiceId());
// 【安全方案】不暴露OSS链接直接返回Base64编码的音频数据
String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio());
@@ -527,7 +539,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
voiceId = voice.getVoiceId();
// 注意:使用 voiceId 时,不依赖 transcriptionText直接使用前端传入的 inputText
transcriptionText = null; // 清除 transcriptionText
inputText = StrUtil.blankToDefault(reqVO.getInputText(), cosyVoiceProperties.getPreviewText());
inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
} else {
log.info("[previewVoice][使用文件URL试听配音编号({})]", voiceConfigId);
// 获取文件信息用于获取文件URL
@@ -543,17 +555,17 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
}
inputText = StrUtil.blankToDefault(reqVO.getInputText(),
StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText()));
StrUtil.blankToDefault(transcriptionText, getPreviewText()));
}
}
// 3. 如果没有配置ID使用系统配音配置需要前端传voiceId
else {
log.info("[previewVoice][开始试听,使用系统配音配置,用户({})]", userId);
voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), getDefaultVoiceId());
if (StrUtil.isBlank(voiceId)) {
throw exception(VOICE_NOT_EXISTS, "系统配音音色ID不能为空");
}
inputText = StrUtil.blankToDefault(reqVO.getInputText(), cosyVoiceProperties.getPreviewText());
inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
}
String finalText = determineSynthesisText(
@@ -588,21 +600,26 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
return buildPreviewResp(cachedBase64, previewCache.getFormat(), voiceId);
}
log.info("[previewVoice][调用CosyVoice合成,配音编号({})voiceId({})fileUrl({}),文本长度({})]",
voiceConfigId, voiceId, fileUrl, finalText.length());
CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
finalText,
voiceId,
fileUrl,
transcriptionText, // 参考音频文本,用于提高克隆质量
null, // 使用默认模型
speechRate,
volume,
instruction,
null,
audioFormat,
true
));
log.info("[previewVoice][调用语音合成服务,配音编号({})voiceId({})fileUrl({}),文本长度({}),供应商({})]",
voiceConfigId, voiceId, fileUrl, finalText.length(), reqVO.getProviderType());
// 使用 Provider 接口进行 TTS 合成(支持前端选择供应商,不传则使用默认)
VoiceCloneProvider provider = voiceProviderFactory.getProvider(reqVO.getProviderType());
VoiceTtsRequest ttsRequest = VoiceTtsRequest.builder()
.text(finalText)
.voiceId(voiceId)
.fileUrl(fileUrl)
.referenceText(transcriptionText)
.model(null) // 使用默认模型
.speechRate(speechRate)
.volume(volume)
.instruction(instruction)
.sampleRate(null)
.audioFormat(audioFormat)
.preview(true)
.build();
VoiceTtsResult ttsResult = provider.synthesize(ttsRequest);
String format = defaultFormat(ttsResult.getFormat(), audioFormat);
String identifier = StrUtil.isNotBlank(voiceId) ? voiceId : "voice";
@@ -622,35 +639,53 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
return buildPreviewResp(audioBase64, format, voiceId);
}
private CosyVoiceTtsRequest buildTtsRequest(String text,
String voiceId,
String fileUrl,
String referenceText,
String model,
Float speechRate,
Float volume,
String instruction,
Integer sampleRate,
String audioFormat,
boolean preview) {
return CosyVoiceTtsRequest.builder()
.text(text)
.voiceId(voiceId)
.fileUrl(fileUrl)
.referenceText(referenceText)
.model(model)
.speechRate(speechRate)
.volume(volume)
.instruction(instruction)
.sampleRate(sampleRate)
.audioFormat(audioFormat)
.preview(preview)
.build();
/**
 * Looks up the new-style CosyVoice configuration.
 *
 * <p>This method only consults {@code voiceProviderProperties}; it performs no legacy
 * fallback itself. Callers handle a {@code null} result by falling back to the legacy
 * properties.
 *
 * @return the {@code "cosyvoice"} entry when it is a {@code CosyVoiceProviderConfig},
 *         otherwise {@code null}
 */
private cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig getCosyVoiceConfig() {
    if (voiceProviderProperties == null) {
        return null;
    }
    if (voiceProviderProperties.getProviderConfig("cosyvoice")
            instanceof cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig typed) {
        return typed;
    }
    return null;
}
/**
 * Resolves the default audio format.
 *
 * <p>Order: the new-style config value, then the legacy properties value, then {@code "mp3"}.
 * A blank value at one level falls through to the next, so an unset format in the new config
 * no longer hides the legacy setting or the built-in default.
 *
 * @return a non-blank audio format
 */
private String getDefaultFormat() {
    var config = getCosyVoiceConfig();
    String fromNew = config != null ? config.getAudioFormat() : null;
    String fromLegacy = cosyVoiceProperties != null ? cosyVoiceProperties.getAudioFormat() : null;
    return StrUtil.blankToDefault(fromNew, StrUtil.blankToDefault(fromLegacy, "mp3"));
}
/**
 * Resolves the default sample rate.
 *
 * <p>Order: the new-style config value, then the legacy properties value, then {@code 24000}.
 * A {@code null} value at one level falls through to the next instead of being returned.
 *
 * @return a non-null sample rate
 */
private Integer getDefaultSampleRate() {
    var config = getCosyVoiceConfig();
    Integer fromNew = config != null ? config.getSampleRate() : null;
    if (fromNew != null) {
        return fromNew;
    }
    Integer fromLegacy = cosyVoiceProperties != null ? cosyVoiceProperties.getSampleRate() : null;
    if (fromLegacy != null) {
        return fromLegacy;
    }
    return 24000;
}
private String defaultFormat(String responseFormat, String requestFormat) {
return StrUtil.blankToDefault(responseFormat,
StrUtil.blankToDefault(requestFormat, cosyVoiceProperties.getAudioFormat()));
StrUtil.blankToDefault(requestFormat, getDefaultFormat()));
}
private String buildFileName(String voiceId, String format) {
@@ -687,7 +722,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
return builder.toString();
}
if (allowFallback) {
return cosyVoiceProperties.getPreviewText();
return getPreviewText();
}
throw exception(VOICE_TTS_FAILED, "请提供需要合成的文本内容");
}
@@ -750,15 +785,19 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
} else {
identifier = "no-voice";
}
// 获取默认配置
String defaultFormat = getDefaultFormat();
Integer defaultSampleRate = getDefaultSampleRate();
String payload = StrUtil.join("|",
identifier,
text,
speechRate != null ? speechRate : "1.0",
volume != null ? volume : "0",
instruction,
StrUtil.blankToDefault(audioFormat, cosyVoiceProperties.getAudioFormat()),
sampleRate != null ? sampleRate : cosyVoiceProperties.getSampleRate());
StrUtil.blankToDefault(audioFormat, defaultFormat),
sampleRate != null ? sampleRate : defaultSampleRate);
String hash = cn.hutool.crypto.SecureUtil.sha256(payload);
return prefix + hash;
}
@@ -1123,5 +1162,35 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
builder.append(normalized);
}
/**
 * Resolves the default voice ID.
 *
 * <p>Order: the new-style config value, then the legacy properties value. The new-style
 * config declares no default for this field, so a blank value there falls through to the
 * legacy setting instead of hiding it.
 *
 * @return the configured default voice ID, or {@code null} when neither source provides one
 */
private String getDefaultVoiceId() {
    var config = getCosyVoiceConfig();
    String fromNew = config != null ? config.getDefaultVoiceId() : null;
    if (StrUtil.isNotBlank(fromNew)) {
        return fromNew;
    }
    String fromLegacy = cosyVoiceProperties != null ? cosyVoiceProperties.getDefaultVoiceId() : null;
    return StrUtil.isNotBlank(fromLegacy) ? fromLegacy : null;
}
/**
 * Resolves the sample text used for previews.
 *
 * <p>Order: the new-style config value, then the legacy properties value, then a built-in
 * sentence. A blank value at one level falls through to the next, so the result is always
 * usable as synthesis input.
 *
 * @return a non-blank preview text
 */
private String getPreviewText() {
    var config = getCosyVoiceConfig();
    String fromNew = config != null ? config.getPreviewText() : null;
    String fromLegacy = cosyVoiceProperties != null ? cosyVoiceProperties.getPreviewText() : null;
    return StrUtil.blankToDefault(fromNew,
            StrUtil.blankToDefault(fromLegacy, "您好,欢迎体验专属音色。"));
}
}

View File

@@ -34,5 +34,7 @@ public class AppTikUserVoiceCreateReqVO {
@Schema(description = "备注", example = "这是一个测试配音")
private String note;
}
@Schema(description = "供应商类型cosyvoice-阿里云siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice")
private String providerType;
}

View File

@@ -42,6 +42,8 @@ public class AppTikVoicePreviewReqVO {
@Schema(description = "指令(用于控制音色风格)", example = "请用温柔专业的语调朗读")
private String instruction;
@Schema(description = "供应商类型cosyvoice-阿里云siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice")
private String providerType;
}

View File

@@ -44,6 +44,8 @@ public class AppTikVoiceTtsReqVO {
@Schema(description = "音频格式,默认 wav可选 mp3")
private String audioFormat;
@Schema(description = "供应商类型cosyvoice-阿里云siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice")
private String providerType;
}