语音优化
This commit is contained in:
@@ -1,355 +0,0 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client;
|
||||
|
||||
import cn.hutool.core.collection.CollUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.iocoder.yudao.framework.common.exception.ServiceException;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig;
|
||||
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
|
||||
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
|
||||
import com.alibaba.dashscope.audio.ttsv2.enrollment.Voice;
|
||||
import com.alibaba.dashscope.audio.ttsv2.enrollment.VoiceEnrollmentService;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import okhttp3.MediaType;
|
||||
import okhttp3.OkHttpClient;
|
||||
import okhttp3.Request;
|
||||
import okhttp3.RequestBody;
|
||||
import okhttp3.Response;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.Duration;
|
||||
import java.util.Base64;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
|
||||
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception0;
|
||||
import static cn.iocoder.yudao.module.tik.enums.ErrorCodeConstants.VOICE_TTS_FAILED;
|
||||
|
||||
/**
|
||||
* CosyVoice 客户端
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class CosyVoiceClient {
|
||||
|
||||
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
|
||||
|
||||
private final CosyVoiceProviderConfig config;
|
||||
private final ObjectMapper objectMapper;
|
||||
|
||||
private volatile OkHttpClient httpClient;
|
||||
|
||||
/**
|
||||
* 调用 CosyVoice TTS 接口
|
||||
*/
|
||||
public CosyVoiceTtsResult synthesize(CosyVoiceTtsRequest request) {
|
||||
if (!config.isEnabled()) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key");
|
||||
}
|
||||
|
||||
// 添加详细的参数检查日志
|
||||
String text = request != null ? request.getText() : null;
|
||||
log.error("[CosyVoice][TTS参数检查][request={}, text={}, voiceId={}, model={}]",
|
||||
request != null ? "存在" : "为null",
|
||||
text != null ? "'" + text + "' (长度:" + text.length() + ")" : "为null",
|
||||
request != null ? request.getVoiceId() : null,
|
||||
request != null ? request.getModel() : null);
|
||||
|
||||
if (request == null || StrUtil.isBlank(request.getText())) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "TTS 文本不能为空");
|
||||
}
|
||||
if (StrUtil.isBlank(request.getVoiceId())) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "必须提供 voiceId");
|
||||
}
|
||||
|
||||
SpeechSynthesizer synthesizer = null;
|
||||
try {
|
||||
log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}, speechRate={}, instruction={}]",
|
||||
request.getVoiceId(),
|
||||
request.getText().length(),
|
||||
StrUtil.blankToDefault(request.getModel(), config.getDefaultModel()),
|
||||
request.getSpeechRate(),
|
||||
request.getInstruction());
|
||||
|
||||
// 使用 DashScope SDK 构建参数(严格按文档)
|
||||
// 注意:speechRate 和 volume 需要转换为 int 类型
|
||||
SpeechSynthesisParam param = SpeechSynthesisParam.builder()
|
||||
.apiKey(config.getApiKey())
|
||||
.model(StrUtil.blankToDefault(request.getModel(), config.getDefaultModel()))
|
||||
.voice(request.getVoiceId())
|
||||
.speechRate(request.getSpeechRate() != null ? request.getSpeechRate().intValue() : 1)
|
||||
.volume(request.getVolume() != null ? request.getVolume().intValue() : 0)
|
||||
.build();
|
||||
|
||||
if (StrUtil.isNotBlank(request.getInstruction())) {
|
||||
param.setInstruction(request.getInstruction());
|
||||
}
|
||||
|
||||
log.error("[CosyVoice][SDK参数][param={}, text='{}']", param, request.getText());
|
||||
|
||||
// 初始化合成器(同步调用传 null)
|
||||
synthesizer = new SpeechSynthesizer(param, null);
|
||||
|
||||
// 阻塞调用,获取完整音频
|
||||
ByteBuffer audioData = synthesizer.call(request.getText());
|
||||
|
||||
if (audioData == null) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 返回空音频数据");
|
||||
}
|
||||
|
||||
// 转换为字节数组(严格按照文档:直接使用 array())
|
||||
byte[] audioBytes = audioData.array();
|
||||
|
||||
log.info("[CosyVoice][TTS合成成功][Request ID: {}, audioSize={}, 首包延迟={}ms]",
|
||||
synthesizer.getLastRequestId(),
|
||||
audioBytes.length,
|
||||
synthesizer.getFirstPackageDelay());
|
||||
|
||||
// 构建返回结果
|
||||
CosyVoiceTtsResult result = new CosyVoiceTtsResult();
|
||||
result.setAudio(audioBytes);
|
||||
result.setFormat(request.getAudioFormat() != null ? request.getAudioFormat() : config.getAudioFormat());
|
||||
result.setSampleRate(request.getSampleRate() != null ? request.getSampleRate() : config.getSampleRate());
|
||||
result.setRequestId(synthesizer.getLastRequestId());
|
||||
result.setVoiceId(request.getVoiceId());
|
||||
|
||||
return result;
|
||||
|
||||
} catch (ServiceException ex) {
|
||||
throw ex;
|
||||
} catch (Exception ex) {
|
||||
log.error("[CosyVoice][TTS异常][voiceId={}, text={}]", request.getVoiceId(), request.getText(), ex);
|
||||
throw exception(VOICE_TTS_FAILED);
|
||||
} finally {
|
||||
// 关闭 WebSocket 连接
|
||||
if (synthesizer != null) {
|
||||
try {
|
||||
synthesizer.getDuplexApi().close(1000, "任务结束");
|
||||
} catch (Exception e) {
|
||||
log.warn("[CosyVoice][关闭连接失败]", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 使用 HTTP API 进行 TTS 合成(备用方案)
|
||||
*/
|
||||
private CosyVoiceTtsResult synthesizeViaHttp(CosyVoiceTtsRequest request) throws Exception {
|
||||
String payload = objectMapper.writeValueAsString(buildPayload(request));
|
||||
Request httpRequest = new Request.Builder()
|
||||
.url(config.getTtsUrl())
|
||||
.addHeader("Authorization", "Bearer " + config.getApiKey())
|
||||
.addHeader("Content-Type", "application/json")
|
||||
.post(RequestBody.create(payload.getBytes(StandardCharsets.UTF_8), JSON))
|
||||
.build();
|
||||
|
||||
try (Response response = getHttpClient().newCall(httpRequest).execute()) {
|
||||
String body = response.body() != null ? response.body().string() : "";
|
||||
if (!response.isSuccessful()) {
|
||||
log.error("[CosyVoice][TTS失败][status={}, body={}]", response.code(), body);
|
||||
throw buildException(body);
|
||||
}
|
||||
return parseTtsResult(body, request);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 调用 CosyVoice 语音复刻接口(声音注册)
|
||||
*/
|
||||
public CosyVoiceCloneResult cloneVoice(CosyVoiceCloneRequest request) {
|
||||
if (!config.isEnabled()) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key");
|
||||
}
|
||||
if (request == null || StrUtil.isBlank(request.getUrl())) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "复刻音频URL不能为空");
|
||||
}
|
||||
if (request == null || StrUtil.isBlank(request.getTargetModel())) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "复刻模型不能为空");
|
||||
}
|
||||
if (request == null || StrUtil.isBlank(request.getPrefix())) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "音色前缀不能为空");
|
||||
}
|
||||
|
||||
try {
|
||||
log.info("[CosyVoice][开始语音复刻][targetModel={}, prefix={}, url={}]",
|
||||
request.getTargetModel(), request.getPrefix(), request.getUrl());
|
||||
|
||||
// 使用 DashScope SDK 创建语音复刻
|
||||
VoiceEnrollmentService service = new VoiceEnrollmentService(config.getApiKey());
|
||||
Voice voice = service.createVoice(request.getTargetModel(), request.getPrefix(), request.getUrl());
|
||||
|
||||
log.info("[CosyVoice][语音复刻成功][Request ID: {}, Voice ID: {}]",
|
||||
service.getLastRequestId(), voice.getVoiceId());
|
||||
|
||||
// 构建返回结果
|
||||
CosyVoiceCloneResult result = new CosyVoiceCloneResult();
|
||||
result.setVoiceId(voice.getVoiceId());
|
||||
result.setRequestId(service.getLastRequestId());
|
||||
|
||||
return result;
|
||||
} catch (ServiceException ex) {
|
||||
throw ex;
|
||||
} catch (Exception ex) {
|
||||
log.error("[CosyVoice][语音复刻异常][targetModel={}, prefix={}]",
|
||||
request.getTargetModel(), request.getPrefix(), ex);
|
||||
throw exception(VOICE_TTS_FAILED);
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, Object> buildPayload(CosyVoiceTtsRequest request) {
|
||||
Map<String, Object> payload = new HashMap<>();
|
||||
String model = StrUtil.blankToDefault(request.getModel(), config.getDefaultModel());
|
||||
payload.put("model", model);
|
||||
|
||||
Map<String, Object> input = new HashMap<>();
|
||||
input.put("text", request.getText());
|
||||
|
||||
// 优先使用fileUrl(语音克隆),否则使用voiceId(系统音色)
|
||||
if (StrUtil.isNotBlank(request.getFileUrl())) {
|
||||
// 直接使用预签名URL(带签名和时效),阿里云API需要这个签名URL
|
||||
input.put("audio_url", request.getFileUrl());
|
||||
log.info("[CosyVoice][使用语音克隆][audio_url={}]", request.getFileUrl());
|
||||
|
||||
// 如果提供了参考文本,也一并传递(用于提高语音克隆质量)
|
||||
if (StrUtil.isNotBlank(request.getReferenceText())) {
|
||||
input.put("reference_text", request.getReferenceText());
|
||||
log.info("[CosyVoice][添加参考文本][length={}]", request.getReferenceText().length());
|
||||
}
|
||||
} else {
|
||||
// 使用系统音色
|
||||
String voiceId = StrUtil.blankToDefault(request.getVoiceId(), config.getDefaultVoiceId());
|
||||
if (StrUtil.isNotBlank(voiceId)) {
|
||||
input.put("voice", voiceId);
|
||||
log.info("[CosyVoice][使用系统音色][voice={}]", voiceId);
|
||||
} else {
|
||||
log.warn("[CosyVoice][未提供voiceId或fileUrl]");
|
||||
}
|
||||
}
|
||||
payload.put("input", input);
|
||||
|
||||
Map<String, Object> parameters = new HashMap<>();
|
||||
int sampleRate = request.getSampleRate() != null ? request.getSampleRate() : config.getSampleRate();
|
||||
parameters.put("sample_rate", sampleRate);
|
||||
|
||||
// 根据官方文档,统一使用小写格式
|
||||
String format = StrUtil.blankToDefault(request.getAudioFormat(), config.getAudioFormat()).toLowerCase();
|
||||
parameters.put("format", format);
|
||||
|
||||
if (request.getSpeechRate() != null) {
|
||||
parameters.put("speech_rate", request.getSpeechRate());
|
||||
}
|
||||
if (request.getVolume() != null) {
|
||||
// 文档显示volume范围是0-100
|
||||
parameters.put("volume", Math.round(request.getVolume()));
|
||||
}
|
||||
if (request.isPreview()) {
|
||||
parameters.put("preview", true);
|
||||
}
|
||||
|
||||
payload.put("parameters", parameters);
|
||||
|
||||
// 打印完整请求体(用于调试)
|
||||
log.info("[CosyVoice][请求参数][model={}, sample_rate={}, format={}, text_length={}]",
|
||||
model, sampleRate, format, request.getText().length());
|
||||
|
||||
return payload;
|
||||
}
|
||||
|
||||
private CosyVoiceTtsResult parseTtsResult(String body, CosyVoiceTtsRequest request) throws Exception {
|
||||
JsonNode root = objectMapper.readTree(body);
|
||||
|
||||
// 错误响应包含 code 字段
|
||||
if (root.has("code")) {
|
||||
String message = root.has("message") ? root.get("message").asText() : body;
|
||||
log.error("[CosyVoice][TTS失败][code={}, message={}]", root.get("code").asText(), message);
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), message);
|
||||
}
|
||||
|
||||
JsonNode audioNode = root.path("output").path("audio");
|
||||
if (!audioNode.isArray() || audioNode.isEmpty()) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 返回的音频为空");
|
||||
}
|
||||
|
||||
JsonNode firstAudio = audioNode.get(0);
|
||||
String content = firstAudio.path("content").asText();
|
||||
if (StrUtil.isBlank(content)) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 返回空音频内容");
|
||||
}
|
||||
|
||||
byte[] audioBytes = Base64.getDecoder().decode(content);
|
||||
CosyVoiceTtsResult result = new CosyVoiceTtsResult();
|
||||
result.setAudio(audioBytes);
|
||||
result.setFormat(firstAudio.path("format").asText(StrUtil.blankToDefault(request.getAudioFormat(), config.getAudioFormat())));
|
||||
result.setSampleRate(firstAudio.path("sample_rate").asInt(request.getSampleRate() != null ? request.getSampleRate() : config.getSampleRate()));
|
||||
result.setRequestId(root.path("request_id").asText());
|
||||
result.setVoiceId(firstAudio.path("voice").asText(request.getVoiceId()));
|
||||
return result;
|
||||
}
|
||||
|
||||
private OkHttpClient getHttpClient() {
|
||||
if (httpClient == null) {
|
||||
synchronized (this) {
|
||||
if (httpClient == null) {
|
||||
java.time.Duration connect = defaultDuration(config.getConnectTimeout(), 10);
|
||||
java.time.Duration read = defaultDuration(config.getReadTimeout(), 60);
|
||||
httpClient = new OkHttpClient.Builder()
|
||||
.connectTimeout(connect.toMillis(), TimeUnit.MILLISECONDS)
|
||||
.readTimeout(read.toMillis(), TimeUnit.MILLISECONDS)
|
||||
.build();
|
||||
}
|
||||
}
|
||||
}
|
||||
return httpClient;
|
||||
}
|
||||
|
||||
private Duration defaultDuration(Duration duration, long seconds) {
|
||||
return duration == null ? Duration.ofSeconds(seconds) : duration;
|
||||
}
|
||||
|
||||
private ServiceException buildException(String body) {
|
||||
try {
|
||||
JsonNode root = objectMapper.readTree(body);
|
||||
String message = CollUtil.getFirst(
|
||||
CollUtil.newArrayList(
|
||||
root.path("message").asText(null),
|
||||
root.path("output").path("message").asText(null)));
|
||||
return exception0(VOICE_TTS_FAILED.getCode(), StrUtil.blankToDefault(message, "CosyVoice 调用失败"));
|
||||
} catch (Exception ignored) {
|
||||
return exception0(VOICE_TTS_FAILED.getCode(), body);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从URL中提取原始URL(去除查询参数和锚点)
|
||||
*
|
||||
* @param url 可能包含查询参数的URL
|
||||
* @return 原始URL(去除查询参数和锚点)
|
||||
*/
|
||||
private String extractRawUrl(String url) {
|
||||
if (StrUtil.isBlank(url)) {
|
||||
return url;
|
||||
}
|
||||
try {
|
||||
java.net.URL urlObj = new java.net.URL(url);
|
||||
// 只使用协议、主机、路径部分,忽略查询参数和锚点
|
||||
return urlObj.getProtocol() + "://" + urlObj.getHost() + urlObj.getPath();
|
||||
} catch (Exception e) {
|
||||
// 如果URL解析失败,使用简单方式去除查询参数
|
||||
return url.split("\\?")[0].split("#")[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,124 +0,0 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client;
|
||||
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.VoiceProviderProperties;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* CosyVoice Provider 实现
|
||||
*
|
||||
* <p>阿里云 CosyVoice 语音服务的 Provider 实现。
|
||||
* 内部委托给 {@link CosyVoiceClient} 进行实际的API调用。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class CosyVoiceProvider implements VoiceCloneProvider {
|
||||
|
||||
private final CosyVoiceClient cosyVoiceClient;
|
||||
private final VoiceProviderProperties voiceProviderProperties;
|
||||
|
||||
/**
|
||||
* 获取 CosyVoice 配置
|
||||
*/
|
||||
private CosyVoiceProviderConfig getConfig() {
|
||||
var baseConfig = voiceProviderProperties.getProviderConfig("cosyvoice");
|
||||
if (baseConfig instanceof CosyVoiceProviderConfig config) {
|
||||
return config;
|
||||
}
|
||||
return new CosyVoiceProviderConfig();
|
||||
}
|
||||
|
||||
@Override
|
||||
public VoiceCloneResult cloneVoice(VoiceCloneRequest request) {
|
||||
log.info("[CosyVoiceProvider][语音克隆][audioUrl={}, model={}]",
|
||||
request.getAudioUrl(), request.getModel());
|
||||
|
||||
// 适配到 CosyVoiceCloneRequest
|
||||
cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest cosyRequest =
|
||||
new cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest();
|
||||
|
||||
cosyRequest.setUrl(request.getAudioUrl());
|
||||
cosyRequest.setTargetModel(request.getModel());
|
||||
cosyRequest.setPrefix(request.getPrefix());
|
||||
if (request.getSampleRate() != null) {
|
||||
cosyRequest.setSampleRate(request.getSampleRate());
|
||||
}
|
||||
if (request.getAudioFormat() != null) {
|
||||
cosyRequest.setAudioFormat(request.getAudioFormat());
|
||||
}
|
||||
|
||||
// 调用底层 Client
|
||||
cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult cosyResult =
|
||||
cosyVoiceClient.cloneVoice(cosyRequest);
|
||||
|
||||
// 适配到统一 Result
|
||||
VoiceCloneResult result = new VoiceCloneResult();
|
||||
result.setVoiceId(cosyResult.getVoiceId());
|
||||
result.setRequestId(cosyResult.getRequestId());
|
||||
|
||||
log.info("[CosyVoiceProvider][语音克隆成功][voiceId={}]", result.getVoiceId());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public VoiceTtsResult synthesize(VoiceTtsRequest request) {
|
||||
log.info("[CosyVoiceProvider][语音合成][voiceId={}, textLength={}, model={}]",
|
||||
request.getVoiceId(),
|
||||
request.getText() != null ? request.getText().length() : 0,
|
||||
request.getModel());
|
||||
|
||||
// 适配到 CosyVoiceTtsRequest
|
||||
cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest cosyRequest =
|
||||
cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest.builder()
|
||||
.text(request.getText())
|
||||
.voiceId(request.getVoiceId())
|
||||
.fileUrl(request.getFileUrl())
|
||||
.referenceText(request.getReferenceText())
|
||||
.model(request.getModel())
|
||||
.speechRate(request.getSpeechRate())
|
||||
.volume(request.getVolume())
|
||||
.instruction(request.getInstruction())
|
||||
.sampleRate(request.getSampleRate())
|
||||
.audioFormat(request.getAudioFormat())
|
||||
.preview(request.isPreview())
|
||||
.build();
|
||||
|
||||
log.error("[CosyVoiceProvider][构建的cosyRequest][text='{}', voiceId={}, fileUrl={}]",
|
||||
cosyRequest.getText(), cosyRequest.getVoiceId(), cosyRequest.getFileUrl());
|
||||
|
||||
// 调用底层 Client
|
||||
cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult cosyResult =
|
||||
cosyVoiceClient.synthesize(cosyRequest);
|
||||
|
||||
// 适配到统一 Result
|
||||
VoiceTtsResult result = new VoiceTtsResult();
|
||||
result.setRequestId(cosyResult.getRequestId());
|
||||
result.setFormat(cosyResult.getFormat());
|
||||
result.setSampleRate(cosyResult.getSampleRate());
|
||||
result.setAudio(cosyResult.getAudio());
|
||||
result.setVoiceId(cosyResult.getVoiceId());
|
||||
|
||||
log.info("[CosyVoiceProvider][语音合成成功][format={}, audioSize={}]",
|
||||
result.getFormat(), result.getAudio() != null ? result.getAudio().length : 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean supports(String providerType) {
|
||||
return "cosyvoice".equalsIgnoreCase(providerType);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getProviderType() {
|
||||
return "cosyvoice";
|
||||
}
|
||||
}
|
||||
@@ -4,6 +4,7 @@ import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.http.HttpRequest;
|
||||
import cn.hutool.http.HttpResponse;
|
||||
import cn.hutool.json.JSONUtil;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowReference;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowVoiceUploadRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowVoiceUploadResponse;
|
||||
@@ -21,6 +22,7 @@ import java.io.ByteArrayOutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
import java.util.Base64;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
* 硅基流动 Provider 实现
|
||||
@@ -108,19 +110,42 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
|
||||
throw new RuntimeException("硅基流动供应商未配置或已禁用");
|
||||
}
|
||||
|
||||
log.info("[SiliconFlowProvider][语音合成][voiceId={}, textLength={}, model={}]",
|
||||
// 判断使用哪种模式
|
||||
boolean useReferenceMode = StrUtil.isBlank(request.getVoiceId())
|
||||
&& StrUtil.isNotBlank(request.getFileUrl());
|
||||
|
||||
log.info("[SiliconFlowProvider][语音合成][voiceId={}, fileUrl={}, textLength={}, model={}, mode={}]",
|
||||
request.getVoiceId(),
|
||||
request.getFileUrl() != null ? "存在" : "无",
|
||||
request.getText() != null ? request.getText().length() : 0,
|
||||
request.getModel());
|
||||
request.getModel(),
|
||||
useReferenceMode ? "动态音色" : "标准音色");
|
||||
|
||||
try {
|
||||
SiliconFlowTtsRequest sfRequest = SiliconFlowTtsRequest.builder()
|
||||
SiliconFlowTtsRequest.SiliconFlowTtsRequestBuilder requestBuilder = SiliconFlowTtsRequest.builder()
|
||||
.model(getOrDefault(request.getModel(), getOrDefault(config.getDefaultModel(), "IndexTeam/IndexTTS-2")))
|
||||
.input(request.getText())
|
||||
.voice(request.getVoiceId())
|
||||
.speed(request.getSpeechRate() != null ? request.getSpeechRate() : 1.0f)
|
||||
.responseFormat(getOrDefault(request.getAudioFormat(), config.getAudioFormat()))
|
||||
.build();
|
||||
.gain(request.getVolume());
|
||||
|
||||
if (useReferenceMode) {
|
||||
// 用户动态音色模式:voice 传空,使用 references
|
||||
log.info("[SiliconFlowProvider][使用动态音色模式][fileUrl={}]", request.getFileUrl());
|
||||
requestBuilder.voice("");
|
||||
|
||||
SiliconFlowReference reference = SiliconFlowReference.builder()
|
||||
.audio(request.getFileUrl())
|
||||
.text(request.getReferenceText())
|
||||
.build();
|
||||
requestBuilder.references(Collections.singletonList(reference));
|
||||
} else {
|
||||
// 标准模式:使用 voiceId
|
||||
log.info("[SiliconFlowProvider][使用标准音色模式][voiceId={}]", request.getVoiceId());
|
||||
requestBuilder.voice(request.getVoiceId());
|
||||
}
|
||||
|
||||
SiliconFlowTtsRequest sfRequest = requestBuilder.build();
|
||||
|
||||
String url = config.getBaseUrl() + config.getTtsUrl();
|
||||
String requestBody = JSONUtil.toJsonStr(sfRequest);
|
||||
@@ -141,15 +166,16 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
|
||||
}
|
||||
|
||||
byte[] audioBytes = response.bodyBytes();
|
||||
String base64Audio = Base64.getEncoder().encodeToString(audioBytes);
|
||||
|
||||
VoiceTtsResult result = new VoiceTtsResult();
|
||||
result.setAudio(Base64.getDecoder().decode(base64Audio));
|
||||
result.setAudio(audioBytes);
|
||||
result.setFormat(sfRequest.getResponseFormat());
|
||||
result.setVoiceId(request.getVoiceId());
|
||||
|
||||
log.info("[SiliconFlowProvider][语音合成成功][format={}, audioSize={}]",
|
||||
result.getFormat(), result.getAudio() != null ? result.getAudio().length : 0);
|
||||
log.info("[SiliconFlowProvider][语音合成成功][format={}, audioSize={}, mode={}]",
|
||||
result.getFormat(),
|
||||
result.getAudio() != null ? result.getAudio().length : 0,
|
||||
useReferenceMode ? "动态音色" : "标准音色");
|
||||
return result;
|
||||
|
||||
} catch (Exception e) {
|
||||
|
||||
@@ -41,7 +41,7 @@ public interface VoiceCloneProvider {
|
||||
/**
|
||||
* 检查是否支持指定的供应商类型
|
||||
*
|
||||
* @param providerType 供应商类型(如 "cosyvoice", "siliconflow")
|
||||
* @param providerType 供应商类型(如 "siliconflow")
|
||||
* @return true 如果支持,false 否则
|
||||
*/
|
||||
boolean supports(String providerType);
|
||||
@@ -49,7 +49,7 @@ public interface VoiceCloneProvider {
|
||||
/**
|
||||
* 获取供应商类型标识
|
||||
*
|
||||
* @return 供应商类型,如 "cosyvoice", "siliconflow"
|
||||
* @return 供应商类型,如 "siliconflow"
|
||||
*/
|
||||
String getProviderType();
|
||||
}
|
||||
|
||||
@@ -75,7 +75,7 @@ public class VoiceCloneProviderFactory {
|
||||
/**
|
||||
* 根据类型获取 Provider
|
||||
*
|
||||
* @param providerType 供应商类型(如 "cosyvoice", "siliconflow")
|
||||
* @param providerType 供应商类型(如 "siliconflow")
|
||||
* @return 对应的 Provider 实例
|
||||
* @throws ServiceException 当 Provider 不存在时抛出
|
||||
*/
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* CosyVoice 语音复刻请求
|
||||
*/
|
||||
@Data
|
||||
public class CosyVoiceCloneRequest {
|
||||
|
||||
/**
|
||||
* 复刻模型(cosyvoice-v3-flash 等)
|
||||
*/
|
||||
private String targetModel;
|
||||
|
||||
/**
|
||||
* 音色自定义前缀(仅允许数字和小写字母,长度<10字符)
|
||||
*/
|
||||
private String prefix;
|
||||
|
||||
/**
|
||||
* 音频文件公网URL
|
||||
*/
|
||||
private String url;
|
||||
|
||||
/**
|
||||
* 采样率,默认24000
|
||||
*/
|
||||
private Integer sampleRate;
|
||||
|
||||
/**
|
||||
* 音频格式,默认wav
|
||||
*/
|
||||
private String audioFormat;
|
||||
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* CosyVoice 语音复刻结果
|
||||
*/
|
||||
@Data
|
||||
public class CosyVoiceCloneResult {
|
||||
|
||||
/**
|
||||
* 生成的 voice_id
|
||||
*/
|
||||
private String voiceId;
|
||||
|
||||
/**
|
||||
* 请求ID
|
||||
*/
|
||||
private String requestId;
|
||||
|
||||
}
|
||||
@@ -1,69 +0,0 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* CosyVoice TTS 请求
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
public class CosyVoiceTtsRequest {
|
||||
|
||||
/**
|
||||
* 待合成文本
|
||||
*/
|
||||
private String text;
|
||||
|
||||
/**
|
||||
* 声音 ID(可选,默认使用配置)
|
||||
*/
|
||||
private String voiceId;
|
||||
|
||||
/**
|
||||
* 语音文件URL(当使用语音URL合成时使用,替代voiceId)
|
||||
*/
|
||||
private String fileUrl;
|
||||
|
||||
/**
|
||||
* 参考音频文本(当使用fileUrl时,用于提高克隆质量)
|
||||
*/
|
||||
private String referenceText;
|
||||
|
||||
/**
|
||||
* 模型(默认 cosyvoice-v3-flash)
|
||||
*/
|
||||
private String model;
|
||||
|
||||
/**
|
||||
* 语速
|
||||
*/
|
||||
private Float speechRate;
|
||||
|
||||
/**
|
||||
* 音量,可选
|
||||
*/
|
||||
private Float volume;
|
||||
|
||||
/**
|
||||
* 指令(用于控制音色风格),可选
|
||||
*/
|
||||
private String instruction;
|
||||
|
||||
/**
|
||||
* 采样率
|
||||
*/
|
||||
private Integer sampleRate;
|
||||
|
||||
/**
|
||||
* 音频格式
|
||||
*/
|
||||
private String audioFormat;
|
||||
|
||||
/**
|
||||
* 是否仅用于试听,方便服务侧做限流
|
||||
*/
|
||||
private boolean preview;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* CosyVoice TTS 响应
|
||||
*/
|
||||
@Data
|
||||
public class CosyVoiceTtsResult {
|
||||
|
||||
/**
|
||||
* 请求ID
|
||||
*/
|
||||
private String requestId;
|
||||
|
||||
/**
|
||||
* 返回的音频格式
|
||||
*/
|
||||
private String format;
|
||||
|
||||
/**
|
||||
* 采样率
|
||||
*/
|
||||
private Integer sampleRate;
|
||||
|
||||
/**
|
||||
* 音频二进制内容
|
||||
*/
|
||||
private byte[] audio;
|
||||
|
||||
/**
|
||||
* 音频所使用的 voiceId
|
||||
*/
|
||||
private String voiceId;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 硅基流动参考音频配置
|
||||
*
|
||||
* <p>用于用户动态音色模式,通过 references 传递参考音频实现实时语音克隆。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
public class SiliconFlowReference {
|
||||
|
||||
/**
|
||||
* 参考音频 URL(也支持 base64 格式)
|
||||
*/
|
||||
private String audio;
|
||||
|
||||
/**
|
||||
* 参考音频的文字内容
|
||||
*/
|
||||
private String text;
|
||||
|
||||
}
|
||||
@@ -4,6 +4,8 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 硅基流动文本转语音请求
|
||||
*
|
||||
@@ -42,4 +44,18 @@ public class SiliconFlowTtsRequest {
|
||||
@JsonProperty("response_format")
|
||||
private String responseFormat;
|
||||
|
||||
/**
|
||||
* 音量增益(-10 到 10,默认 0)
|
||||
*
|
||||
* <p>正值增加音量,负值降低音量
|
||||
*/
|
||||
private Float gain;
|
||||
|
||||
/**
|
||||
* 参考音频列表(用于用户动态音色模式)
|
||||
*
|
||||
* <p>当 voice 为空时,使用此字段传递参考音频实现实时语音克隆
|
||||
*/
|
||||
private List<SiliconFlowReference> references;
|
||||
|
||||
}
|
||||
|
||||
@@ -16,7 +16,6 @@ public class VoiceCloneRequest {
|
||||
/**
|
||||
* 音频文件公网URL
|
||||
*
|
||||
* <p>CosyVoice: 对应 {@code url} 字段</p>
|
||||
* <p>SiliconFlow: 对应 {@code audio} 字段(需base64编码)</p>
|
||||
*/
|
||||
private String audioUrl;
|
||||
@@ -24,7 +23,6 @@ public class VoiceCloneRequest {
|
||||
/**
|
||||
* 模型名称
|
||||
*
|
||||
* <p>CosyVoice: 对应 {@code targetModel},如 {@code cosyvoice-v3-flash}</p>
|
||||
* <p>SiliconFlow: 对应 {@code model},如 {@code indextts-2}</p>
|
||||
*/
|
||||
private String model;
|
||||
@@ -32,7 +30,6 @@ public class VoiceCloneRequest {
|
||||
/**
|
||||
* 音色自定义前缀(可选)
|
||||
*
|
||||
* <p>CosyVoice: 必填,仅允许数字和小写字母,长度<10字符</p>
|
||||
* <p>SiliconFlow: 不适用</p>
|
||||
*/
|
||||
private String prefix;
|
||||
@@ -53,7 +50,6 @@ public class VoiceCloneRequest {
|
||||
* 转录文本(可选)
|
||||
*
|
||||
* <p>SiliconFlow: 音频对应的文本内容</p>
|
||||
* <p>CosyVoice: 不适用</p>
|
||||
*/
|
||||
private String transcriptionText;
|
||||
}
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.config;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
/**
|
||||
* CosyVoice 供应商配置
|
||||
*
|
||||
* <p>继承通用配置,添加 CosyVoice 特有字段。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Component
|
||||
@ConfigurationProperties(prefix = "yudao.voice.cosyvoice")
|
||||
public class CosyVoiceProviderConfig extends VoiceProviderProperties.ProviderConfig {
|
||||
|
||||
/**
|
||||
* 默认模型
|
||||
*/
|
||||
private String defaultModel = "cosyvoice-v3-flash";
|
||||
|
||||
/**
|
||||
* 默认 voiceId(可选)
|
||||
*/
|
||||
private String defaultVoiceId;
|
||||
|
||||
/**
|
||||
* 默认采样率
|
||||
*/
|
||||
private Integer sampleRate = 24000;
|
||||
|
||||
/**
|
||||
* 默认音频格式
|
||||
*/
|
||||
private String audioFormat = "mp3";
|
||||
|
||||
/**
|
||||
* 试听默认示例文本
|
||||
*/
|
||||
private String previewText = "您好,欢迎体验专属音色。";
|
||||
|
||||
/**
|
||||
* TTS 接口地址
|
||||
*/
|
||||
private String ttsUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis";
|
||||
|
||||
/**
|
||||
* 语音复刻接口地址(声音注册)
|
||||
*/
|
||||
private String voiceEnrollmentUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment";
|
||||
|
||||
/**
|
||||
* 连接超时时间
|
||||
*/
|
||||
private Duration connectTimeout = Duration.ofSeconds(10);
|
||||
|
||||
/**
|
||||
* 读取超时时间(3分钟,提升语音合成成功率)
|
||||
*/
|
||||
private Duration readTimeout = Duration.ofSeconds(180);
|
||||
|
||||
}
|
||||
@@ -1,8 +1,5 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.config;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
/**
|
||||
@@ -11,25 +8,6 @@ import org.springframework.context.annotation.Configuration;
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Configuration
|
||||
@Slf4j
|
||||
public class VoiceAutoConfiguration {
|
||||
|
||||
/**
|
||||
* CosyVoice 供应商配置 Bean
|
||||
*/
|
||||
@Bean
|
||||
@ConditionalOnProperty(prefix = "yudao.voice.providers.cosyvoice", name = "enabled", havingValue = "true", matchIfMissing = true)
|
||||
public CosyVoiceProviderConfig cosyVoiceProviderConfig(VoiceProviderProperties properties) {
|
||||
VoiceProviderProperties.ProviderConfig baseConfig = properties.getProviderConfig("cosyvoice");
|
||||
if (baseConfig == null) {
|
||||
baseConfig = new VoiceProviderProperties.ProviderConfig();
|
||||
}
|
||||
|
||||
CosyVoiceProviderConfig config = new CosyVoiceProviderConfig();
|
||||
config.setEnabled(baseConfig.isEnabled());
|
||||
config.setApiKey(baseConfig.getApiKey());
|
||||
config.setPriority(baseConfig.getPriority());
|
||||
return config;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -22,14 +22,14 @@ public class VoiceProviderProperties {
|
||||
/**
|
||||
* 默认供应商类型
|
||||
*
|
||||
* <p>可选值: cosyvoice, siliconflow 等
|
||||
* <p>可选值: siliconflow 等
|
||||
*/
|
||||
private String defaultProvider = "cosyvoice";
|
||||
private String defaultProvider = "siliconflow";
|
||||
|
||||
/**
|
||||
* 各供应商配置
|
||||
*
|
||||
* <p>key 为供应商类型(如 cosyvoice, siliconflow)
|
||||
* <p>key 为供应商类型(如 siliconflow)
|
||||
*/
|
||||
private Map<String, ProviderConfig> providers = new HashMap<>();
|
||||
|
||||
|
||||
@@ -80,7 +80,7 @@ public class AppTikUserVoiceController {
|
||||
}
|
||||
|
||||
@PostMapping("/tts")
|
||||
@Operation(summary = "CosyVoice 文本转语音")
|
||||
@Operation(summary = "文本转语音")
|
||||
public CommonResult<AppTikVoiceTtsRespVO> synthesizeVoice(@Valid @RequestBody AppTikVoiceTtsReqVO reqVO) {
|
||||
return success(voiceService.synthesizeVoice(reqVO));
|
||||
}
|
||||
|
||||
@@ -55,9 +55,13 @@ public class TikDigitalHumanTaskDO extends TenantBaseDO {
|
||||
|
||||
// ========== TTS参数 ==========
|
||||
/**
|
||||
* 音色ID(CosyVoice voiceId)
|
||||
* 音色ID(系统音色使用)
|
||||
*/
|
||||
private String voiceId;
|
||||
/**
|
||||
* 用户配音ID(tik_user_voice.id,用户配音使用)
|
||||
*/
|
||||
private Long voiceConfigId;
|
||||
/**
|
||||
* 输入文本(用于语音合成)
|
||||
*/
|
||||
|
||||
@@ -54,10 +54,6 @@ public class TikUserVoiceDO extends TenantBaseDO {
|
||||
* 备注信息
|
||||
*/
|
||||
private String note;
|
||||
/**
|
||||
* 复刻音色ID(CosyVoice 语音复刻生成的 voice_id)
|
||||
*/
|
||||
private String voiceId;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.enums;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
|
||||
/**
|
||||
* CosyVoice情感枚举
|
||||
* 根据阿里云DashScope官方文档定义
|
||||
* 参考:https://help.aliyun.com/zh/dashscope/developer-reference/tts-api
|
||||
*/
|
||||
@Getter
|
||||
@AllArgsConstructor
|
||||
public enum CosyVoiceEmotionEnum {
|
||||
|
||||
NEUTRAL("neutral", "中性"),
|
||||
HAPPY("happy", "高兴"),
|
||||
SAD("sad", "悲伤"),
|
||||
ANGRY("angry", "愤怒"),
|
||||
SURPRISED("surprised", "惊讶"),
|
||||
DISGUSTED("disgusted", "厌恶"),
|
||||
SCARED("scared", "害怕");
|
||||
|
||||
private final String code;
|
||||
private final String description;
|
||||
|
||||
public static CosyVoiceEmotionEnum getByCode(String code) {
|
||||
if (StrUtil.isBlank(code)) {
|
||||
return NEUTRAL;
|
||||
}
|
||||
for (CosyVoiceEmotionEnum emotion : values()) {
|
||||
if (emotion.getCode().equalsIgnoreCase(code)) {
|
||||
return emotion;
|
||||
}
|
||||
}
|
||||
return NEUTRAL;
|
||||
}
|
||||
}
|
||||
@@ -390,8 +390,9 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
|
||||
throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_FORBIDDEN, "无权访问该音色");
|
||||
}
|
||||
|
||||
if (StrUtil.isBlank(userVoice.getVoiceId())) {
|
||||
throw new IllegalArgumentException("该音色配置无效,缺少voiceId");
|
||||
// 验证识别文本是否存在(用于动态音色模式)
|
||||
if (StrUtil.isBlank(userVoice.getTranscription())) {
|
||||
throw new IllegalArgumentException("该音色配置无效,请先进行语音识别");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -399,14 +400,8 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
|
||||
* 创建任务记录
|
||||
*/
|
||||
private TikDigitalHumanTaskDO createTaskRecord(AppTikDigitalHumanCreateReqVO reqVO, Long userId) {
|
||||
// 如果是用户音色,需要从voiceConfigId获取voiceId
|
||||
// 直接使用前端传递的 voiceId(系统预置音色),用户音色通过 voiceConfigId 在合成时处理
|
||||
String voiceId = reqVO.getVoiceId();
|
||||
if (voiceId == null && reqVO.getVoiceConfigId() != null) {
|
||||
TikUserVoiceDO userVoice = userVoiceMapper.selectById(reqVO.getVoiceConfigId());
|
||||
if (userVoice != null) {
|
||||
voiceId = userVoice.getVoiceId();
|
||||
}
|
||||
}
|
||||
|
||||
// ✅ 预生成音频信息(无需存储时长,前端严格校验)
|
||||
if (reqVO.getPreGeneratedAudio() != null) {
|
||||
@@ -427,6 +422,7 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
|
||||
.videoFileId(reqVO.getVideoFileId())
|
||||
.videoUrl(reqVO.getVideoUrl())
|
||||
.voiceId(voiceId)
|
||||
.voiceConfigId(reqVO.getVoiceConfigId())
|
||||
.inputText(reqVO.getInputText())
|
||||
.speechRate(reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f)
|
||||
.volume(reqVO.getVolume() != null ? reqVO.getVolume() : 0f)
|
||||
@@ -550,7 +546,7 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
|
||||
}
|
||||
|
||||
/**
|
||||
* 语音合成(使用CosyVoice v3 Flash)
|
||||
* 语音合成
|
||||
*/
|
||||
private String synthesizeVoice(TikDigitalHumanTaskDO task) throws Exception {
|
||||
// ✅ 优先使用预生成的音频(前端传递)
|
||||
@@ -561,21 +557,25 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
|
||||
}
|
||||
|
||||
// 如果没有预生成音频,则走正常的TTS流程
|
||||
// 参数验证
|
||||
if (StrUtil.isBlank(task.getVoiceId())) {
|
||||
throw new Exception("音色ID不能为空");
|
||||
// 参数验证:voiceId(系统音色)和 voiceConfigId(用户配音)二选一
|
||||
boolean hasVoiceId = StrUtil.isNotBlank(task.getVoiceId());
|
||||
boolean hasVoiceConfigId = task.getVoiceConfigId() != null;
|
||||
|
||||
if (!hasVoiceId && !hasVoiceConfigId) {
|
||||
throw new Exception("音色ID不能为空(需提供voiceId或voiceConfigId)");
|
||||
}
|
||||
if (StrUtil.isBlank(task.getInputText())) {
|
||||
throw new Exception("输入文本不能为空");
|
||||
}
|
||||
|
||||
log.info("[synthesizeVoice][任务({})开始语音合成][voiceId={}, textLength={}]",
|
||||
task.getId(), task.getVoiceId(), task.getInputText().length());
|
||||
log.info("[synthesizeVoice][任务({})开始语音合成][voiceId={}, voiceConfigId={}, textLength={}]",
|
||||
task.getId(), task.getVoiceId(), task.getVoiceConfigId(), task.getInputText().length());
|
||||
|
||||
// 构建TTS请求参数
|
||||
AppTikVoiceTtsReqVO ttsReqVO = new AppTikVoiceTtsReqVO();
|
||||
ttsReqVO.setInputText(task.getInputText());
|
||||
ttsReqVO.setVoiceId(task.getVoiceId());
|
||||
ttsReqVO.setVoiceId(task.getVoiceId()); // 系统音色
|
||||
ttsReqVO.setVoiceConfigId(task.getVoiceConfigId()); // 用户配音
|
||||
ttsReqVO.setSpeechRate(task.getSpeechRate() != null ? task.getSpeechRate() : 1.0f);
|
||||
ttsReqVO.setVolume(task.getVolume() != null ? task.getVolume() : 0f);
|
||||
ttsReqVO.setInstruction(task.getInstruction());
|
||||
|
||||
@@ -63,7 +63,7 @@ public interface TikUserVoiceService {
|
||||
void transcribeVoice(Long id);
|
||||
|
||||
/**
|
||||
* CosyVoice 文本转语音
|
||||
* 文本转语音
|
||||
*/
|
||||
AppTikVoiceTtsRespVO synthesizeVoice(AppTikVoiceTtsReqVO reqVO);
|
||||
|
||||
|
||||
@@ -22,8 +22,6 @@ import cn.iocoder.yudao.module.tik.tikhup.service.TikHupService;
|
||||
import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.VoiceCloneProvider;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.VoiceCloneProviderFactory;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.muye.aimodelconfig.dal.AiModelConfigDO;
|
||||
@@ -89,9 +87,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
@Resource
|
||||
private VoiceCloneProviderFactory voiceProviderFactory;
|
||||
|
||||
@Resource
|
||||
private cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig cosyVoiceProviderConfig;
|
||||
|
||||
@Resource
|
||||
private StringRedisTemplate stringRedisTemplate;
|
||||
|
||||
@@ -102,22 +97,16 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
private static final long PREVIEW_CACHE_TTL_SECONDS = 3600;
|
||||
private static final long SYNTH_CACHE_TTL_SECONDS = 24 * 3600;
|
||||
|
||||
/** 供应商类型常量 */
|
||||
private static final String PROVIDER_COSYVOICE = "cosyvoice";
|
||||
private static final String PROVIDER_SILICONFLOW = "siliconflow";
|
||||
|
||||
/** 模型常量 */
|
||||
private static final String MODEL_COSYVOICE = "cosyvoice-v3-flash";
|
||||
private static final String MODEL_SILICONFLOW = "IndexTeam/IndexTTS-2";
|
||||
|
||||
/** 积分平台和类型常量 */
|
||||
private static final String PLATFORM_VOICE = "voice";
|
||||
private static final String MODEL_CODE_TTS = "tts";
|
||||
private static final String MODEL_CODE_CLONE = "clone";
|
||||
|
||||
@Resource
|
||||
private PointsService pointsService;
|
||||
|
||||
/** SiliconFlow 参考音频最大大小:5MB */
|
||||
private static final int MAX_REFERENCE_AUDIO_SIZE = 5 * 1024 * 1024;
|
||||
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public Long createVoice(AppTikUserVoiceCreateReqVO createReqVO) {
|
||||
@@ -128,7 +117,14 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
if (fileDO == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||
}
|
||||
|
||||
|
||||
// 校验文件大小(SiliconFlow API 限制参考音频不超过 5MB)
|
||||
if (fileDO.getSize() != null && fileDO.getSize() > MAX_REFERENCE_AUDIO_SIZE) {
|
||||
double sizeMB = fileDO.getSize() / (1024.0 * 1024.0);
|
||||
throw exception(VOICE_FILE_NOT_EXISTS,
|
||||
String.format("音频文件过大(%.1fMB),请上传小于5MB的音频文件", sizeMB));
|
||||
}
|
||||
|
||||
// 验证文件分类是否为voice(通过tik_user_file表查询)
|
||||
TikUserFileDO userFile = userFileMapper.selectOne(new LambdaQueryWrapperX<TikUserFileDO>()
|
||||
.eq(TikUserFileDO::getFileId, createReqVO.getFileId())
|
||||
@@ -158,51 +154,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
.setTranscription(createReqVO.getText()); // 使用前端传入的文本
|
||||
voiceMapper.insert(voice);
|
||||
|
||||
// 4. 调用语音克隆服务,生成 voice_id
|
||||
if (StrUtil.isNotBlank(createReqVO.getText())) {
|
||||
try {
|
||||
// 4.1 获取积分配置并预检
|
||||
AiModelConfigDO config = pointsService.getConfig(PLATFORM_VOICE, MODEL_CODE_CLONE);
|
||||
pointsService.checkPoints(userId.toString(), config.getConsumePoints());
|
||||
|
||||
log.info("[createVoice][开始语音复刻,配音编号({}),文件ID({}),供应商({})]",
|
||||
voice.getId(), fileDO.getId(), createReqVO.getProviderType());
|
||||
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
|
||||
VoiceCloneProvider provider = voiceProviderFactory.getProvider(createReqVO.getProviderType());
|
||||
String providerType = getProviderType(createReqVO.getProviderType(), provider);
|
||||
String model = getModelByProvider(providerType);
|
||||
|
||||
VoiceCloneRequest cloneRequest = new VoiceCloneRequest();
|
||||
cloneRequest.setAudioUrl(fileAccessUrl);
|
||||
cloneRequest.setModel(model);
|
||||
cloneRequest.setPrefix("voice" + voice.getId());
|
||||
cloneRequest.setTranscriptionText(createReqVO.getText()); // 使用前端传入的文本
|
||||
|
||||
VoiceCloneResult cloneResult = provider.cloneVoice(cloneRequest);
|
||||
String voiceId = cloneResult.getVoiceId();
|
||||
|
||||
voice.setVoiceId(voiceId);
|
||||
voiceMapper.updateById(voice);
|
||||
|
||||
// 4.2 音色克隆成功,扣减积分
|
||||
try {
|
||||
pointsService.deductPoints(userId.toString(), config.getConsumePoints(), "voice_clone", voice.getId().toString());
|
||||
log.info("[createVoice][用户 {} 扣减 {} 积分(音色克隆)]", userId, config.getConsumePoints());
|
||||
} catch (Exception e) {
|
||||
log.error("[createVoice][积分扣减失败: {}]", e.getMessage());
|
||||
}
|
||||
|
||||
log.info("[createVoice][语音复刻成功,配音编号({}),voice_id({})]", voice.getId(), voiceId);
|
||||
} catch (Exception e) {
|
||||
log.error("[createVoice][语音复刻失败,配音编号({}),错误信息: {}]", voice.getId(), e.getMessage(), e);
|
||||
// 失败不扣费
|
||||
}
|
||||
} else {
|
||||
log.info("[createVoice][未提供文本,跳过语音复刻,配音编号({})]", voice.getId());
|
||||
}
|
||||
|
||||
|
||||
log.info("[createVoice][用户({})创建配音成功,配音编号({})]", userId, voice.getId());
|
||||
return voice.getId();
|
||||
}
|
||||
@@ -300,10 +251,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
.collect(Collectors.toList());
|
||||
|
||||
if (CollUtil.isNotEmpty(fileIds)) {
|
||||
List<FileDO> files = fileMapper.selectBatchIds(fileIds);
|
||||
Map<Long, FileDO> tempFileMap = files.stream()
|
||||
.collect(Collectors.toMap(FileDO::getId, file -> file));
|
||||
fileMap.putAll(tempFileMap);
|
||||
fileMapper.selectBatchIds(fileIds).forEach(file -> fileMap.put(file.getId(), file));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -412,26 +360,18 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户");
|
||||
}
|
||||
|
||||
// 优先使用复刻的 voice_id,如果不存在则使用文件URL(兼容旧数据)
|
||||
if (StrUtil.isNotBlank(voice.getVoiceId())) {
|
||||
log.info("[synthesizeVoice][使用复刻音色ID合成,配音编号({}),voice_id({})]", voiceConfigId, voice.getVoiceId());
|
||||
voiceId = voice.getVoiceId();
|
||||
// 注意:使用 voiceId 时,不依赖 transcriptionText,直接使用前端传入的 inputText
|
||||
transcriptionText = null; // 清除 transcriptionText,让 determineSynthesisText 只使用 inputText
|
||||
} else {
|
||||
log.info("[synthesizeVoice][使用文件URL合成,配音编号({})]", voiceConfigId);
|
||||
// 获取文件信息,用于获取文件URL
|
||||
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
||||
if (fileDO == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||
}
|
||||
// 使用动态音色模式(fileUrl + transcriptionText)
|
||||
log.info("[synthesizeVoice][使用动态音色模式,配音编号({})]", voiceConfigId);
|
||||
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
||||
if (fileDO == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||
}
|
||||
|
||||
// 使用文件URL和识别文本进行合成
|
||||
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
transcriptionText = voice.getTranscription();
|
||||
if (StrUtil.isBlank(transcriptionText)) {
|
||||
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
|
||||
}
|
||||
// 使用文件URL和识别文本进行合成
|
||||
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
transcriptionText = voice.getTranscription();
|
||||
if (StrUtil.isBlank(transcriptionText)) {
|
||||
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
|
||||
}
|
||||
}
|
||||
// 2. 如果没有配置ID,使用voiceId或fileUrl(系统音色或直接URL方式)
|
||||
@@ -555,21 +495,17 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
throw exception(VOICE_NOT_EXISTS, "配音不存在");
|
||||
}
|
||||
|
||||
voiceId = voice.getVoiceId();
|
||||
if (StrUtil.isNotBlank(voiceId)) {
|
||||
fileUrl = null;
|
||||
referenceText = null;
|
||||
} else {
|
||||
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
||||
if (fileDO == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||
}
|
||||
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
referenceText = voice.getTranscription();
|
||||
if (StrUtil.isBlank(referenceText)) {
|
||||
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
|
||||
}
|
||||
// 使用动态音色模式
|
||||
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
||||
if (fileDO == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||
}
|
||||
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
referenceText = voice.getTranscription();
|
||||
if (StrUtil.isBlank(referenceText)) {
|
||||
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
|
||||
}
|
||||
voiceId = null;
|
||||
}
|
||||
// 3. 系统配音
|
||||
else {
|
||||
@@ -623,21 +559,10 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
return buildPreviewResp(audioBase64, format, voiceId);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取 CosyVoice 配置
|
||||
*/
|
||||
private cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig getCosyVoiceConfig() {
|
||||
return cosyVoiceProviderConfig;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取默认音频格式
|
||||
*/
|
||||
private String getDefaultFormat() {
|
||||
var config = getCosyVoiceConfig();
|
||||
if (config != null) {
|
||||
return config.getAudioFormat();
|
||||
}
|
||||
return "mp3";
|
||||
}
|
||||
|
||||
@@ -645,10 +570,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
* 获取默认采样率
|
||||
*/
|
||||
private Integer getDefaultSampleRate() {
|
||||
var config = getCosyVoiceConfig();
|
||||
if (config != null) {
|
||||
return config.getSampleRate();
|
||||
}
|
||||
return 24000;
|
||||
}
|
||||
|
||||
@@ -664,16 +585,14 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
}
|
||||
|
||||
private String resolveContentType(String format) {
|
||||
if ("wav".equalsIgnoreCase(format)) {
|
||||
return "audio/wav";
|
||||
}
|
||||
if ("mp3".equalsIgnoreCase(format)) {
|
||||
if (format == null) {
|
||||
return "audio/mpeg";
|
||||
}
|
||||
if ("flac".equalsIgnoreCase(format)) {
|
||||
return "audio/flac";
|
||||
}
|
||||
return "audio/mpeg";
|
||||
return switch (format.toLowerCase()) {
|
||||
case "wav" -> "audio/wav";
|
||||
case "flac" -> "audio/flac";
|
||||
default -> "audio/mpeg";
|
||||
};
|
||||
}
|
||||
|
||||
private String determineSynthesisText(String transcriptionText, String inputText, boolean allowFallback) {
|
||||
@@ -828,74 +747,25 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
return respVO;
|
||||
}
|
||||
|
||||
@lombok.Data
|
||||
@lombok.NoArgsConstructor
|
||||
@lombok.AllArgsConstructor
|
||||
private static class PreviewCacheEntry {
|
||||
private String audioBase64;
|
||||
private String format;
|
||||
private Integer sampleRate;
|
||||
private String requestId;
|
||||
|
||||
public PreviewCacheEntry() {}
|
||||
|
||||
public PreviewCacheEntry(String audioBase64, String format, Integer sampleRate, String requestId) {
|
||||
this.audioBase64 = audioBase64;
|
||||
this.format = format;
|
||||
this.sampleRate = sampleRate;
|
||||
this.requestId = requestId;
|
||||
}
|
||||
|
||||
public String getAudioBase64() {
|
||||
return audioBase64;
|
||||
}
|
||||
|
||||
public String getFormat() {
|
||||
return format;
|
||||
}
|
||||
|
||||
public Integer getSampleRate() {
|
||||
return sampleRate;
|
||||
}
|
||||
|
||||
public String getRequestId() {
|
||||
return requestId;
|
||||
}
|
||||
}
|
||||
|
||||
@lombok.Data
|
||||
@lombok.NoArgsConstructor
|
||||
@lombok.AllArgsConstructor
|
||||
private static class SynthCacheEntry {
|
||||
private String audioBase64;
|
||||
private String format;
|
||||
private Integer sampleRate;
|
||||
private String requestId;
|
||||
private String voiceId;
|
||||
|
||||
public SynthCacheEntry() {}
|
||||
|
||||
public SynthCacheEntry(String audioBase64, String format, Integer sampleRate, String requestId, String voiceId) {
|
||||
this.audioBase64 = audioBase64;
|
||||
this.format = format;
|
||||
this.sampleRate = sampleRate;
|
||||
this.requestId = requestId;
|
||||
this.voiceId = voiceId;
|
||||
}
|
||||
|
||||
public String getAudioBase64() {
|
||||
return audioBase64;
|
||||
}
|
||||
|
||||
public String getFormat() {
|
||||
return format;
|
||||
}
|
||||
|
||||
public Integer getSampleRate() {
|
||||
return sampleRate;
|
||||
}
|
||||
|
||||
public String getRequestId() {
|
||||
return requestId;
|
||||
}
|
||||
|
||||
public String getVoiceId() {
|
||||
return voiceId;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1116,10 +986,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
* 获取默认音色ID
|
||||
*/
|
||||
private String getDefaultVoiceId() {
|
||||
var config = getCosyVoiceConfig();
|
||||
if (config != null) {
|
||||
return config.getDefaultVoiceId();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -1127,32 +993,8 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
* 获取试听文本
|
||||
*/
|
||||
private String getPreviewText() {
|
||||
var config = getCosyVoiceConfig();
|
||||
if (config != null) {
|
||||
return config.getPreviewText();
|
||||
}
|
||||
return "您好,欢迎体验专属音色。";
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取供应商类型
|
||||
*/
|
||||
private String getProviderType(String requestProviderType, VoiceCloneProvider provider) {
|
||||
if (StrUtil.isNotBlank(requestProviderType)) {
|
||||
return requestProviderType;
|
||||
}
|
||||
return provider.getProviderType();
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据供应商类型获取对应的模型
|
||||
*/
|
||||
private String getModelByProvider(String providerType) {
|
||||
if (PROVIDER_SILICONFLOW.equalsIgnoreCase(providerType)) {
|
||||
return MODEL_SILICONFLOW;
|
||||
}
|
||||
return MODEL_COSYVOICE; // 默认使用 CosyVoice 模型
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ public class AppTikDigitalHumanCreateReqVO {
|
||||
@Size(max = 1024, message = "视频URL不能超过1024个字符")
|
||||
private String videoUrl;
|
||||
|
||||
@Schema(description = "音色ID(CosyVoice voiceId,系统音色使用)", example = "cosyvoice-v3-flash-sys-xxx")
|
||||
@Schema(description = "音色ID(系统音色使用)", example = "alex")
|
||||
private String voiceId;
|
||||
|
||||
@Schema(description = "用户音色配置ID(tik_user_voice.id,用户音色使用)", example = "123")
|
||||
|
||||
@@ -37,7 +37,7 @@ public class AppTikDigitalHumanRespVO {
|
||||
@Schema(description = "配音配置ID", example = "789")
|
||||
private Long voiceConfigId;
|
||||
|
||||
@Schema(description = "voice_id", example = "cosyvoice-v3-flash-xxx")
|
||||
@Schema(description = "voice_id", example = "voice-xxx")
|
||||
private String voiceId;
|
||||
|
||||
@Schema(description = "语速", example = "1.0")
|
||||
|
||||
@@ -39,7 +39,7 @@ public class AppTikUserVoiceCreateReqVO {
|
||||
@Size(max = 4000, message = "音频文本不能超过 4000 个字符")
|
||||
private String text;
|
||||
|
||||
@Schema(description = "供应商类型:cosyvoice-阿里云,siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice")
|
||||
@Schema(description = "供应商类型:siliconflow-硅基流动(不传则使用默认)", example = "siliconflow")
|
||||
private String providerType;
|
||||
|
||||
}
|
||||
|
||||
@@ -38,9 +38,6 @@ public class AppTikUserVoiceRespVO {
|
||||
@Schema(description = "备注", example = "这是一个测试配音")
|
||||
private String note;
|
||||
|
||||
@Schema(description = "复刻音色ID(CosyVoice 语音复刻生成的 voice_id)")
|
||||
private String voiceId;
|
||||
|
||||
@Schema(description = "创建时间", requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private LocalDateTime createTime;
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ public class AppTikVoicePreviewReqVO {
|
||||
@Schema(description = "配音编号(tik_user_voice.id),用户配音必传,系统配音可不传")
|
||||
private Long voiceConfigId;
|
||||
|
||||
@Schema(description = "CosyVoice音色ID(系统配音必传,用户配音可不传)")
|
||||
@Schema(description = "音色ID(系统配音必传,用户配音可不传)")
|
||||
private String voiceId;
|
||||
|
||||
@Schema(description = "语音文件URL(当使用语音URL合成时必传,替代voiceId)")
|
||||
@@ -43,7 +43,7 @@ public class AppTikVoicePreviewReqVO {
|
||||
@Schema(description = "指令(用于控制音色风格)", example = "请用温柔专业的语调朗读")
|
||||
private String instruction;
|
||||
|
||||
@Schema(description = "供应商类型:cosyvoice-阿里云,siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice")
|
||||
@Schema(description = "供应商类型:siliconflow-硅基流动(不传则使用默认)", example = "siliconflow")
|
||||
private String providerType;
|
||||
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ public class AppTikVoicePreviewRespVO {
|
||||
@Schema(description = "采样率", example = "24000")
|
||||
private Integer sampleRate;
|
||||
|
||||
@Schema(description = "CosyVoice 请求ID")
|
||||
@Schema(description = "请求ID")
|
||||
private String requestId;
|
||||
|
||||
@Schema(description = "使用的音色 ID")
|
||||
|
||||
@@ -21,13 +21,13 @@ public class AppTikVoiceTtsReqVO {
|
||||
@Size(max = 4000, message = "识别文本不能超过 4000 个字符")
|
||||
private String transcriptionText;
|
||||
|
||||
@Schema(description = "音色 ID(CosyVoice voiceId)", example = "cosyvoice-v3-flash-myvoice-xxx")
|
||||
@Schema(description = "音色 ID(系统音色)", example = "alex")
|
||||
private String voiceId;
|
||||
|
||||
@Schema(description = "音色源音频 OSS 地址(当没有 voiceId 时必传)")
|
||||
private String fileUrl;
|
||||
|
||||
@Schema(description = "模型名称,默认 cosyvoice-v3-flash", example = "cosyvoice-v3-flash")
|
||||
@Schema(description = "模型名称", example = "IndexTeam/IndexTTS-2")
|
||||
private String model;
|
||||
|
||||
@Schema(description = "语速,默认 1.0", example = "1.0")
|
||||
@@ -45,7 +45,7 @@ public class AppTikVoiceTtsReqVO {
|
||||
@Schema(description = "音频格式,默认 wav,可选 mp3")
|
||||
private String audioFormat;
|
||||
|
||||
@Schema(description = "供应商类型:cosyvoice-阿里云,siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice")
|
||||
@Schema(description = "供应商类型:siliconflow-硅基流动(不传则使用默认)", example = "siliconflow")
|
||||
private String providerType;
|
||||
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@Schema(description = "CosyVoice 文本转语音响应")
|
||||
@Schema(description = "文本转语音响应")
|
||||
public class AppTikVoiceTtsRespVO {
|
||||
|
||||
@Schema(description = "用户文件编号", example = "1024")
|
||||
@@ -23,7 +23,7 @@ public class AppTikVoiceTtsRespVO {
|
||||
@Schema(description = "采样率", example = "24000")
|
||||
private Integer sampleRate;
|
||||
|
||||
@Schema(description = "CosyVoice 请求ID")
|
||||
@Schema(description = "请求ID")
|
||||
private String requestId;
|
||||
|
||||
@Schema(description = "使用的音色 ID")
|
||||
|
||||
Reference in New Issue
Block a user