代码优化
This commit is contained in:
@@ -7,7 +7,7 @@ import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig;
|
||||
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
|
||||
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
|
||||
import com.alibaba.dashscope.audio.ttsv2.enrollment.Voice;
|
||||
@@ -45,7 +45,7 @@ public class CosyVoiceClient {
|
||||
|
||||
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
|
||||
|
||||
private final CosyVoiceProperties properties;
|
||||
private final CosyVoiceProviderConfig config;
|
||||
private final ObjectMapper objectMapper;
|
||||
|
||||
private volatile OkHttpClient httpClient;
|
||||
@@ -54,7 +54,7 @@ public class CosyVoiceClient {
|
||||
* 调用 CosyVoice TTS 接口
|
||||
*/
|
||||
public CosyVoiceTtsResult synthesize(CosyVoiceTtsRequest request) {
|
||||
if (!properties.isEnabled()) {
|
||||
if (!config.isEnabled()) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key");
|
||||
}
|
||||
if (request == null || StrUtil.isBlank(request.getText())) {
|
||||
@@ -69,15 +69,15 @@ public class CosyVoiceClient {
|
||||
log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}, speechRate={}, instruction={}]",
|
||||
request.getVoiceId(),
|
||||
request.getText().length(),
|
||||
StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()),
|
||||
StrUtil.blankToDefault(request.getModel(), config.getDefaultModel()),
|
||||
request.getSpeechRate(),
|
||||
request.getInstruction());
|
||||
|
||||
// 使用 DashScope SDK 构建参数(严格按文档)
|
||||
// 注意:speechRate 和 volume 需要转换为 int 类型
|
||||
SpeechSynthesisParam param = SpeechSynthesisParam.builder()
|
||||
.apiKey(properties.getApiKey())
|
||||
.model(StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()))
|
||||
.apiKey(config.getApiKey())
|
||||
.model(StrUtil.blankToDefault(request.getModel(), config.getDefaultModel()))
|
||||
.voice(request.getVoiceId())
|
||||
.speechRate(request.getSpeechRate() != null ? request.getSpeechRate().intValue() : 1)
|
||||
.volume(request.getVolume() != null ? request.getVolume().intValue() : 0)
|
||||
@@ -108,8 +108,8 @@ public class CosyVoiceClient {
|
||||
// 构建返回结果
|
||||
CosyVoiceTtsResult result = new CosyVoiceTtsResult();
|
||||
result.setAudio(audioBytes);
|
||||
result.setFormat(request.getAudioFormat() != null ? request.getAudioFormat() : properties.getAudioFormat());
|
||||
result.setSampleRate(request.getSampleRate() != null ? request.getSampleRate() : properties.getSampleRate());
|
||||
result.setFormat(request.getAudioFormat() != null ? request.getAudioFormat() : config.getAudioFormat());
|
||||
result.setSampleRate(request.getSampleRate() != null ? request.getSampleRate() : config.getSampleRate());
|
||||
result.setRequestId(synthesizer.getLastRequestId());
|
||||
result.setVoiceId(request.getVoiceId());
|
||||
|
||||
@@ -138,8 +138,8 @@ public class CosyVoiceClient {
|
||||
private CosyVoiceTtsResult synthesizeViaHttp(CosyVoiceTtsRequest request) throws Exception {
|
||||
String payload = objectMapper.writeValueAsString(buildPayload(request));
|
||||
Request httpRequest = new Request.Builder()
|
||||
.url(properties.getTtsUrl())
|
||||
.addHeader("Authorization", "Bearer " + properties.getApiKey())
|
||||
.url(config.getTtsUrl())
|
||||
.addHeader("Authorization", "Bearer " + config.getApiKey())
|
||||
.addHeader("Content-Type", "application/json")
|
||||
.post(RequestBody.create(payload.getBytes(StandardCharsets.UTF_8), JSON))
|
||||
.build();
|
||||
@@ -158,7 +158,7 @@ public class CosyVoiceClient {
|
||||
* 调用 CosyVoice 语音复刻接口(声音注册)
|
||||
*/
|
||||
public CosyVoiceCloneResult cloneVoice(CosyVoiceCloneRequest request) {
|
||||
if (!properties.isEnabled()) {
|
||||
if (!config.isEnabled()) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key");
|
||||
}
|
||||
if (request == null || StrUtil.isBlank(request.getUrl())) {
|
||||
@@ -176,7 +176,7 @@ public class CosyVoiceClient {
|
||||
request.getTargetModel(), request.getPrefix(), request.getUrl());
|
||||
|
||||
// 使用 DashScope SDK 创建语音复刻
|
||||
VoiceEnrollmentService service = new VoiceEnrollmentService(properties.getApiKey());
|
||||
VoiceEnrollmentService service = new VoiceEnrollmentService(config.getApiKey());
|
||||
Voice voice = service.createVoice(request.getTargetModel(), request.getPrefix(), request.getUrl());
|
||||
|
||||
log.info("[CosyVoice][语音复刻成功][Request ID: {}, Voice ID: {}]",
|
||||
@@ -199,7 +199,7 @@ public class CosyVoiceClient {
|
||||
|
||||
private Map<String, Object> buildPayload(CosyVoiceTtsRequest request) {
|
||||
Map<String, Object> payload = new HashMap<>();
|
||||
String model = StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel());
|
||||
String model = StrUtil.blankToDefault(request.getModel(), config.getDefaultModel());
|
||||
payload.put("model", model);
|
||||
|
||||
Map<String, Object> input = new HashMap<>();
|
||||
@@ -218,7 +218,7 @@ public class CosyVoiceClient {
|
||||
}
|
||||
} else {
|
||||
// 使用系统音色
|
||||
String voiceId = StrUtil.blankToDefault(request.getVoiceId(), properties.getDefaultVoiceId());
|
||||
String voiceId = StrUtil.blankToDefault(request.getVoiceId(), config.getDefaultVoiceId());
|
||||
if (StrUtil.isNotBlank(voiceId)) {
|
||||
input.put("voice", voiceId);
|
||||
log.info("[CosyVoice][使用系统音色][voice={}]", voiceId);
|
||||
@@ -229,11 +229,11 @@ public class CosyVoiceClient {
|
||||
payload.put("input", input);
|
||||
|
||||
Map<String, Object> parameters = new HashMap<>();
|
||||
int sampleRate = request.getSampleRate() != null ? request.getSampleRate() : properties.getSampleRate();
|
||||
int sampleRate = request.getSampleRate() != null ? request.getSampleRate() : config.getSampleRate();
|
||||
parameters.put("sample_rate", sampleRate);
|
||||
|
||||
// 根据官方文档,统一使用小写格式
|
||||
String format = StrUtil.blankToDefault(request.getAudioFormat(), properties.getAudioFormat()).toLowerCase();
|
||||
String format = StrUtil.blankToDefault(request.getAudioFormat(), config.getAudioFormat()).toLowerCase();
|
||||
parameters.put("format", format);
|
||||
|
||||
if (request.getSpeechRate() != null) {
|
||||
@@ -280,8 +280,8 @@ public class CosyVoiceClient {
|
||||
byte[] audioBytes = Base64.getDecoder().decode(content);
|
||||
CosyVoiceTtsResult result = new CosyVoiceTtsResult();
|
||||
result.setAudio(audioBytes);
|
||||
result.setFormat(firstAudio.path("format").asText(StrUtil.blankToDefault(request.getAudioFormat(), properties.getAudioFormat())));
|
||||
result.setSampleRate(firstAudio.path("sample_rate").asInt(request.getSampleRate() != null ? request.getSampleRate() : properties.getSampleRate()));
|
||||
result.setFormat(firstAudio.path("format").asText(StrUtil.blankToDefault(request.getAudioFormat(), config.getAudioFormat())));
|
||||
result.setSampleRate(firstAudio.path("sample_rate").asInt(request.getSampleRate() != null ? request.getSampleRate() : config.getSampleRate()));
|
||||
result.setRequestId(root.path("request_id").asText());
|
||||
result.setVoiceId(firstAudio.path("voice").asText(request.getVoiceId()));
|
||||
return result;
|
||||
@@ -291,8 +291,8 @@ public class CosyVoiceClient {
|
||||
if (httpClient == null) {
|
||||
synchronized (this) {
|
||||
if (httpClient == null) {
|
||||
java.time.Duration connect = defaultDuration(properties.getConnectTimeout(), 10);
|
||||
java.time.Duration read = defaultDuration(properties.getReadTimeout(), 60);
|
||||
java.time.Duration connect = defaultDuration(config.getConnectTimeout(), 10);
|
||||
java.time.Duration read = defaultDuration(config.getReadTimeout(), 60);
|
||||
httpClient = new OkHttpClient.Builder()
|
||||
.connectTimeout(connect.toMillis(), TimeUnit.MILLISECONDS)
|
||||
.readTimeout(read.toMillis(), TimeUnit.MILLISECONDS)
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.VoiceProviderProperties;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
@@ -26,56 +24,19 @@ import org.springframework.stereotype.Component;
|
||||
public class CosyVoiceProvider implements VoiceCloneProvider {
|
||||
|
||||
private final CosyVoiceClient cosyVoiceClient;
|
||||
|
||||
/**
|
||||
* 新配置(支持多供应商)
|
||||
*/
|
||||
private final VoiceProviderProperties voiceProviderProperties;
|
||||
|
||||
/**
|
||||
* 旧配置(向后兼容)
|
||||
*/
|
||||
private final CosyVoiceProperties cosyVoiceProperties;
|
||||
|
||||
/**
|
||||
* 获取 CosyVoice 配置
|
||||
* 优先使用新配置,如果不存在则使用旧配置(向后兼容)
|
||||
*/
|
||||
private CosyVoiceProviderConfig getConfig() {
|
||||
// 尝试从新配置获取
|
||||
var baseConfig = voiceProviderProperties.getProviderConfig("cosyvoice");
|
||||
if (baseConfig instanceof CosyVoiceProviderConfig cosyConfig) {
|
||||
return cosyConfig;
|
||||
if (baseConfig instanceof CosyVoiceProviderConfig config) {
|
||||
return config;
|
||||
}
|
||||
|
||||
// 回退到旧配置(向后兼容)
|
||||
if (cosyVoiceProperties != null && cosyVoiceProperties.isEnabled()) {
|
||||
return migrateFromLegacyConfig(cosyVoiceProperties);
|
||||
}
|
||||
|
||||
// 返回空配置
|
||||
return new CosyVoiceProviderConfig();
|
||||
}
|
||||
|
||||
/**
|
||||
* 从旧配置迁移到新配置格式
|
||||
*/
|
||||
private CosyVoiceProviderConfig migrateFromLegacyConfig(CosyVoiceProperties legacy) {
|
||||
var config = new CosyVoiceProviderConfig();
|
||||
config.setEnabled(true);
|
||||
config.setApiKey(legacy.getApiKey());
|
||||
config.setDefaultModel(legacy.getDefaultModel());
|
||||
config.setDefaultVoiceId(legacy.getDefaultVoiceId());
|
||||
config.setSampleRate(legacy.getSampleRate());
|
||||
config.setAudioFormat(legacy.getAudioFormat());
|
||||
config.setPreviewText(legacy.getPreviewText());
|
||||
config.setTtsUrl(legacy.getTtsUrl());
|
||||
config.setVoiceEnrollmentUrl(legacy.getVoiceEnrollmentUrl());
|
||||
config.setConnectTimeout(legacy.getConnectTimeout());
|
||||
config.setReadTimeout(legacy.getReadTimeout());
|
||||
return config;
|
||||
}
|
||||
|
||||
@Override
|
||||
public VoiceCloneResult cloneVoice(VoiceCloneRequest request) {
|
||||
log.info("[CosyVoiceProvider][语音克隆][audioUrl={}, model={}]",
|
||||
|
||||
@@ -1,123 +0,0 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.http.HttpRequest;
|
||||
import cn.hutool.http.HttpResponse;
|
||||
import cn.hutool.json.JSONUtil;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowVoiceUploadRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowVoiceUploadResponse;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.SiliconFlowProviderConfig;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* 硅基流动 API 客户端
|
||||
*
|
||||
* <p>提供硅基流动语音服务的 HTTP 调用能力。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class SiliconFlowApi {
|
||||
|
||||
private final SiliconFlowProviderConfig config;
|
||||
|
||||
/**
|
||||
* 上传参考音频(语音克隆)
|
||||
*
|
||||
* @param request 上传请求
|
||||
* @return 上传响应,包含音色 URI
|
||||
*/
|
||||
public SiliconFlowVoiceUploadResponse uploadVoice(SiliconFlowVoiceUploadRequest request) {
|
||||
String url = config.getBaseUrl() + config.getVoiceUploadUrl();
|
||||
|
||||
log.info("[SiliconFlowApi][上传参考音频][url={}, model={}, customName={}]",
|
||||
url, request.getModel(), request.getCustomName());
|
||||
|
||||
try {
|
||||
String requestBody = JSONUtil.toJsonStr(request);
|
||||
log.debug("[SiliconFlowApi][请求体]{}", requestBody);
|
||||
|
||||
HttpResponse response = HttpRequest.post(url)
|
||||
.header("Authorization", "Bearer " + config.getApiKey())
|
||||
.header("Content-Type", MediaType.APPLICATION_JSON_VALUE)
|
||||
.body(requestBody)
|
||||
.timeout((int) config.getConnectTimeout().toMillis())
|
||||
.execute();
|
||||
|
||||
String responseBody = response.body();
|
||||
log.debug("[SiliconFlowApi][响应体]{}", responseBody);
|
||||
|
||||
if (!response.isOk()) {
|
||||
log.error("[SiliconFlowApi][上传失败][code={}, body={}]",
|
||||
response.getStatus(), responseBody);
|
||||
throw new RuntimeException("硅基流动上传参考音频失败: " + responseBody);
|
||||
}
|
||||
|
||||
SiliconFlowVoiceUploadResponse result = JSONUtil.toBean(responseBody,
|
||||
SiliconFlowVoiceUploadResponse.class);
|
||||
|
||||
if (StrUtil.isBlank(result.getUri())) {
|
||||
throw new RuntimeException("硅基流动上传参考音频失败: 响应中缺少 uri");
|
||||
}
|
||||
|
||||
log.info("[SiliconFlowApi][上传成功][uri={}]", result.getUri());
|
||||
return result;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("[SiliconFlowApi][上传异常]", e);
|
||||
throw new RuntimeException("硅基流动上传参考音频异常: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 文本转语音
|
||||
*
|
||||
* @param request TTS 请求
|
||||
* @return 音频数据(base64 编码)
|
||||
*/
|
||||
public String synthesize(SiliconFlowTtsRequest request) {
|
||||
String url = config.getBaseUrl() + config.getTtsUrl();
|
||||
|
||||
log.info("[SiliconFlowApi][文本转语音][url={}, model={}, inputLength={}]",
|
||||
url, request.getModel(),
|
||||
request.getInput() != null ? request.getInput().length() : 0);
|
||||
|
||||
try {
|
||||
String requestBody = JSONUtil.toJsonStr(request);
|
||||
log.debug("[SiliconFlowApi][请求体]{}", requestBody);
|
||||
|
||||
HttpResponse response = HttpRequest.post(url)
|
||||
.header("Authorization", "Bearer " + config.getApiKey())
|
||||
.header("Content-Type", MediaType.APPLICATION_JSON_VALUE)
|
||||
.body(requestBody)
|
||||
.timeout((int) config.getReadTimeout().toMillis())
|
||||
.execute();
|
||||
|
||||
if (!response.isOk()) {
|
||||
String errorBody = response.body();
|
||||
log.error("[SiliconFlowApi][合成失败][code={}, body={}]",
|
||||
response.getStatus(), errorBody);
|
||||
throw new RuntimeException("硅基流动文本转语音失败: " + errorBody);
|
||||
}
|
||||
|
||||
// 硅基流动直接返回二进制音频数据
|
||||
byte[] audioBytes = response.bodyBytes();
|
||||
String base64Audio = java.util.Base64.getEncoder().encodeToString(audioBytes);
|
||||
|
||||
log.info("[SiliconFlowApi][合成成功][format={}, size={}]",
|
||||
request.getResponseFormat(), audioBytes.length);
|
||||
return base64Audio;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("[SiliconFlowApi][合成异常]", e);
|
||||
throw new RuntimeException("硅基流动文本转语音异常: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,6 +1,9 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.http.HttpRequest;
|
||||
import cn.hutool.http.HttpResponse;
|
||||
import cn.hutool.json.JSONUtil;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowVoiceUploadRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowVoiceUploadResponse;
|
||||
@@ -9,9 +12,9 @@ import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.SiliconFlowProviderConfig;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.VoiceProviderProperties;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
@@ -23,7 +26,6 @@ import java.util.Base64;
|
||||
* 硅基流动 Provider 实现
|
||||
*
|
||||
* <p>硅基流动语音服务的 Provider 实现。
|
||||
* 内部委托给 {@link SiliconFlowApi} 进行实际的API调用。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@@ -35,29 +37,17 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
|
||||
private static final String PROVIDER_TYPE = "siliconflow";
|
||||
private static final String AUDIO_MIME_TYPE = "data:audio/mpeg;base64,";
|
||||
|
||||
private final SiliconFlowApi siliconFlowApi;
|
||||
private final VoiceProviderProperties voiceProviderProperties;
|
||||
|
||||
/**
|
||||
* 获取硅基流动配置
|
||||
*/
|
||||
private SiliconFlowProviderConfig getConfig() {
|
||||
var baseConfig = voiceProviderProperties.getProviderConfig("siliconflow");
|
||||
if (baseConfig instanceof SiliconFlowProviderConfig config) {
|
||||
return config;
|
||||
}
|
||||
|
||||
// 返回默认配置
|
||||
return new SiliconFlowProviderConfig();
|
||||
}
|
||||
private final SiliconFlowProviderConfig config;
|
||||
|
||||
@Override
|
||||
public VoiceCloneResult cloneVoice(VoiceCloneRequest request) {
|
||||
if (!config.isAvailable()) {
|
||||
throw new RuntimeException("硅基流动供应商未配置或已禁用");
|
||||
}
|
||||
|
||||
log.info("[SiliconFlowProvider][语音克隆][audioUrl={}, model={}]",
|
||||
request.getAudioUrl(), request.getModel());
|
||||
|
||||
SiliconFlowProviderConfig config = getConfig();
|
||||
|
||||
try {
|
||||
byte[] audioData = downloadAudio(request.getAudioUrl());
|
||||
String base64Audio = Base64.getEncoder().encodeToString(audioData);
|
||||
@@ -68,7 +58,33 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
|
||||
sfRequest.setText(getOrDefault(request.getTranscriptionText(), config.getPreviewText()));
|
||||
sfRequest.setAudio(AUDIO_MIME_TYPE + base64Audio);
|
||||
|
||||
SiliconFlowVoiceUploadResponse sfResponse = siliconFlowApi.uploadVoice(sfRequest);
|
||||
// 调用上传参考音频 API
|
||||
String url = config.getBaseUrl() + config.getVoiceUploadUrl();
|
||||
String requestBody = JSONUtil.toJsonStr(sfRequest);
|
||||
log.debug("[SiliconFlowProvider][请求体]{}", requestBody);
|
||||
|
||||
HttpResponse response = HttpRequest.post(url)
|
||||
.header("Authorization", "Bearer " + config.getApiKey())
|
||||
.header("Content-Type", MediaType.APPLICATION_JSON_VALUE)
|
||||
.body(requestBody)
|
||||
.timeout((int) config.getConnectTimeout().toMillis())
|
||||
.execute();
|
||||
|
||||
String responseBody = response.body();
|
||||
log.debug("[SiliconFlowProvider][响应体]{}", responseBody);
|
||||
|
||||
if (!response.isOk()) {
|
||||
log.error("[SiliconFlowProvider][上传失败][code={}, body={}]",
|
||||
response.getStatus(), responseBody);
|
||||
throw new RuntimeException("硅基流动上传参考音频失败: " + responseBody);
|
||||
}
|
||||
|
||||
SiliconFlowVoiceUploadResponse sfResponse = JSONUtil.toBean(responseBody,
|
||||
SiliconFlowVoiceUploadResponse.class);
|
||||
|
||||
if (StrUtil.isBlank(sfResponse.getUri())) {
|
||||
throw new RuntimeException("硅基流动上传参考音频失败: 响应中缺少 uri");
|
||||
}
|
||||
|
||||
VoiceCloneResult result = new VoiceCloneResult();
|
||||
result.setVoiceId(sfResponse.getUri());
|
||||
@@ -89,13 +105,15 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
|
||||
|
||||
@Override
|
||||
public VoiceTtsResult synthesize(VoiceTtsRequest request) {
|
||||
if (!config.isAvailable()) {
|
||||
throw new RuntimeException("硅基流动供应商未配置或已禁用");
|
||||
}
|
||||
|
||||
log.info("[SiliconFlowProvider][语音合成][voiceId={}, textLength={}, model={}]",
|
||||
request.getVoiceId(),
|
||||
request.getText() != null ? request.getText().length() : 0,
|
||||
request.getModel());
|
||||
|
||||
SiliconFlowProviderConfig config = getConfig();
|
||||
|
||||
try {
|
||||
SiliconFlowTtsRequest sfRequest = SiliconFlowTtsRequest.builder()
|
||||
.model(getOrDefault(request.getModel(), config.getDefaultModel()))
|
||||
@@ -106,16 +124,37 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
|
||||
.responseFormat(getOrDefault(request.getAudioFormat(), config.getAudioFormat()))
|
||||
.build();
|
||||
|
||||
String base64Audio = siliconFlowApi.synthesize(sfRequest);
|
||||
// 调用文本转语音 API
|
||||
String url = config.getBaseUrl() + config.getTtsUrl();
|
||||
String requestBody = JSONUtil.toJsonStr(sfRequest);
|
||||
log.debug("[SiliconFlowProvider][请求体]{}", requestBody);
|
||||
|
||||
HttpResponse response = HttpRequest.post(url)
|
||||
.header("Authorization", "Bearer " + config.getApiKey())
|
||||
.header("Content-Type", MediaType.APPLICATION_JSON_VALUE)
|
||||
.body(requestBody)
|
||||
.timeout((int) config.getReadTimeout().toMillis())
|
||||
.execute();
|
||||
|
||||
if (!response.isOk()) {
|
||||
String errorBody = response.body();
|
||||
log.error("[SiliconFlowProvider][合成失败][code={}, body={}]",
|
||||
response.getStatus(), errorBody);
|
||||
throw new RuntimeException("硅基流动文本转语音失败: " + errorBody);
|
||||
}
|
||||
|
||||
// 硅基流动直接返回二进制音频数据
|
||||
byte[] audioBytes = response.bodyBytes();
|
||||
String base64Audio = Base64.getEncoder().encodeToString(audioBytes);
|
||||
|
||||
VoiceTtsResult result = new VoiceTtsResult();
|
||||
result.setAudio(base64Audio);
|
||||
result.setAudio(Base64.getDecoder().decode(base64Audio));
|
||||
result.setFormat(sfRequest.getResponseFormat());
|
||||
result.setSampleRate(sfRequest.getSampleRate());
|
||||
result.setVoiceId(request.getVoiceId());
|
||||
|
||||
log.info("[SiliconFlowProvider][语音合成成功][format={}, audioSize={}]",
|
||||
result.getFormat(), base64Audio != null ? base64Audio.length() : 0);
|
||||
result.getFormat(), result.getAudio() != null ? result.getAudio().length : 0);
|
||||
return result;
|
||||
|
||||
} catch (Exception e) {
|
||||
|
||||
@@ -1,79 +0,0 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.config;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
/**
|
||||
* CosyVoice 配置
|
||||
*/
|
||||
@Data
|
||||
@Component
|
||||
@ConfigurationProperties(prefix = "yudao.cosyvoice")
|
||||
public class CosyVoiceProperties {
|
||||
|
||||
/**
|
||||
* DashScope API Key
|
||||
*/
|
||||
private String apiKey;
|
||||
|
||||
/**
|
||||
* 默认模型
|
||||
*/
|
||||
private String defaultModel = "cosyvoice-v3-flash";
|
||||
|
||||
/**
|
||||
* 默认 voiceId(可选)
|
||||
*/
|
||||
private String defaultVoiceId;
|
||||
|
||||
/**
|
||||
* 默认采样率
|
||||
*/
|
||||
private Integer sampleRate = 24000;
|
||||
|
||||
/**
|
||||
* 默认音频格式
|
||||
*/
|
||||
private String audioFormat = "mp3";
|
||||
|
||||
/**
|
||||
* 试听默认示例文本
|
||||
*/
|
||||
private String previewText = "您好,欢迎体验专属音色。";
|
||||
|
||||
/**
|
||||
* TTS 接口地址
|
||||
*/
|
||||
private String ttsUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis";
|
||||
|
||||
/**
|
||||
* 语音复刻接口地址(声音注册)
|
||||
*/
|
||||
private String voiceEnrollmentUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment";
|
||||
|
||||
/**
|
||||
* 连接超时时间
|
||||
*/
|
||||
private Duration connectTimeout = Duration.ofSeconds(10);
|
||||
|
||||
/**
|
||||
* 读取超时时间(改为3分钟,提升语音合成成功率)
|
||||
*/
|
||||
private Duration readTimeout = Duration.ofSeconds(180);
|
||||
|
||||
/**
|
||||
* 是否启用
|
||||
*/
|
||||
private boolean enabled = true;
|
||||
|
||||
public boolean isEnabled() {
|
||||
return enabled && StrUtil.isNotBlank(apiKey);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,8 @@ package cn.iocoder.yudao.module.tik.voice.config;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
@@ -14,6 +16,8 @@ import java.time.Duration;
|
||||
*/
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Component
|
||||
@ConfigurationProperties(prefix = "yudao.voice.siliconflow")
|
||||
public class SiliconFlowProviderConfig extends VoiceProviderProperties.ProviderConfig {
|
||||
|
||||
/**
|
||||
@@ -61,4 +65,11 @@ public class SiliconFlowProviderConfig extends VoiceProviderProperties.ProviderC
|
||||
*/
|
||||
private Duration readTimeout = Duration.ofSeconds(180);
|
||||
|
||||
/**
|
||||
* 检查是否可用(有 API Key 即可用)
|
||||
*/
|
||||
public boolean isAvailable() {
|
||||
return isEnabled() && getApiKey() != null && !getApiKey().isEmpty();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -25,7 +25,6 @@ import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.VoiceProviderProperties;
|
||||
import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikUserVoiceDO;
|
||||
import cn.iocoder.yudao.module.tik.voice.dal.mysql.TikUserVoiceMapper;
|
||||
@@ -88,9 +87,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
@Resource
|
||||
private VoiceCloneProviderFactory voiceProviderFactory;
|
||||
|
||||
@Resource
|
||||
private CosyVoiceProperties cosyVoiceProperties;
|
||||
|
||||
@Resource
|
||||
private VoiceProviderProperties voiceProviderProperties;
|
||||
|
||||
@@ -649,8 +645,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取 CosyVoice 配置(统一入口)
|
||||
* 优先使用新配置,回退到旧配置
|
||||
* 获取 CosyVoice 配置
|
||||
*/
|
||||
private cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProviderConfig getCosyVoiceConfig() {
|
||||
if (voiceProviderProperties != null) {
|
||||
@@ -664,31 +659,23 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
|
||||
/**
|
||||
* 获取默认音频格式
|
||||
* 优先使用新配置,回退到旧配置
|
||||
*/
|
||||
private String getDefaultFormat() {
|
||||
var config = getCosyVoiceConfig();
|
||||
if (config != null) {
|
||||
return config.getAudioFormat();
|
||||
}
|
||||
if (cosyVoiceProperties != null) {
|
||||
return cosyVoiceProperties.getAudioFormat();
|
||||
}
|
||||
return "mp3";
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取默认采样率
|
||||
* 优先使用新配置,回退到旧配置
|
||||
*/
|
||||
private Integer getDefaultSampleRate() {
|
||||
var config = getCosyVoiceConfig();
|
||||
if (config != null) {
|
||||
return config.getSampleRate();
|
||||
}
|
||||
if (cosyVoiceProperties != null) {
|
||||
return cosyVoiceProperties.getSampleRate();
|
||||
}
|
||||
return 24000;
|
||||
}
|
||||
|
||||
@@ -1173,31 +1160,23 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
|
||||
/**
|
||||
* 获取默认音色ID
|
||||
* 优先使用新配置,回退到旧配置
|
||||
*/
|
||||
private String getDefaultVoiceId() {
|
||||
var config = getCosyVoiceConfig();
|
||||
if (config != null) {
|
||||
return config.getDefaultVoiceId();
|
||||
}
|
||||
if (cosyVoiceProperties != null) {
|
||||
return cosyVoiceProperties.getDefaultVoiceId();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取试听文本
|
||||
* 优先使用新配置,回退到旧配置
|
||||
*/
|
||||
private String getPreviewText() {
|
||||
var config = getCosyVoiceConfig();
|
||||
if (config != null) {
|
||||
return config.getPreviewText();
|
||||
}
|
||||
if (cosyVoiceProperties != null) {
|
||||
return cosyVoiceProperties.getPreviewText();
|
||||
}
|
||||
return "您好,欢迎体验专属音色。";
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user