增加硅基流动
This commit is contained in:
@@ -0,0 +1,123 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.http.HttpRequest;
|
||||
import cn.hutool.http.HttpResponse;
|
||||
import cn.hutool.json.JSONUtil;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowVoiceUploadRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowVoiceUploadResponse;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.SiliconFlowProviderConfig;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* 硅基流动 API 客户端
|
||||
*
|
||||
* <p>提供硅基流动语音服务的 HTTP 调用能力。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class SiliconFlowApi {
|
||||
|
||||
private final SiliconFlowProviderConfig config;
|
||||
|
||||
/**
|
||||
* 上传参考音频(语音克隆)
|
||||
*
|
||||
* @param request 上传请求
|
||||
* @return 上传响应,包含音色 URI
|
||||
*/
|
||||
public SiliconFlowVoiceUploadResponse uploadVoice(SiliconFlowVoiceUploadRequest request) {
|
||||
String url = config.getBaseUrl() + config.getVoiceUploadUrl();
|
||||
|
||||
log.info("[SiliconFlowApi][上传参考音频][url={}, model={}, customName={}]",
|
||||
url, request.getModel(), request.getCustomName());
|
||||
|
||||
try {
|
||||
String requestBody = JSONUtil.toJsonStr(request);
|
||||
log.debug("[SiliconFlowApi][请求体]{}", requestBody);
|
||||
|
||||
HttpResponse response = HttpRequest.post(url)
|
||||
.header("Authorization", "Bearer " + config.getApiKey())
|
||||
.header("Content-Type", MediaType.APPLICATION_JSON_VALUE)
|
||||
.body(requestBody)
|
||||
.timeout((int) config.getConnectTimeout().toMillis())
|
||||
.execute();
|
||||
|
||||
String responseBody = response.body();
|
||||
log.debug("[SiliconFlowApi][响应体]{}", responseBody);
|
||||
|
||||
if (!response.isOk()) {
|
||||
log.error("[SiliconFlowApi][上传失败][code={}, body={}]",
|
||||
response.getStatus(), responseBody);
|
||||
throw new RuntimeException("硅基流动上传参考音频失败: " + responseBody);
|
||||
}
|
||||
|
||||
SiliconFlowVoiceUploadResponse result = JSONUtil.toBean(responseBody,
|
||||
SiliconFlowVoiceUploadResponse.class);
|
||||
|
||||
if (StrUtil.isBlank(result.getUri())) {
|
||||
throw new RuntimeException("硅基流动上传参考音频失败: 响应中缺少 uri");
|
||||
}
|
||||
|
||||
log.info("[SiliconFlowApi][上传成功][uri={}]", result.getUri());
|
||||
return result;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("[SiliconFlowApi][上传异常]", e);
|
||||
throw new RuntimeException("硅基流动上传参考音频异常: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 文本转语音
|
||||
*
|
||||
* @param request TTS 请求
|
||||
* @return 音频数据(base64 编码)
|
||||
*/
|
||||
public String synthesize(SiliconFlowTtsRequest request) {
|
||||
String url = config.getBaseUrl() + config.getTtsUrl();
|
||||
|
||||
log.info("[SiliconFlowApi][文本转语音][url={}, model={}, inputLength={}]",
|
||||
url, request.getModel(),
|
||||
request.getInput() != null ? request.getInput().length() : 0);
|
||||
|
||||
try {
|
||||
String requestBody = JSONUtil.toJsonStr(request);
|
||||
log.debug("[SiliconFlowApi][请求体]{}", requestBody);
|
||||
|
||||
HttpResponse response = HttpRequest.post(url)
|
||||
.header("Authorization", "Bearer " + config.getApiKey())
|
||||
.header("Content-Type", MediaType.APPLICATION_JSON_VALUE)
|
||||
.body(requestBody)
|
||||
.timeout((int) config.getReadTimeout().toMillis())
|
||||
.execute();
|
||||
|
||||
if (!response.isOk()) {
|
||||
String errorBody = response.body();
|
||||
log.error("[SiliconFlowApi][合成失败][code={}, body={}]",
|
||||
response.getStatus(), errorBody);
|
||||
throw new RuntimeException("硅基流动文本转语音失败: " + errorBody);
|
||||
}
|
||||
|
||||
// 硅基流动直接返回二进制音频数据
|
||||
byte[] audioBytes = response.bodyBytes();
|
||||
String base64Audio = java.util.Base64.getEncoder().encodeToString(audioBytes);
|
||||
|
||||
log.info("[SiliconFlowApi][合成成功][format={}, size={}]",
|
||||
request.getResponseFormat(), audioBytes.length);
|
||||
return base64Audio;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("[SiliconFlowApi][合成异常]", e);
|
||||
throw new RuntimeException("硅基流动文本转语音异常: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,159 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowVoiceUploadRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.SiliconFlowVoiceUploadResponse;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceCloneResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.VoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.SiliconFlowProviderConfig;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.VoiceProviderProperties;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
import java.util.Base64;
|
||||
|
||||
/**
|
||||
* 硅基流动 Provider 实现
|
||||
*
|
||||
* <p>硅基流动语音服务的 Provider 实现。
|
||||
* 内部委托给 {@link SiliconFlowApi} 进行实际的API调用。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class SiliconFlowProvider implements VoiceCloneProvider {
|
||||
|
||||
private static final String PROVIDER_TYPE = "siliconflow";
|
||||
private static final String AUDIO_MIME_TYPE = "data:audio/mpeg;base64,";
|
||||
|
||||
private final SiliconFlowApi siliconFlowApi;
|
||||
private final VoiceProviderProperties voiceProviderProperties;
|
||||
|
||||
/**
|
||||
* 获取硅基流动配置
|
||||
*/
|
||||
private SiliconFlowProviderConfig getConfig() {
|
||||
var baseConfig = voiceProviderProperties.getProviderConfig("siliconflow");
|
||||
if (baseConfig instanceof SiliconFlowProviderConfig config) {
|
||||
return config;
|
||||
}
|
||||
|
||||
// 返回默认配置
|
||||
return new SiliconFlowProviderConfig();
|
||||
}
|
||||
|
||||
@Override
|
||||
public VoiceCloneResult cloneVoice(VoiceCloneRequest request) {
|
||||
log.info("[SiliconFlowProvider][语音克隆][audioUrl={}, model={}]",
|
||||
request.getAudioUrl(), request.getModel());
|
||||
|
||||
SiliconFlowProviderConfig config = getConfig();
|
||||
|
||||
try {
|
||||
byte[] audioData = downloadAudio(request.getAudioUrl());
|
||||
String base64Audio = Base64.getEncoder().encodeToString(audioData);
|
||||
|
||||
SiliconFlowVoiceUploadRequest sfRequest = new SiliconFlowVoiceUploadRequest();
|
||||
sfRequest.setModel(getOrDefault(request.getModel(), config.getDefaultModel()));
|
||||
sfRequest.setCustomName(getOrDefault(request.getPrefix(), "voice_" + System.currentTimeMillis()));
|
||||
sfRequest.setText(getOrDefault(request.getTranscriptionText(), config.getPreviewText()));
|
||||
sfRequest.setAudio(AUDIO_MIME_TYPE + base64Audio);
|
||||
|
||||
SiliconFlowVoiceUploadResponse sfResponse = siliconFlowApi.uploadVoice(sfRequest);
|
||||
|
||||
VoiceCloneResult result = new VoiceCloneResult();
|
||||
result.setVoiceId(sfResponse.getUri());
|
||||
result.setRequestId(sfResponse.getUri());
|
||||
|
||||
log.info("[SiliconFlowProvider][语音克隆成功][voiceId={}]", result.getVoiceId());
|
||||
return result;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("[SiliconFlowProvider][语音克隆失败]", e);
|
||||
throw new RuntimeException("硅基流动语音克隆失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
private String getOrDefault(String value, String defaultValue) {
|
||||
return StrUtil.isNotBlank(value) ? value : defaultValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public VoiceTtsResult synthesize(VoiceTtsRequest request) {
|
||||
log.info("[SiliconFlowProvider][语音合成][voiceId={}, textLength={}, model={}]",
|
||||
request.getVoiceId(),
|
||||
request.getText() != null ? request.getText().length() : 0,
|
||||
request.getModel());
|
||||
|
||||
SiliconFlowProviderConfig config = getConfig();
|
||||
|
||||
try {
|
||||
SiliconFlowTtsRequest sfRequest = SiliconFlowTtsRequest.builder()
|
||||
.model(getOrDefault(request.getModel(), config.getDefaultModel()))
|
||||
.input(request.getText())
|
||||
.voice(request.getVoiceId())
|
||||
.speed(request.getSpeechRate() != null ? request.getSpeechRate() : 1.0f)
|
||||
.sampleRate(request.getSampleRate() != null ? request.getSampleRate() : config.getSampleRate())
|
||||
.responseFormat(getOrDefault(request.getAudioFormat(), config.getAudioFormat()))
|
||||
.build();
|
||||
|
||||
String base64Audio = siliconFlowApi.synthesize(sfRequest);
|
||||
|
||||
VoiceTtsResult result = new VoiceTtsResult();
|
||||
result.setAudio(base64Audio);
|
||||
result.setFormat(sfRequest.getResponseFormat());
|
||||
result.setSampleRate(sfRequest.getSampleRate());
|
||||
result.setVoiceId(request.getVoiceId());
|
||||
|
||||
log.info("[SiliconFlowProvider][语音合成成功][format={}, audioSize={}]",
|
||||
result.getFormat(), base64Audio != null ? base64Audio.length() : 0);
|
||||
return result;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("[SiliconFlowProvider][语音合成失败]", e);
|
||||
throw new RuntimeException("硅基流动语音合成失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean supports(String providerType) {
|
||||
return PROVIDER_TYPE.equalsIgnoreCase(providerType);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getProviderType() {
|
||||
return PROVIDER_TYPE;
|
||||
}
|
||||
|
||||
private byte[] downloadAudio(String audioUrl) {
|
||||
log.info("[SiliconFlowProvider][下载音频][url={}]", audioUrl);
|
||||
try {
|
||||
URL url = new URL(audioUrl);
|
||||
try (InputStream in = url.openStream();
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream()) {
|
||||
|
||||
byte[] buffer = new byte[8192];
|
||||
int bytesRead;
|
||||
while ((bytesRead = in.read(buffer)) != -1) {
|
||||
out.write(buffer, 0, bytesRead);
|
||||
}
|
||||
byte[] result = out.toByteArray();
|
||||
log.info("[SiliconFlowProvider][下载成功][size={}]", result.length);
|
||||
return result;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("[SiliconFlowProvider][下载失败]", e);
|
||||
throw new RuntimeException("下载音频文件失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 硅基流动文本转语音请求
|
||||
*
|
||||
* <p>对应 API: POST /v1/audio/speech
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
public class SiliconFlowTtsRequest {
|
||||
|
||||
/**
|
||||
* 模型名称
|
||||
*/
|
||||
private String model;
|
||||
|
||||
/**
|
||||
* 待合成文本
|
||||
*/
|
||||
private String input;
|
||||
|
||||
/**
|
||||
* 音色 ID(使用上传参考音频返回的 uri)
|
||||
*/
|
||||
private String voice;
|
||||
|
||||
/**
|
||||
* 语速(0.25 - 4.0,默认 1.0)
|
||||
*/
|
||||
private Float speed;
|
||||
|
||||
/**
|
||||
* 采样率(如 24000)
|
||||
*/
|
||||
private Integer sampleRate;
|
||||
|
||||
/**
|
||||
* 响应格式(mp3, wav, pcm)
|
||||
*/
|
||||
private String responseFormat;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 硅基流动上传参考音频请求
|
||||
*
|
||||
* <p>对应 API: POST /v1/uploads/audio/voice
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Data
|
||||
public class SiliconFlowVoiceUploadRequest {
|
||||
|
||||
/**
|
||||
* 模型名称
|
||||
*/
|
||||
private String model;
|
||||
|
||||
/**
|
||||
* 用户自定义音色名称
|
||||
*/
|
||||
private String customName;
|
||||
|
||||
/**
|
||||
* 音频对应的文本内容
|
||||
*/
|
||||
private String text;
|
||||
|
||||
/**
|
||||
* 音频数据(base64 编码,格式:data:audio/mpeg;base64,xxx)
|
||||
*/
|
||||
private String audio;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 硅基流动上传参考音频响应
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Data
|
||||
public class SiliconFlowVoiceUploadResponse {
|
||||
|
||||
/**
|
||||
* 音色 URI(格式:speech:customName:xxx:xxx)
|
||||
*/
|
||||
private String uri;
|
||||
|
||||
}
|
||||
@@ -48,4 +48,12 @@ public class VoiceCloneRequest {
|
||||
* <p>可选值: mp3, wav, flac</p>
|
||||
*/
|
||||
private String audioFormat;
|
||||
|
||||
/**
|
||||
* 转录文本(可选)
|
||||
*
|
||||
* <p>SiliconFlow: 音频对应的文本内容</p>
|
||||
* <p>CosyVoice: 不适用</p>
|
||||
*/
|
||||
private String transcriptionText;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.config;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
/**
|
||||
* 硅基流动供应商配置
|
||||
*
|
||||
* <p>继承通用配置,添加硅基流动特有字段。
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
public class SiliconFlowProviderConfig extends VoiceProviderProperties.ProviderConfig {
|
||||
|
||||
/**
|
||||
* API 基础 URL
|
||||
*/
|
||||
private String baseUrl = "https://api.siliconflow.cn";
|
||||
|
||||
/**
|
||||
* 默认模型
|
||||
*/
|
||||
private String defaultModel = "IndexTeam/IndexTTS-2";
|
||||
|
||||
/**
|
||||
* 默认采样率
|
||||
*/
|
||||
private Integer sampleRate = 24000;
|
||||
|
||||
/**
|
||||
* 默认音频格式
|
||||
*/
|
||||
private String audioFormat = "mp3";
|
||||
|
||||
/**
|
||||
* 试听默认示例文本
|
||||
*/
|
||||
private String previewText = "您好,欢迎体验专属音色。";
|
||||
|
||||
/**
|
||||
* TTS 接口地址
|
||||
*/
|
||||
private String ttsUrl = "/v1/audio/speech";
|
||||
|
||||
/**
|
||||
* 语音复刻接口地址(上传参考音频)
|
||||
*/
|
||||
private String voiceUploadUrl = "/v1/uploads/audio/voice";
|
||||
|
||||
/**
|
||||
* 连接超时时间
|
||||
*/
|
||||
private Duration connectTimeout = Duration.ofSeconds(10);
|
||||
|
||||
/**
|
||||
* 读取超时时间(3分钟,提升语音合成成功率)
|
||||
*/
|
||||
private Duration readTimeout = Duration.ofSeconds(180);
|
||||
|
||||
}
|
||||
@@ -104,6 +104,14 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
private static final long PREVIEW_CACHE_TTL_SECONDS = 3600;
|
||||
private static final long SYNTH_CACHE_TTL_SECONDS = 24 * 3600;
|
||||
|
||||
/** 供应商类型常量 */
|
||||
private static final String PROVIDER_COSYVOICE = "cosyvoice";
|
||||
private static final String PROVIDER_SILICONFLOW = "siliconflow";
|
||||
|
||||
/** 模型常量 */
|
||||
private static final String MODEL_COSYVOICE = "cosyvoice-v3-flash";
|
||||
private static final String MODEL_SILICONFLOW = "IndexTeam/IndexTTS-2";
|
||||
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public Long createVoice(AppTikUserVoiceCreateReqVO createReqVO) {
|
||||
@@ -150,24 +158,25 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
voice.getId(), fileDO.getId(), createReqVO.getProviderType());
|
||||
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
|
||||
// 使用 Provider 接口(支持前端选择供应商,不传则使用默认)
|
||||
VoiceCloneProvider provider = voiceProviderFactory.getProvider(createReqVO.getProviderType());
|
||||
String providerType = getProviderType(createReqVO.getProviderType(), provider);
|
||||
String model = getModelByProvider(providerType);
|
||||
|
||||
VoiceCloneRequest cloneRequest = new VoiceCloneRequest();
|
||||
cloneRequest.setAudioUrl(fileAccessUrl);
|
||||
cloneRequest.setModel("cosyvoice-v3-flash"); // 使用v3-flash模型
|
||||
cloneRequest.setPrefix("voice" + voice.getId()); // 音色前缀,格式要求
|
||||
cloneRequest.setModel(model);
|
||||
cloneRequest.setPrefix("voice" + voice.getId());
|
||||
cloneRequest.setTranscriptionText(voice.getTranscription());
|
||||
|
||||
VoiceCloneResult cloneResult = provider.cloneVoice(cloneRequest);
|
||||
String voiceId = cloneResult.getVoiceId();
|
||||
|
||||
// 更新配音记录,保存 voice_id
|
||||
voice.setVoiceId(voiceId);
|
||||
voiceMapper.updateById(voice);
|
||||
|
||||
log.info("[createVoice][语音复刻成功,配音编号({}),voice_id({})]", voice.getId(), voiceId);
|
||||
} catch (Exception e) {
|
||||
log.error("[createVoice][语音复刻失败,配音编号({}),错误信息: {}]", voice.getId(), e.getMessage(), e);
|
||||
// 复刻失败不影响配音记录创建,只记录日志
|
||||
}
|
||||
|
||||
|
||||
@@ -1192,5 +1201,25 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
return "您好,欢迎体验专属音色。";
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取供应商类型
|
||||
*/
|
||||
private String getProviderType(String requestProviderType, VoiceCloneProvider provider) {
|
||||
if (StrUtil.isNotBlank(requestProviderType)) {
|
||||
return requestProviderType;
|
||||
}
|
||||
return provider.getProviderType();
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据供应商类型获取对应的模型
|
||||
*/
|
||||
private String getModelByProvider(String providerType) {
|
||||
if (PROVIDER_SILICONFLOW.equalsIgnoreCase(providerType)) {
|
||||
return MODEL_SILICONFLOW;
|
||||
}
|
||||
return MODEL_COSYVOICE; // 默认使用 CosyVoice 模型
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user