feat: 功能优化
This commit is contained in:
@@ -3,9 +3,15 @@ package cn.iocoder.yudao.module.tik.voice.client;
|
||||
import cn.hutool.core.collection.CollUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.iocoder.yudao.framework.common.exception.ServiceException;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
|
||||
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
|
||||
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
|
||||
import com.alibaba.dashscope.audio.ttsv2.enrollment.Voice;
|
||||
import com.alibaba.dashscope.audio.ttsv2.enrollment.VoiceEnrollmentService;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
@@ -17,6 +23,7 @@ import okhttp3.RequestBody;
|
||||
import okhttp3.Response;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.Duration;
|
||||
import java.util.Base64;
|
||||
@@ -53,28 +60,130 @@ public class CosyVoiceClient {
|
||||
if (request == null || StrUtil.isBlank(request.getText())) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "TTS 文本不能为空");
|
||||
}
|
||||
if (StrUtil.isBlank(request.getVoiceId())) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "必须提供 voiceId");
|
||||
}
|
||||
|
||||
SpeechSynthesizer synthesizer = null;
|
||||
try {
|
||||
String payload = objectMapper.writeValueAsString(buildPayload(request));
|
||||
Request httpRequest = new Request.Builder()
|
||||
.url(properties.getTtsUrl())
|
||||
.addHeader("Authorization", "Bearer " + properties.getApiKey())
|
||||
.addHeader("Content-Type", "application/json")
|
||||
.post(RequestBody.create(payload.getBytes(StandardCharsets.UTF_8), JSON))
|
||||
log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}]",
|
||||
request.getVoiceId(),
|
||||
request.getText().length(),
|
||||
StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()));
|
||||
|
||||
// 使用 DashScope SDK 构建参数(严格按文档)
|
||||
SpeechSynthesisParam param = SpeechSynthesisParam.builder()
|
||||
.apiKey(properties.getApiKey())
|
||||
.model(StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()))
|
||||
.voice(request.getVoiceId())
|
||||
.build();
|
||||
|
||||
try (Response response = getHttpClient().newCall(httpRequest).execute()) {
|
||||
String body = response.body() != null ? response.body().string() : "";
|
||||
if (!response.isSuccessful()) {
|
||||
log.error("[CosyVoice][TTS失败][status={}, body={}]", response.code(), body);
|
||||
throw buildException(body);
|
||||
}
|
||||
return parseTtsResult(body, request);
|
||||
// 初始化合成器(同步调用传 null)
|
||||
synthesizer = new SpeechSynthesizer(param, null);
|
||||
|
||||
// 阻塞调用,获取完整音频
|
||||
ByteBuffer audioData = synthesizer.call(request.getText());
|
||||
|
||||
if (audioData == null) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 返回空音频数据");
|
||||
}
|
||||
|
||||
// 转换为字节数组(严格按照文档:直接使用 array())
|
||||
byte[] audioBytes = audioData.array();
|
||||
|
||||
log.info("[CosyVoice][TTS合成成功][Request ID: {}, audioSize={}, 首包延迟={}ms]",
|
||||
synthesizer.getLastRequestId(),
|
||||
audioBytes.length,
|
||||
synthesizer.getFirstPackageDelay());
|
||||
|
||||
// 构建返回结果
|
||||
CosyVoiceTtsResult result = new CosyVoiceTtsResult();
|
||||
result.setAudio(audioBytes);
|
||||
result.setFormat(request.getAudioFormat() != null ? request.getAudioFormat() : properties.getAudioFormat());
|
||||
result.setSampleRate(request.getSampleRate() != null ? request.getSampleRate() : properties.getSampleRate());
|
||||
result.setRequestId(synthesizer.getLastRequestId());
|
||||
result.setVoiceId(request.getVoiceId());
|
||||
|
||||
return result;
|
||||
|
||||
} catch (ServiceException ex) {
|
||||
throw ex;
|
||||
} catch (Exception ex) {
|
||||
log.error("[CosyVoice][TTS异常]", ex);
|
||||
log.error("[CosyVoice][TTS异常][voiceId={}, text={}]", request.getVoiceId(), request.getText(), ex);
|
||||
throw exception(VOICE_TTS_FAILED);
|
||||
} finally {
|
||||
// 关闭 WebSocket 连接
|
||||
if (synthesizer != null) {
|
||||
try {
|
||||
synthesizer.getDuplexApi().close(1000, "任务结束");
|
||||
} catch (Exception e) {
|
||||
log.warn("[CosyVoice][关闭连接失败]", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 使用 HTTP API 进行 TTS 合成(备用方案)
|
||||
*/
|
||||
private CosyVoiceTtsResult synthesizeViaHttp(CosyVoiceTtsRequest request) throws Exception {
|
||||
String payload = objectMapper.writeValueAsString(buildPayload(request));
|
||||
Request httpRequest = new Request.Builder()
|
||||
.url(properties.getTtsUrl())
|
||||
.addHeader("Authorization", "Bearer " + properties.getApiKey())
|
||||
.addHeader("Content-Type", "application/json")
|
||||
.post(RequestBody.create(payload.getBytes(StandardCharsets.UTF_8), JSON))
|
||||
.build();
|
||||
|
||||
try (Response response = getHttpClient().newCall(httpRequest).execute()) {
|
||||
String body = response.body() != null ? response.body().string() : "";
|
||||
if (!response.isSuccessful()) {
|
||||
log.error("[CosyVoice][TTS失败][status={}, body={}]", response.code(), body);
|
||||
throw buildException(body);
|
||||
}
|
||||
return parseTtsResult(body, request);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 调用 CosyVoice 语音复刻接口(声音注册)
|
||||
*/
|
||||
public CosyVoiceCloneResult cloneVoice(CosyVoiceCloneRequest request) {
|
||||
if (!properties.isEnabled()) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key");
|
||||
}
|
||||
if (request == null || StrUtil.isBlank(request.getUrl())) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "复刻音频URL不能为空");
|
||||
}
|
||||
if (request == null || StrUtil.isBlank(request.getTargetModel())) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "复刻模型不能为空");
|
||||
}
|
||||
if (request == null || StrUtil.isBlank(request.getPrefix())) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "音色前缀不能为空");
|
||||
}
|
||||
|
||||
try {
|
||||
log.info("[CosyVoice][开始语音复刻][targetModel={}, prefix={}, url={}]",
|
||||
request.getTargetModel(), request.getPrefix(), request.getUrl());
|
||||
|
||||
// 使用 DashScope SDK 创建语音复刻
|
||||
VoiceEnrollmentService service = new VoiceEnrollmentService(properties.getApiKey());
|
||||
Voice voice = service.createVoice(request.getTargetModel(), request.getPrefix(), request.getUrl());
|
||||
|
||||
log.info("[CosyVoice][语音复刻成功][Request ID: {}, Voice ID: {}]",
|
||||
service.getLastRequestId(), voice.getVoiceId());
|
||||
|
||||
// 构建返回结果
|
||||
CosyVoiceCloneResult result = new CosyVoiceCloneResult();
|
||||
result.setVoiceId(voice.getVoiceId());
|
||||
result.setRequestId(service.getLastRequestId());
|
||||
|
||||
return result;
|
||||
} catch (ServiceException ex) {
|
||||
throw ex;
|
||||
} catch (Exception ex) {
|
||||
log.error("[CosyVoice][语音复刻异常][targetModel={}, prefix={}]",
|
||||
request.getTargetModel(), request.getPrefix(), ex);
|
||||
throw exception(VOICE_TTS_FAILED);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* CosyVoice 语音复刻请求
|
||||
*/
|
||||
@Data
|
||||
public class CosyVoiceCloneRequest {
|
||||
|
||||
/**
|
||||
* 复刻模型(cosyvoice-v1 或 cosyvoice-v2)
|
||||
*/
|
||||
private String targetModel;
|
||||
|
||||
/**
|
||||
* 音色自定义前缀(仅允许数字和小写字母,长度<10字符)
|
||||
*/
|
||||
private String prefix;
|
||||
|
||||
/**
|
||||
* 音频文件公网URL
|
||||
*/
|
||||
private String url;
|
||||
|
||||
/**
|
||||
* 采样率,默认24000
|
||||
*/
|
||||
private Integer sampleRate;
|
||||
|
||||
/**
|
||||
* 音频格式,默认wav
|
||||
*/
|
||||
private String audioFormat;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* CosyVoice 语音复刻结果
|
||||
*/
|
||||
@Data
|
||||
public class CosyVoiceCloneResult {
|
||||
|
||||
/**
|
||||
* 生成的 voice_id
|
||||
*/
|
||||
private String voiceId;
|
||||
|
||||
/**
|
||||
* 请求ID
|
||||
*/
|
||||
private String requestId;
|
||||
|
||||
}
|
||||
@@ -50,6 +50,11 @@ public class CosyVoiceProperties {
|
||||
*/
|
||||
private String ttsUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis";
|
||||
|
||||
/**
|
||||
* 语音复刻接口地址(声音注册)
|
||||
*/
|
||||
private String voiceEnrollmentUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment";
|
||||
|
||||
/**
|
||||
* 连接超时时间
|
||||
*/
|
||||
|
||||
@@ -54,6 +54,10 @@ public class TikUserVoiceDO extends TenantBaseDO {
|
||||
* 备注信息
|
||||
*/
|
||||
private String note;
|
||||
/**
|
||||
* 复刻音色ID(CosyVoice 语音复刻生成的 voice_id)
|
||||
*/
|
||||
private String voiceId;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,8 @@ import cn.iocoder.yudao.module.tik.file.service.TikUserFileService;
|
||||
import cn.iocoder.yudao.module.tik.tikhup.service.TikHupService;
|
||||
import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.CosyVoiceClient;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
|
||||
@@ -138,20 +140,30 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
.setTranscription(null); // 初始为空,表示未识别
|
||||
voiceMapper.insert(voice);
|
||||
|
||||
// 4. 如果开启自动识别,异步执行识别(添加防重复检查)
|
||||
if (Boolean.TRUE.equals(createReqVO.getAutoTranscribe())) {
|
||||
// 再次检查是否已经有识别结果(防止并发重复创建)
|
||||
TikUserVoiceDO checkVoice = voiceMapper.selectById(voice.getId());
|
||||
if (StrUtil.isBlank(checkVoice.getTranscription())) {
|
||||
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
log.info("[createVoice][开启自动识别,配音编号({}),文件ID({}),预签名URL({})]",
|
||||
voice.getId(), fileDO.getId(), fileAccessUrl);
|
||||
asyncTranscribeVoice(voice.getId(), fileAccessUrl);
|
||||
} else {
|
||||
log.info("[createVoice][配音已经有识别结果,跳过自动识别,配音编号({})]", voice.getId());
|
||||
}
|
||||
// 4. 调用阿里云语音复刻服务,生成 voice_id
|
||||
try {
|
||||
log.info("[createVoice][开始语音复刻,配音编号({}),文件ID({})]", voice.getId(), fileDO.getId());
|
||||
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
|
||||
CosyVoiceCloneRequest cloneRequest = new CosyVoiceCloneRequest();
|
||||
cloneRequest.setTargetModel("cosyvoice-v2"); // 使用v2模型,效果更好
|
||||
cloneRequest.setPrefix("voice" + voice.getId()); // 音色前缀,格式要求
|
||||
cloneRequest.setUrl(fileAccessUrl);
|
||||
|
||||
CosyVoiceCloneResult cloneResult = cosyVoiceClient.cloneVoice(cloneRequest);
|
||||
String voiceId = cloneResult.getVoiceId();
|
||||
|
||||
// 更新配音记录,保存 voice_id
|
||||
voice.setVoiceId(voiceId);
|
||||
voiceMapper.updateById(voice);
|
||||
|
||||
log.info("[createVoice][语音复刻成功,配音编号({}),voice_id({})]", voice.getId(), voiceId);
|
||||
} catch (Exception e) {
|
||||
log.error("[createVoice][语音复刻失败,配音编号({}),错误信息: {}]", voice.getId(), e.getMessage(), e);
|
||||
// 复刻失败不影响配音记录创建,只记录日志
|
||||
}
|
||||
|
||||
|
||||
log.info("[createVoice][用户({})创建配音成功,配音编号({})]", userId, voice.getId());
|
||||
return voice.getId();
|
||||
}
|
||||
@@ -361,17 +373,25 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户");
|
||||
}
|
||||
|
||||
// 获取文件信息,用于获取文件URL
|
||||
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
||||
if (fileDO == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||
}
|
||||
// 优先使用复刻的 voice_id,如果不存在则使用文件URL(兼容旧数据)
|
||||
if (StrUtil.isNotBlank(voice.getVoiceId())) {
|
||||
log.info("[synthesizeVoice][使用复刻音色ID合成,配音编号({}),voice_id({})]", voiceConfigId, voice.getVoiceId());
|
||||
voiceId = voice.getVoiceId();
|
||||
transcriptionText = voice.getTranscription();
|
||||
} else {
|
||||
log.info("[synthesizeVoice][使用文件URL合成,配音编号({})]", voiceConfigId);
|
||||
// 获取文件信息,用于获取文件URL
|
||||
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
||||
if (fileDO == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||
}
|
||||
|
||||
// 使用文件URL和识别文本进行合成
|
||||
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
transcriptionText = voice.getTranscription();
|
||||
if (StrUtil.isBlank(transcriptionText)) {
|
||||
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
|
||||
// 使用文件URL和识别文本进行合成
|
||||
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
transcriptionText = voice.getTranscription();
|
||||
if (StrUtil.isBlank(transcriptionText)) {
|
||||
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
|
||||
}
|
||||
}
|
||||
}
|
||||
// 2. 如果没有配置ID,使用voiceId或fileUrl(系统音色或直接URL方式)
|
||||
@@ -512,21 +532,31 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
voiceConfigId, voice.getUserId(), userId);
|
||||
throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户");
|
||||
}
|
||||
|
||||
// 获取文件信息,用于获取文件URL
|
||||
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
||||
if (fileDO == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||
|
||||
// 优先使用复刻的 voice_id,如果不存在则使用文件URL(兼容旧数据)
|
||||
if (StrUtil.isNotBlank(voice.getVoiceId())) {
|
||||
log.info("[previewVoice][使用复刻音色ID试听,配音编号({}),voice_id({})]", voiceConfigId, voice.getVoiceId());
|
||||
voiceId = voice.getVoiceId();
|
||||
transcriptionText = voice.getTranscription();
|
||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(),
|
||||
StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText()));
|
||||
} else {
|
||||
log.info("[previewVoice][使用文件URL试听,配音编号({})]", voiceConfigId);
|
||||
// 获取文件信息,用于获取文件URL
|
||||
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
||||
if (fileDO == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||
}
|
||||
|
||||
// 使用文件URL和识别文本进行合成
|
||||
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
transcriptionText = voice.getTranscription();
|
||||
if (StrUtil.isBlank(transcriptionText)) {
|
||||
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
|
||||
}
|
||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(),
|
||||
StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText()));
|
||||
}
|
||||
|
||||
// 使用文件URL和识别文本进行合成
|
||||
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
transcriptionText = voice.getTranscription();
|
||||
if (StrUtil.isBlank(transcriptionText)) {
|
||||
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
|
||||
}
|
||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(),
|
||||
StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText()));
|
||||
}
|
||||
// 3. 如果没有配置ID,使用系统配音配置(需要前端传voiceId)
|
||||
else {
|
||||
|
||||
@@ -38,6 +38,9 @@ public class AppTikUserVoiceRespVO {
|
||||
@Schema(description = "备注", example = "这是一个测试配音")
|
||||
private String note;
|
||||
|
||||
@Schema(description = "复刻音色ID(CosyVoice 语音复刻生成的 voice_id)")
|
||||
private String voiceId;
|
||||
|
||||
@Schema(description = "创建时间", requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private LocalDateTime createTime;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user