Merge remote-tracking branch 'origin/master'

This commit is contained in:
wing
2025-11-22 17:05:43 +08:00
19 changed files with 1705 additions and 1219 deletions

View File

@@ -10,7 +10,6 @@ import cn.iocoder.yudao.module.tik.userprompt.vo.UserPromptRespVO;
import cn.iocoder.yudao.module.tik.userprompt.vo.UserPromptSaveReqVO;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.parameters.RequestBody;
import io.swagger.v3.oas.annotations.tags.Tag;
import jakarta.annotation.Resource;
import jakarta.validation.Valid;
@@ -33,34 +32,31 @@ public class AppUserPromptController {
@PostMapping("/create")
@Operation(summary = "创建用户提示词")
public CommonResult<Long> createUserPrompt(@RequestBody UserPromptSaveReqVO createReqVO) {
// 设置当前登录用户ID(在验证之前设置,避免 @NotNull 验证失败)
public CommonResult<Long> createUserPrompt(@Valid @RequestBody UserPromptSaveReqVO createReqVO) {
// 设置当前登录用户ID
Long userId = getLoginUserId();
if (userId == null) {
return CommonResult.error(401, "用户未登录");
}
createReqVO.setUserId(userId);
// 手动验证必要字段
if (createReqVO.getName() == null || createReqVO.getName().trim().isEmpty()) {
return CommonResult.error(400, "提示词名称不能为空");
// 处理字符串字段的trim
if (createReqVO.getName() != null) {
createReqVO.setName(createReqVO.getName().trim());
}
if (createReqVO.getContent() == null || createReqVO.getContent().trim().isEmpty()) {
return CommonResult.error(400, "提示词内容不能为空");
}
if (createReqVO.getStatus() == null) {
return CommonResult.error(400, "状态不能为空");
if (createReqVO.getContent() != null) {
createReqVO.setContent(createReqVO.getContent().trim());
}
// 设置默认值(如果前端没有传递)
// 设置默认值
if (createReqVO.getIsPublic() == null) {
createReqVO.setIsPublic(false); // 默认私有
createReqVO.setIsPublic(false);
}
if (createReqVO.getSort() == null) {
createReqVO.setSort(0); // 默认排序为 0
createReqVO.setSort(0);
}
if (createReqVO.getUseCount() == null) {
createReqVO.setUseCount(0); // 默认使用次数为 0
createReqVO.setUseCount(0);
}
return success(userPromptService.createUserPrompt(createReqVO));

View File

@@ -3,9 +3,15 @@ package cn.iocoder.yudao.module.tik.voice.client;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import cn.iocoder.yudao.framework.common.exception.ServiceException;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
import com.alibaba.dashscope.audio.ttsv2.enrollment.Voice;
import com.alibaba.dashscope.audio.ttsv2.enrollment.VoiceEnrollmentService;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
@@ -17,6 +23,7 @@ import okhttp3.RequestBody;
import okhttp3.Response;
import org.springframework.stereotype.Component;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.Base64;
@@ -53,28 +60,130 @@ public class CosyVoiceClient {
if (request == null || StrUtil.isBlank(request.getText())) {
throw exception0(VOICE_TTS_FAILED.getCode(), "TTS 文本不能为空");
}
if (StrUtil.isBlank(request.getVoiceId())) {
throw exception0(VOICE_TTS_FAILED.getCode(), "必须提供 voiceId");
}
SpeechSynthesizer synthesizer = null;
try {
String payload = objectMapper.writeValueAsString(buildPayload(request));
Request httpRequest = new Request.Builder()
.url(properties.getTtsUrl())
.addHeader("Authorization", "Bearer " + properties.getApiKey())
.addHeader("Content-Type", "application/json")
.post(RequestBody.create(payload.getBytes(StandardCharsets.UTF_8), JSON))
log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}]",
request.getVoiceId(),
request.getText().length(),
StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()));
// 使用 DashScope SDK 构建参数(严格按文档)
SpeechSynthesisParam param = SpeechSynthesisParam.builder()
.apiKey(properties.getApiKey())
.model(StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()))
.voice(request.getVoiceId())
.build();
try (Response response = getHttpClient().newCall(httpRequest).execute()) {
String body = response.body() != null ? response.body().string() : "";
if (!response.isSuccessful()) {
log.error("[CosyVoice][TTS失败][status={}, body={}]", response.code(), body);
throw buildException(body);
}
return parseTtsResult(body, request);
// 初始化合成器(同步调用传 null
synthesizer = new SpeechSynthesizer(param, null);
// 阻塞调用,获取完整音频
ByteBuffer audioData = synthesizer.call(request.getText());
if (audioData == null) {
throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 返回空音频数据");
}
// 转换为字节数组(严格按照文档:直接使用 array()
byte[] audioBytes = audioData.array();
log.info("[CosyVoice][TTS合成成功][Request ID: {}, audioSize={}, 首包延迟={}ms]",
synthesizer.getLastRequestId(),
audioBytes.length,
synthesizer.getFirstPackageDelay());
// 构建返回结果
CosyVoiceTtsResult result = new CosyVoiceTtsResult();
result.setAudio(audioBytes);
result.setFormat(request.getAudioFormat() != null ? request.getAudioFormat() : properties.getAudioFormat());
result.setSampleRate(request.getSampleRate() != null ? request.getSampleRate() : properties.getSampleRate());
result.setRequestId(synthesizer.getLastRequestId());
result.setVoiceId(request.getVoiceId());
return result;
} catch (ServiceException ex) {
throw ex;
} catch (Exception ex) {
log.error("[CosyVoice][TTS异常]", ex);
log.error("[CosyVoice][TTS异常][voiceId={}, text={}]", request.getVoiceId(), request.getText(), ex);
throw exception(VOICE_TTS_FAILED);
} finally {
// 关闭 WebSocket 连接
if (synthesizer != null) {
try {
synthesizer.getDuplexApi().close(1000, "任务结束");
} catch (Exception e) {
log.warn("[CosyVoice][关闭连接失败]", e);
}
}
}
}
/**
 * Fallback TTS path: posts the synthesis payload to the configured HTTP endpoint
 * instead of going through the DashScope SDK.
 *
 * @param request TTS request used both to build the JSON payload and to parse the response
 * @return the result produced by {@code parseTtsResult} from the response body
 * @throws Exception if serialization or the HTTP call fails, or the exception built by
 *                   {@code buildException} when the response status is not successful
 */
private CosyVoiceTtsResult synthesizeViaHttp(CosyVoiceTtsRequest request) throws Exception {
    // Serialize the payload up front so the request builder below only deals with bytes.
    byte[] jsonBytes = objectMapper.writeValueAsString(buildPayload(request))
            .getBytes(StandardCharsets.UTF_8);

    Request.Builder requestBuilder = new Request.Builder();
    requestBuilder.url(properties.getTtsUrl());
    requestBuilder.addHeader("Authorization", "Bearer " + properties.getApiKey());
    requestBuilder.addHeader("Content-Type", "application/json");
    requestBuilder.post(RequestBody.create(jsonBytes, JSON));
    Request httpRequest = requestBuilder.build();

    try (Response response = getHttpClient().newCall(httpRequest).execute()) {
        // A missing response body is treated as an empty string.
        String body = "";
        if (response.body() != null) {
            body = response.body().string();
        }
        if (response.isSuccessful()) {
            return parseTtsResult(body, request);
        }
        log.error("[CosyVoice][TTS失败][status={}, body={}]", response.code(), body);
        throw buildException(body);
    }
}
/**
 * Registers (clones) a voice through the DashScope voice-enrollment SDK.
 *
 * @param request clone parameters; {@code url}, {@code targetModel} and {@code prefix}
 *                must all be non-blank
 * @return a result carrying the voice id and the request id reported by the SDK
 * @throws ServiceException built with the {@code VOICE_TTS_FAILED} code when the client is
 *                          disabled, a required field is blank, or the SDK call fails
 */
public CosyVoiceCloneResult cloneVoice(CosyVoiceCloneRequest request) {
    if (!properties.isEnabled()) {
        throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key");
    }
    // The first guard rejects a null request, so the later checks only need the field test.
    if (request == null || StrUtil.isBlank(request.getUrl())) {
        throw exception0(VOICE_TTS_FAILED.getCode(), "复刻音频URL不能为空");
    }
    if (StrUtil.isBlank(request.getTargetModel())) {
        throw exception0(VOICE_TTS_FAILED.getCode(), "复刻模型不能为空");
    }
    if (StrUtil.isBlank(request.getPrefix())) {
        throw exception0(VOICE_TTS_FAILED.getCode(), "音色前缀不能为空");
    }

    try {
        // The source URL may be presigned, in which case its query string carries a
        // signature. Log only the part before '?' so credentials do not end up in logs.
        String sourceUrl = request.getUrl();
        int queryStart = sourceUrl.indexOf('?');
        String loggableUrl = queryStart >= 0 ? sourceUrl.substring(0, queryStart) : sourceUrl;
        log.info("[CosyVoice][开始语音复刻][targetModel={}, prefix={}, url={}]",
                request.getTargetModel(), request.getPrefix(), loggableUrl);

        // Create the cloned voice via the DashScope SDK; the full URL is still sent to the SDK.
        VoiceEnrollmentService service = new VoiceEnrollmentService(properties.getApiKey());
        Voice voice = service.createVoice(request.getTargetModel(), request.getPrefix(), request.getUrl());

        log.info("[CosyVoice][语音复刻成功][Request ID: {}, Voice ID: {}]",
                service.getLastRequestId(), voice.getVoiceId());

        CosyVoiceCloneResult result = new CosyVoiceCloneResult();
        result.setVoiceId(voice.getVoiceId());
        result.setRequestId(service.getLastRequestId());
        return result;
    } catch (ServiceException ex) {
        // Already a domain exception: propagate unchanged.
        throw ex;
    } catch (Exception ex) {
        log.error("[CosyVoice][语音复刻异常][targetModel={}, prefix={}]",
                request.getTargetModel(), request.getPrefix(), ex);
        throw exception(VOICE_TTS_FAILED);
    }
}
@@ -86,27 +195,55 @@ public class CosyVoiceClient {
Map<String, Object> input = new HashMap<>();
input.put("text", request.getText());
String voiceId = StrUtil.blankToDefault(request.getVoiceId(), properties.getDefaultVoiceId());
if (StrUtil.isNotBlank(voiceId)) {
input.put("voice", voiceId);
// 优先使用fileUrl语音克隆否则使用voiceId系统音色
if (StrUtil.isNotBlank(request.getFileUrl())) {
// 直接使用预签名URL带签名和时效阿里云API需要这个签名URL
input.put("audio_url", request.getFileUrl());
log.info("[CosyVoice][使用语音克隆][audio_url={}]", request.getFileUrl());
// 如果提供了参考文本,也一并传递(用于提高语音克隆质量)
if (StrUtil.isNotBlank(request.getReferenceText())) {
input.put("reference_text", request.getReferenceText());
log.info("[CosyVoice][添加参考文本][length={}]", request.getReferenceText().length());
}
} else {
// 使用系统音色
String voiceId = StrUtil.blankToDefault(request.getVoiceId(), properties.getDefaultVoiceId());
if (StrUtil.isNotBlank(voiceId)) {
input.put("voice", voiceId);
log.info("[CosyVoice][使用系统音色][voice={}]", voiceId);
} else {
log.warn("[CosyVoice][未提供voiceId或fileUrl]");
}
}
payload.put("input", input);
Map<String, Object> parameters = new HashMap<>();
int sampleRate = request.getSampleRate() != null ? request.getSampleRate() : properties.getSampleRate();
parameters.put("sample_rate", sampleRate);
String format = StrUtil.blankToDefault(request.getAudioFormat(), properties.getAudioFormat());
// 根据官方文档,统一使用小写格式
String format = StrUtil.blankToDefault(request.getAudioFormat(), properties.getAudioFormat()).toLowerCase();
parameters.put("format", format);
if (request.getSpeechRate() != null) {
parameters.put("speech_rate", request.getSpeechRate());
}
if (request.getVolume() != null) {
parameters.put("volume", request.getVolume());
// 文档显示volume范围是0-100
parameters.put("volume", Math.round(request.getVolume()));
}
if (request.isPreview()) {
parameters.put("preview", true);
}
payload.put("parameters", parameters);
// 打印完整请求体(用于调试)
log.info("[CosyVoice][请求参数][model={}, sample_rate={}, format={}, text_length={}]",
model, sampleRate, format, request.getText().length());
return payload;
}
@@ -173,6 +310,26 @@ public class CosyVoiceClient {
return exception0(VOICE_TTS_FAILED.getCode(), body);
}
}
/**
 * Strips the query string and fragment from a URL.
 *
 * <p>When the input parses as a URL the result is {@code protocol://host[:port]path};
 * an explicit port is preserved so that endpoints on non-default ports keep pointing
 * at the same server. When parsing fails, everything from the first {@code '?'} or
 * {@code '#'} onwards is removed textually.
 *
 * @param url URL that may contain query parameters and/or a fragment; may be blank or null
 * @return the URL without query parameters and fragment; a blank or null input is returned as-is
 */
private String extractRawUrl(String url) {
    if (StrUtil.isBlank(url)) {
        return url;
    }
    try {
        java.net.URL urlObj = new java.net.URL(url);
        StringBuilder raw = new StringBuilder(urlObj.getProtocol())
                .append("://")
                .append(urlObj.getHost());
        // getHost() does not include the port; add it back when one was given explicitly.
        if (urlObj.getPort() != -1) {
            raw.append(':').append(urlObj.getPort());
        }
        return raw.append(urlObj.getPath()).toString();
    } catch (Exception ignored) {
        // Not a parseable URL: cut at the first '?' or '#'. Index-based on purpose,
        // because String.split drops trailing empty strings and "?".split("\\?")[0]
        // would throw ArrayIndexOutOfBoundsException.
        int cut = url.length();
        int queryStart = url.indexOf('?');
        if (queryStart >= 0) {
            cut = queryStart;
        }
        int fragmentStart = url.indexOf('#');
        if (fragmentStart >= 0 && fragmentStart < cut) {
            cut = fragmentStart;
        }
        return url.substring(0, cut);
    }
}
}

View File

@@ -0,0 +1,36 @@
package cn.iocoder.yudao.module.tik.voice.client.dto;
import lombok.Data;
/**
 * Request parameters for CosyVoice voice cloning (voice enrollment).
 */
@Data
public class CosyVoiceCloneRequest {
/**
 * Target clone model: cosyvoice-v1 or cosyvoice-v2.
 */
private String targetModel;
/**
 * Custom prefix for the voice (digits and lowercase letters only, fewer than 10 characters).
 */
private String prefix;
/**
 * Publicly reachable URL of the source audio file.
 */
private String url;
/**
 * Sample rate (default 24000).
 */
private Integer sampleRate;
/**
 * Audio format (default wav).
 */
private String audioFormat;
}

View File

@@ -0,0 +1,21 @@
package cn.iocoder.yudao.module.tik.voice.client.dto;
import lombok.Data;
/**
 * Result of a CosyVoice voice cloning (voice enrollment) call.
 */
@Data
public class CosyVoiceCloneResult {
/**
 * The generated voice_id.
 */
private String voiceId;
/**
 * Request ID.
 */
private String requestId;
}

View File

@@ -20,6 +20,16 @@ public class CosyVoiceTtsRequest {
*/
private String voiceId;
/**
* 语音文件URL当使用语音URL合成时使用替代voiceId
*/
private String fileUrl;
/**
* 参考音频文本当使用fileUrl时用于提高克隆质量
*/
private String referenceText;
/**
* 模型(默认 cosyvoice-v2
*/

View File

@@ -50,6 +50,11 @@ public class CosyVoiceProperties {
*/
private String ttsUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis";
/**
* 语音复刻接口地址(声音注册)
*/
private String voiceEnrollmentUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment";
/**
* 连接超时时间
*/

View File

@@ -54,6 +54,10 @@ public class TikUserVoiceDO extends TenantBaseDO {
* 备注信息
*/
private String note;
/**
* 复刻音色IDCosyVoice 语音复刻生成的 voice_id
*/
private String voiceId;
}

View File

@@ -20,6 +20,8 @@ import cn.iocoder.yudao.module.tik.file.service.TikUserFileService;
import cn.iocoder.yudao.module.tik.tikhup.service.TikHupService;
import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
import cn.iocoder.yudao.module.tik.voice.client.CosyVoiceClient;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
@@ -91,8 +93,8 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
@Resource
private StringRedisTemplate stringRedisTemplate;
/** 预签名URL过期时间1小时,单位:秒) */
private static final int PRESIGN_URL_EXPIRATION_SECONDS = 3600;
/** 预签名URL过期时间24小时,单位:秒) */
private static final int PRESIGN_URL_EXPIRATION_SECONDS = 24 * 3600;
private static final String PREVIEW_CACHE_PREFIX = "tik:voice:preview:";
private static final String SYNTH_CACHE_PREFIX = "tik:voice:tts:";
private static final long PREVIEW_CACHE_TTL_SECONDS = 3600;
@@ -138,14 +140,30 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
.setTranscription(null); // 初始为空,表示未识别
voiceMapper.insert(voice);
// 4. 如果开启自动识别,异步执行识别
if (Boolean.TRUE.equals(createReqVO.getAutoTranscribe())) {
// 4. 调用阿里云语音复刻服务,生成 voice_id
try {
log.info("[createVoice][开始语音复刻,配音编号({})文件ID({})]", voice.getId(), fileDO.getId());
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
log.info("[createVoice][开启自动识别,配音编号({})文件ID({})预签名URL({})]",
voice.getId(), fileDO.getId(), fileAccessUrl);
asyncTranscribeVoice(voice.getId(), fileAccessUrl);
CosyVoiceCloneRequest cloneRequest = new CosyVoiceCloneRequest();
cloneRequest.setTargetModel("cosyvoice-v2"); // 使用v2模型效果更好
cloneRequest.setPrefix("voice" + voice.getId()); // 音色前缀,格式要求
cloneRequest.setUrl(fileAccessUrl);
CosyVoiceCloneResult cloneResult = cosyVoiceClient.cloneVoice(cloneRequest);
String voiceId = cloneResult.getVoiceId();
// 更新配音记录,保存 voice_id
voice.setVoiceId(voiceId);
voiceMapper.updateById(voice);
log.info("[createVoice][语音复刻成功,配音编号({})voice_id({})]", voice.getId(), voiceId);
} catch (Exception e) {
log.error("[createVoice][语音复刻失败,配音编号({}),错误信息: {}]", voice.getId(), e.getMessage(), e);
// 复刻失败不影响配音记录创建,只记录日志
}
log.info("[createVoice][用户({})创建配音成功,配音编号({})]", userId, voice.getId());
return voice.getId();
}
@@ -230,6 +248,10 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
// 查询配音列表
PageResult<TikUserVoiceDO> pageResult = voiceMapper.selectPage(pageReqVO);
// 增加日志记录查询到的配音数量和用户ID
log.info("[getVoicePage][查询配音列表用户ID={}, 总数={}]",
userId, pageResult.getTotal());
// 批量查询文件信息,避免 N+1 查询
Map<Long, FileDO> fileMap = new HashMap<>();
if (CollUtil.isNotEmpty(pageResult.getList())) {
@@ -237,7 +259,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
.map(TikUserVoiceDO::getFileId)
.distinct()
.collect(Collectors.toList());
if (CollUtil.isNotEmpty(fileIds)) {
List<FileDO> files = fileMapper.selectBatchIds(fileIds);
Map<Long, FileDO> tempFileMap = files.stream()
@@ -258,6 +280,12 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
vo.setFileUrl(presignedUrl);
}
// 增加日志记录转换后的VO数据
if (log.isDebugEnabled()) {
log.debug("[getVoicePage][转换VO配音ID={}, 名称={}]",
vo.getId(), vo.getName());
}
return vo;
});
}
@@ -297,28 +325,101 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
throw exception(VOICE_NOT_EXISTS);
}
// 2. 获取文件URL
// 2. 检查是否已经有识别结果
if (StrUtil.isNotBlank(voice.getTranscription())) {
log.info("[transcribeVoice][配音已经识别过,配音编号({}),跳过识别]", id);
return;
}
// 3. 获取文件URL
FileDO fileDO = fileMapper.selectById(voice.getFileId());
if (fileDO == null) {
throw exception(VOICE_FILE_NOT_EXISTS);
}
// 3. 异步执行识别
// 4. 异步执行识别
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
asyncTranscribeVoice(id, fileAccessUrl);
}
@Override
public AppTikVoiceTtsRespVO synthesizeVoice(AppTikVoiceTtsReqVO reqVO) {
Long userId = SecurityFrameworkUtils.getLoginUserId();
Long voiceConfigId = reqVO.getVoiceConfigId();
// 增加请求参数日志
log.info("[synthesizeVoice][开始合成请求参数voiceConfigId={}, voiceId={}, fileUrl={}, userId={}]",
voiceConfigId, reqVO.getVoiceId(), reqVO.getFileUrl(), userId);
String voiceId = null;
String fileUrl = null;
String transcriptionText = null;
// 1. 如果有配置ID根据配置ID查询配音信息用户配音
if (voiceConfigId != null) {
log.info("[synthesizeVoice][开始合成,配音编号({}),用户({})]", voiceConfigId, userId);
TikUserVoiceDO voice = voiceMapper.selectById(voiceConfigId);
log.info("[synthesizeVoice][查询配音结果voice={},配音编号={},用户ID={}]",
voice != null ? "存在" : "不存在", voiceConfigId, userId);
if (voice == null) {
log.warn("[synthesizeVoice][配音不存在,配音编号({}),用户({})]", voiceConfigId, userId);
throw exception(VOICE_NOT_EXISTS, "配音不存在,编号:" + voiceConfigId);
}
if (!voice.getUserId().equals(userId)) {
log.warn("[synthesizeVoice][配音不属于当前用户,配音编号({}),配音用户({}),当前用户({})]",
voiceConfigId, voice.getUserId(), userId);
throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户");
}
// 优先使用复刻的 voice_id如果不存在则使用文件URL兼容旧数据
if (StrUtil.isNotBlank(voice.getVoiceId())) {
log.info("[synthesizeVoice][使用复刻音色ID合成配音编号({})voice_id({})]", voiceConfigId, voice.getVoiceId());
voiceId = voice.getVoiceId();
transcriptionText = voice.getTranscription();
} else {
log.info("[synthesizeVoice][使用文件URL合成配音编号({})]", voiceConfigId);
// 获取文件信息用于获取文件URL
FileDO fileDO = fileMapper.selectById(voice.getFileId());
if (fileDO == null) {
throw exception(VOICE_FILE_NOT_EXISTS);
}
// 使用文件URL和识别文本进行合成
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
transcriptionText = voice.getTranscription();
if (StrUtil.isBlank(transcriptionText)) {
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
}
}
}
// 2. 如果没有配置ID使用voiceId或fileUrl系统音色或直接URL方式
else {
// 参数验证如果使用fileUrl建议提供transcriptionText以提高克隆质量
if (StrUtil.isNotBlank(reqVO.getFileUrl()) && StrUtil.isBlank(reqVO.getTranscriptionText())) {
log.warn("[synthesizeVoice][使用fileUrl但未提供transcriptionText可能影响克隆质量]");
}
// 参数验证必须提供voiceId或fileUrl之一
if (StrUtil.isBlank(reqVO.getVoiceId()) && StrUtil.isBlank(reqVO.getFileUrl())) {
throw exception(VOICE_NOT_EXISTS, "请提供音色ID(voiceId)或语音文件URL(fileUrl)");
}
voiceId = reqVO.getVoiceId();
fileUrl = reqVO.getFileUrl();
transcriptionText = reqVO.getTranscriptionText();
}
String finalText = determineSynthesisText(
reqVO.getTranscriptionText(),
transcriptionText,
reqVO.getInputText(),
false);
finalText = appendEmotion(finalText, reqVO.getEmotion());
String cacheKey = buildCacheKey(SYNTH_CACHE_PREFIX,
reqVO.getVoiceId(),
reqVO.getFileUrl(),
voiceId,
fileUrl,
finalText,
reqVO.getSpeechRate(),
reqVO.getVolume(),
@@ -333,7 +434,9 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
finalText,
reqVO.getVoiceId(),
voiceId,
fileUrl,
transcriptionText,
reqVO.getModel(),
reqVO.getSpeechRate(),
reqVO.getVolume(),
@@ -343,82 +446,196 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
));
String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat());
String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
String finalVoiceId = StrUtil.blankToDefault(voiceId, cosyVoiceProperties.getDefaultVoiceId());
ByteArrayMultipartFile multipartFile = new ByteArrayMultipartFile(
"file",
buildFileName(voiceId, format),
buildFileName(finalVoiceId, format),
resolveContentType(format),
ttsResult.getAudio()
);
Long fileId = tikUserFileService.uploadFile(multipartFile, "audio", null);
Long infraFileId = tikUserFileService.uploadFile(multipartFile, "audio", null);
// 通过infraFileId查询TikUserFileDO获取用户文件ID
TikUserFileDO userFile = userFileMapper.selectOne(
new LambdaQueryWrapperX<TikUserFileDO>()
.eq(TikUserFileDO::getFileId, infraFileId)
.eq(TikUserFileDO::getUserId, SecurityFrameworkUtils.getLoginUserId())
.orderByDesc(TikUserFileDO::getId)
.last("LIMIT 1"));
if (userFile == null) {
throw exception(VOICE_FILE_NOT_EXISTS, "文件上传成功但未找到用户文件记录");
}
AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO();
respVO.setFileId(fileId);
respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(fileId));
respVO.setFileId(infraFileId); // 返回infraFileId保持与原有逻辑一致
respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(userFile.getId())); // 使用TikUserFileDO.id获取播放URL
respVO.setFormat(format);
respVO.setSampleRate(ttsResult.getSampleRate());
respVO.setRequestId(ttsResult.getRequestId());
respVO.setVoiceId(voiceId);
respVO.setVoiceId(finalVoiceId);
saveSynthCache(cacheKey, new SynthCacheEntry(
Base64.getEncoder().encodeToString(ttsResult.getAudio()),
format,
ttsResult.getSampleRate(),
ttsResult.getRequestId(),
voiceId
finalVoiceId
));
return respVO;
}
@Override
public AppTikVoicePreviewRespVO previewVoice(AppTikVoicePreviewReqVO reqVO) {
String finalText = determineSynthesisText(
reqVO.getTranscriptionText(),
reqVO.getInputText(),
true);
finalText = appendEmotion(finalText, reqVO.getEmotion());
Long userId = SecurityFrameworkUtils.getLoginUserId();
Long voiceConfigId = reqVO.getVoiceConfigId();
// 增加请求参数日志
log.info("[previewVoice][开始试听请求参数voiceConfigId={}, voiceId={}, fileUrl={}, userId={}]",
voiceConfigId, reqVO.getVoiceId(), reqVO.getFileUrl(), userId);
String voiceId = null;
String fileUrl = null;
String transcriptionText = null;
String inputText;
// 1. 如果传入了fileUrl和transcriptionText直接使用通过语音URL合成
if (StrUtil.isNotBlank(reqVO.getFileUrl()) && StrUtil.isNotBlank(reqVO.getTranscriptionText())) {
log.info("[previewVoice][使用语音URL合成用户({})]", userId);
// 如果传入的是预签名URL提取原始URL去除查询参数避免二次签名
String rawFileUrl = extractRawUrl(reqVO.getFileUrl());
// 如果提取后的URL与原始URL不同说明是预签名URL需要重新生成预签名URL
// 否则直接使用可能是原始URL或公开URL
if (!rawFileUrl.equals(reqVO.getFileUrl())) {
// 重新生成预签名URL确保有效期足够长
fileUrl = fileApi.presignGetUrl(rawFileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
log.info("[previewVoice][检测到预签名URL已提取原始URL并重新生成预签名URL]");
} else {
fileUrl = reqVO.getFileUrl();
}
transcriptionText = reqVO.getTranscriptionText();
inputText = StrUtil.blankToDefault(reqVO.getInputText(), transcriptionText);
}
// 2. 如果有配置ID根据配置ID查询配音信息用户配音
else if (voiceConfigId != null) {
log.info("[previewVoice][开始试听,配音编号({}),用户({})]", voiceConfigId, userId);
TikUserVoiceDO voice = voiceMapper.selectById(voiceConfigId);
log.info("[previewVoice][查询配音结果voice={},配音编号={},用户ID={}]",
voice != null ? "存在" : "不存在", voiceConfigId, userId);
if (voice == null) {
log.warn("[previewVoice][配音不存在,配音编号({}),用户({})]", voiceConfigId, userId);
throw exception(VOICE_NOT_EXISTS, "配音不存在,编号:" + voiceConfigId);
}
if (!voice.getUserId().equals(userId)) {
log.warn("[previewVoice][配音不属于当前用户,配音编号({}),配音用户({}),当前用户({})]",
voiceConfigId, voice.getUserId(), userId);
throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户");
}
// 优先使用复刻的 voice_id如果不存在则使用文件URL兼容旧数据
if (StrUtil.isNotBlank(voice.getVoiceId())) {
log.info("[previewVoice][使用复刻音色ID试听配音编号({})voice_id({})]", voiceConfigId, voice.getVoiceId());
voiceId = voice.getVoiceId();
transcriptionText = voice.getTranscription();
inputText = StrUtil.blankToDefault(reqVO.getInputText(),
StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText()));
} else {
log.info("[previewVoice][使用文件URL试听配音编号({})]", voiceConfigId);
// 获取文件信息用于获取文件URL
FileDO fileDO = fileMapper.selectById(voice.getFileId());
if (fileDO == null) {
throw exception(VOICE_FILE_NOT_EXISTS);
}
// 使用文件URL和识别文本进行合成
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
transcriptionText = voice.getTranscription();
if (StrUtil.isBlank(transcriptionText)) {
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
}
inputText = StrUtil.blankToDefault(reqVO.getInputText(),
StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText()));
}
}
// 3. 如果没有配置ID使用系统配音配置需要前端传voiceId
else {
log.info("[previewVoice][开始试听,使用系统配音配置,用户({})]", userId);
voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
if (StrUtil.isBlank(voiceId)) {
throw exception(VOICE_NOT_EXISTS, "系统配音音色ID不能为空");
}
inputText = StrUtil.blankToDefault(reqVO.getInputText(), cosyVoiceProperties.getPreviewText());
}
String finalText = determineSynthesisText(
transcriptionText,
inputText,
true);
// 使用请求参数或默认值
String emotion = StrUtil.blankToDefault(reqVO.getEmotion(), "neutral");
finalText = appendEmotion(finalText, emotion);
Float speechRate = reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f;
Float volume = reqVO.getVolume() != null ? reqVO.getVolume() : 0f;
String audioFormat = StrUtil.blankToDefault(reqVO.getAudioFormat(), "mp3");
// 构建缓存key使用fileUrl或voiceId
String cacheKey = buildCacheKey(PREVIEW_CACHE_PREFIX,
reqVO.getVoiceId(),
reqVO.getFileUrl(),
voiceId,
fileUrl,
finalText,
reqVO.getSpeechRate(),
reqVO.getVolume(),
reqVO.getEmotion(),
reqVO.getAudioFormat(),
speechRate,
volume,
emotion,
audioFormat,
null);
PreviewCacheEntry previewCache = getPreviewCache(cacheKey);
String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
if (previewCache != null) {
log.info("[previewVoice][使用缓存,配音编号({})voiceId({})fileUrl({})cacheKey({})]",
voiceConfigId, voiceId, fileUrl, cacheKey);
// 缓存中存储的是原始URL需要生成预签名URL
String cachedUrl = fileApi.presignGetUrl(previewCache.getFileUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
return buildPreviewResp(previewCache, cachedUrl, voiceId);
}
log.info("[previewVoice][调用CosyVoice合成配音编号({})voiceId({})fileUrl({}),文本长度({})]",
voiceConfigId, voiceId, fileUrl, finalText.length());
CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
finalText,
reqVO.getVoiceId(),
reqVO.getModel(),
reqVO.getSpeechRate(),
reqVO.getVolume(),
voiceId,
fileUrl,
transcriptionText, // 参考音频文本,用于提高克隆质量
null, // 使用默认模型
speechRate,
volume,
null,
reqVO.getAudioFormat(),
audioFormat,
true
));
String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat());
voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
String objectName = buildFileName(voiceId, format);
String fileUrl = fileApi.createFile(ttsResult.getAudio(), objectName, "voice/preview", resolveContentType(format));
String presignUrl = fileApi.presignGetUrl(fileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
PreviewCacheEntry entry = new PreviewCacheEntry(fileUrl, format, ttsResult.getSampleRate(), ttsResult.getRequestId());
String format = defaultFormat(ttsResult.getFormat(), audioFormat);
String identifier = StrUtil.isNotBlank(fileUrl) ? "fileUrl" : (StrUtil.isNotBlank(voiceId) ? voiceId : "voice");
String objectName = buildFileName(identifier, format);
// 上传到OSS返回原始URL不是预签名URL
String resultFileUrl = fileApi.createFile(ttsResult.getAudio(), objectName, "voice/preview", resolveContentType(format));
log.info("[previewVoice][合成成功,配音编号({})voiceId({})fileUrl({})resultFileUrl({})format({})]",
voiceConfigId, voiceId, fileUrl, resultFileUrl, format);
// 生成预签名URL用于返回给前端
String presignUrl = fileApi.presignGetUrl(resultFileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
// 缓存中存储原始URL不是预签名URL下次使用时再生成预签名URL
PreviewCacheEntry entry = new PreviewCacheEntry(resultFileUrl, format, ttsResult.getSampleRate(), ttsResult.getRequestId());
savePreviewCache(cacheKey, entry);
return buildPreviewResp(entry, presignUrl, voiceId);
}
private CosyVoiceTtsRequest buildTtsRequest(String text,
String voiceId,
String fileUrl,
String referenceText,
String model,
Float speechRate,
Float volume,
@@ -428,6 +645,8 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
return CosyVoiceTtsRequest.builder()
.text(text)
.voiceId(voiceId)
.fileUrl(fileUrl)
.referenceText(referenceText)
.model(model)
.speechRate(speechRate)
.volume(volume)
@@ -500,6 +719,26 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
return "【情感:" + emotionLabel + "" + text;
}
/**
 * Strips the query string and fragment from a URL.
 *
 * <p>When the input parses as a URL the result is {@code protocol://host[:port]path};
 * an explicit port is preserved so that the stripped URL still addresses the same
 * server when it is re-signed or used as a cache-key component. When parsing fails,
 * everything from the first {@code '?'} or {@code '#'} onwards is removed textually.
 *
 * @param url URL that may contain query parameters and/or a fragment; may be blank or null
 * @return the URL without query parameters and fragment; a blank or null input is returned as-is
 */
private String extractRawUrl(String url) {
    if (StrUtil.isBlank(url)) {
        return url;
    }
    try {
        java.net.URL urlObj = new java.net.URL(url);
        StringBuilder raw = new StringBuilder(urlObj.getProtocol())
                .append("://")
                .append(urlObj.getHost());
        // getHost() does not include the port; add it back when one was given explicitly.
        if (urlObj.getPort() != -1) {
            raw.append(':').append(urlObj.getPort());
        }
        return raw.append(urlObj.getPath()).toString();
    } catch (Exception ignored) {
        // Not a parseable URL: cut at the first '?' or '#'. Index-based on purpose,
        // because String.split drops trailing empty strings and "?".split("\\?")[0]
        // would throw ArrayIndexOutOfBoundsException.
        int cut = url.length();
        int queryStart = url.indexOf('?');
        if (queryStart >= 0) {
            cut = queryStart;
        }
        int fragmentStart = url.indexOf('#');
        if (fragmentStart >= 0 && fragmentStart < cut) {
            cut = fragmentStart;
        }
        return url.substring(0, cut);
    }
}
private String buildCacheKey(String prefix,
String voiceId,
String fileUrl,
@@ -509,9 +748,17 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
String emotion,
String audioFormat,
Integer sampleRate) {
String identifier = StrUtil.isNotBlank(voiceId)
? voiceId
: StrUtil.blankToDefault(fileUrl, "no-voice");
// 构建标识符优先使用voiceId如果没有则使用fileUrl的稳定部分去除查询参数
String identifier;
if (StrUtil.isNotBlank(voiceId)) {
identifier = voiceId;
} else if (StrUtil.isNotBlank(fileUrl)) {
// 对于fileUrl提取稳定部分去除预签名URL的查询参数避免缓存key不稳定
identifier = extractRawUrl(fileUrl);
} else {
identifier = "no-voice";
}
String payload = StrUtil.join("|",
identifier,
text,
@@ -584,11 +831,22 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
resolveContentType(format),
audioBytes
);
Long fileId = tikUserFileService.uploadFile(multipartFile, "audio", null);
Long infraFileId = tikUserFileService.uploadFile(multipartFile, "audio", null);
// 通过infraFileId查询TikUserFileDO获取用户文件ID
TikUserFileDO userFile = userFileMapper.selectOne(
new LambdaQueryWrapperX<TikUserFileDO>()
.eq(TikUserFileDO::getFileId, infraFileId)
.eq(TikUserFileDO::getUserId, SecurityFrameworkUtils.getLoginUserId())
.orderByDesc(TikUserFileDO::getId)
.last("LIMIT 1"));
if (userFile == null) {
throw exception(VOICE_FILE_NOT_EXISTS, "文件上传成功但未找到用户文件记录");
}
AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO();
respVO.setFileId(fileId);
respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(fileId));
respVO.setFileId(infraFileId); // 返回infraFileId保持与原有逻辑一致
respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(userFile.getId())); // 使用TikUserFileDO.id获取播放URL
respVO.setFormat(format);
respVO.setSampleRate(cache.getSampleRate());
respVO.setRequestId(cache.getRequestId());
@@ -685,21 +943,40 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
@Async
public void asyncTranscribeVoice(Long voiceId, String fileUrl) {
try {
// 1. 检查是否已经识别过(防重复)
TikUserVoiceDO existingVoice = voiceMapper.selectById(voiceId);
if (existingVoice == null) {
log.warn("[asyncTranscribeVoice][配音记录不存在,配音编号({})]", voiceId);
return;
}
// 如果已经有识别结果,不再重复识别
if (StrUtil.isNotBlank(existingVoice.getTranscription())) {
log.info("[asyncTranscribeVoice][配音已经识别过,配音编号({}),跳过识别]", voiceId);
return;
}
log.info("[asyncTranscribeVoice][开始识别,配音编号({})文件URL({})]", voiceId, fileUrl);
Object result = tikHupService.videoToCharacters2(Collections.singletonList(fileUrl));
// 解析识别结果
String transcription = extractTranscription(result);
if (StrUtil.isNotBlank(transcription)) {
// 更新识别结果
TikUserVoiceDO updateObj = new TikUserVoiceDO()
.setId(voiceId)
.setTranscription(transcription);
voiceMapper.updateById(updateObj);
log.info("[asyncTranscribeVoice][识别成功,配音编号({}),文本长度({})]", voiceId, transcription.length());
// 二次检查:解析后再次检查是否已经有识别结果(避免并发重复)
TikUserVoiceDO currentVoice = voiceMapper.selectById(voiceId);
if (currentVoice != null && StrUtil.isBlank(currentVoice.getTranscription())) {
// 更新识别结果
TikUserVoiceDO updateObj = new TikUserVoiceDO()
.setId(voiceId)
.setTranscription(transcription);
voiceMapper.updateById(updateObj);
log.info("[asyncTranscribeVoice][识别成功,配音编号({}),文本长度({})]", voiceId, transcription.length());
} else {
log.info("[asyncTranscribeVoice][并发跳过更新,配音编号({})已经有识别结果]", voiceId);
}
} else {
log.warn("[asyncTranscribeVoice][识别结果为空,配音编号({}),返回码({})]",
log.warn("[asyncTranscribeVoice][识别结果为空,配音编号({}),返回码({})]",
voiceId, result instanceof CommonResult ? ((CommonResult<?>) result).getCode() : "未知");
}
} catch (Exception e) {
@@ -773,17 +1050,28 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
if (CollUtil.isEmpty(results)) {
return null;
}
Object lastObj = results.get(results.size() - 1);
if (!(lastObj instanceof JSONObject lastResult)) {
// 阿里云语音识别:取第一个结果即可
Object firstObj = results.get(0);
if (!(firstObj instanceof JSONObject firstResult)) {
return null;
}
String transcriptionUrl = lastResult.getStr("transcription_url");
if (StrUtil.isBlank(transcriptionUrl)) {
return null;
// 先从第一个结果中直接提取文本
String directText = extractTextFromJson(firstResult);
if (StrUtil.isNotBlank(directText)) {
return directText;
}
StringBuilder builder = new StringBuilder();
appendRemoteTranscription(builder, transcriptionUrl);
return builder.length() > 0 ? builder.toString().trim() : null;
// 如果没有直接文本,尝试获取 transcription_url
String transcriptionUrl = firstResult.getStr("transcription_url");
if (StrUtil.isNotBlank(transcriptionUrl)) {
StringBuilder builder = new StringBuilder();
appendRemoteTranscription(builder, transcriptionUrl);
return builder.length() > 0 ? builder.toString().trim() : null;
}
return null;
}
} catch (Exception e) {
log.warn("[parseTranscriptionText][解析Paraformer结果失败]", e);

View File

@@ -38,6 +38,9 @@ public class AppTikUserVoiceRespVO {
@Schema(description = "备注", example = "这是一个测试配音")
private String note;
@Schema(description = "复刻音色IDCosyVoice 语音复刻生成的 voice_id")
private String voiceId;
@Schema(description = "创建时间", requiredMode = Schema.RequiredMode.REQUIRED)
private LocalDateTime createTime;

View File

@@ -1,6 +1,7 @@
package cn.iocoder.yudao.module.tik.voice.vo;
import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.NotNull;
import jakarta.validation.constraints.Size;
import lombok.Data;
@@ -10,33 +11,33 @@ import lombok.Data;
@Data
public class AppTikVoicePreviewReqVO {
@Schema(description = "输入文本")
@Schema(description = "配音编号tik_user_voice.id用户配音必传系统配音可不传")
private Long voiceConfigId;
@Schema(description = "CosyVoice音色ID系统配音必传用户配音可不传")
private String voiceId;
@Schema(description = "语音文件URL当使用语音URL合成时必传替代voiceId")
private String fileUrl;
@Schema(description = "语音文本/识别文本当使用fileUrl时必传")
@Size(max = 4000, message = "语音文本不能超过 4000 个字符")
private String transcriptionText;
@Schema(description = "输入文本(可选,如果不传则使用配音的识别文本或默认文本)")
@Size(max = 4000, message = "输入文本不能超过 4000 个字符")
private String inputText;
@Schema(description = "识别文本,用于拼接")
@Size(max = 4000, message = "识别文本不能超过 4000 个字符")
private String transcriptionText;
@Schema(description = "音色 IDCosyVoice voiceId")
private String voiceId;
@Schema(description = "音色源音频 OSS 地址(当没有 voiceId 时必传)")
private String fileUrl;
@Schema(description = "模型名称,默认 cosyvoice-v2")
private String model;
@Schema(description = "语速", example = "1.0")
@Schema(description = "语速可选默认1.0", example = "1.0")
private Float speechRate;
@Schema(description = "音量", example = "0")
@Schema(description = "音量可选默认0", example = "0")
private Float volume;
@Schema(description = "情感", example = "neutral")
@Schema(description = "情感可选默认neutral", example = "neutral")
private String emotion;
@Schema(description = "音频格式,默认 wav")
@Schema(description = "音频格式可选默认mp3", example = "mp3")
private String audioFormat;
}

View File

@@ -14,6 +14,9 @@ public class AppTikVoiceTtsReqVO {
@Size(max = 4000, message = "输入文本不能超过 4000 个字符")
private String inputText;
@Schema(description = "配音编号tik_user_voice.id用户配音必传系统配音可不传")
private Long voiceConfigId;
@Schema(description = "识别文本,用于拼接")
@Size(max = 4000, message = "识别文本不能超过 4000 个字符")
private String transcriptionText;