优化
This commit is contained in:
@@ -10,7 +10,6 @@ import cn.iocoder.yudao.module.tik.userprompt.vo.UserPromptRespVO;
|
||||
import cn.iocoder.yudao.module.tik.userprompt.vo.UserPromptSaveReqVO;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.parameters.RequestBody;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import jakarta.annotation.Resource;
|
||||
import jakarta.validation.Valid;
|
||||
@@ -33,34 +32,31 @@ public class AppUserPromptController {
|
||||
|
||||
@PostMapping("/create")
|
||||
@Operation(summary = "创建用户提示词")
|
||||
public CommonResult<Long> createUserPrompt(@RequestBody UserPromptSaveReqVO createReqVO) {
|
||||
// 先设置当前登录用户ID(在验证之前设置,避免 @NotNull 验证失败)
|
||||
public CommonResult<Long> createUserPrompt(@Valid @RequestBody UserPromptSaveReqVO createReqVO) {
|
||||
// 设置当前登录用户ID
|
||||
Long userId = getLoginUserId();
|
||||
if (userId == null) {
|
||||
return CommonResult.error(401, "用户未登录");
|
||||
}
|
||||
createReqVO.setUserId(userId);
|
||||
|
||||
// 手动验证必要字段
|
||||
if (createReqVO.getName() == null || createReqVO.getName().trim().isEmpty()) {
|
||||
return CommonResult.error(400, "提示词名称不能为空");
|
||||
// 处理字符串字段的trim
|
||||
if (createReqVO.getName() != null) {
|
||||
createReqVO.setName(createReqVO.getName().trim());
|
||||
}
|
||||
if (createReqVO.getContent() == null || createReqVO.getContent().trim().isEmpty()) {
|
||||
return CommonResult.error(400, "提示词内容不能为空");
|
||||
}
|
||||
if (createReqVO.getStatus() == null) {
|
||||
return CommonResult.error(400, "状态不能为空");
|
||||
if (createReqVO.getContent() != null) {
|
||||
createReqVO.setContent(createReqVO.getContent().trim());
|
||||
}
|
||||
|
||||
// 设置默认值(如果前端没有传递)
|
||||
// 设置默认值
|
||||
if (createReqVO.getIsPublic() == null) {
|
||||
createReqVO.setIsPublic(false); // 默认私有
|
||||
createReqVO.setIsPublic(false);
|
||||
}
|
||||
if (createReqVO.getSort() == null) {
|
||||
createReqVO.setSort(0); // 默认排序为 0
|
||||
createReqVO.setSort(0);
|
||||
}
|
||||
if (createReqVO.getUseCount() == null) {
|
||||
createReqVO.setUseCount(0); // 默认使用次数为 0
|
||||
createReqVO.setUseCount(0);
|
||||
}
|
||||
|
||||
return success(userPromptService.createUserPrompt(createReqVO));
|
||||
|
||||
@@ -86,27 +86,55 @@ public class CosyVoiceClient {
|
||||
|
||||
Map<String, Object> input = new HashMap<>();
|
||||
input.put("text", request.getText());
|
||||
String voiceId = StrUtil.blankToDefault(request.getVoiceId(), properties.getDefaultVoiceId());
|
||||
if (StrUtil.isNotBlank(voiceId)) {
|
||||
input.put("voice", voiceId);
|
||||
|
||||
// 优先使用fileUrl(语音克隆),否则使用voiceId(系统音色)
|
||||
if (StrUtil.isNotBlank(request.getFileUrl())) {
|
||||
// 直接使用预签名URL(带签名和时效),阿里云API需要这个签名URL
|
||||
input.put("audio_url", request.getFileUrl());
|
||||
log.info("[CosyVoice][使用语音克隆][audio_url={}]", request.getFileUrl());
|
||||
|
||||
// 如果提供了参考文本,也一并传递(用于提高语音克隆质量)
|
||||
if (StrUtil.isNotBlank(request.getReferenceText())) {
|
||||
input.put("reference_text", request.getReferenceText());
|
||||
log.info("[CosyVoice][添加参考文本][length={}]", request.getReferenceText().length());
|
||||
}
|
||||
} else {
|
||||
// 使用系统音色
|
||||
String voiceId = StrUtil.blankToDefault(request.getVoiceId(), properties.getDefaultVoiceId());
|
||||
if (StrUtil.isNotBlank(voiceId)) {
|
||||
input.put("voice", voiceId);
|
||||
log.info("[CosyVoice][使用系统音色][voice={}]", voiceId);
|
||||
} else {
|
||||
log.warn("[CosyVoice][未提供voiceId或fileUrl]");
|
||||
}
|
||||
}
|
||||
payload.put("input", input);
|
||||
|
||||
Map<String, Object> parameters = new HashMap<>();
|
||||
int sampleRate = request.getSampleRate() != null ? request.getSampleRate() : properties.getSampleRate();
|
||||
parameters.put("sample_rate", sampleRate);
|
||||
String format = StrUtil.blankToDefault(request.getAudioFormat(), properties.getAudioFormat());
|
||||
|
||||
// 根据官方文档,统一使用小写格式
|
||||
String format = StrUtil.blankToDefault(request.getAudioFormat(), properties.getAudioFormat()).toLowerCase();
|
||||
parameters.put("format", format);
|
||||
|
||||
if (request.getSpeechRate() != null) {
|
||||
parameters.put("speech_rate", request.getSpeechRate());
|
||||
}
|
||||
if (request.getVolume() != null) {
|
||||
parameters.put("volume", request.getVolume());
|
||||
// 文档显示volume范围是0-100
|
||||
parameters.put("volume", Math.round(request.getVolume()));
|
||||
}
|
||||
if (request.isPreview()) {
|
||||
parameters.put("preview", true);
|
||||
}
|
||||
|
||||
payload.put("parameters", parameters);
|
||||
|
||||
// 打印完整请求体(用于调试)
|
||||
log.info("[CosyVoice][请求参数][model={}, sample_rate={}, format={}, text_length={}]",
|
||||
model, sampleRate, format, request.getText().length());
|
||||
|
||||
return payload;
|
||||
}
|
||||
|
||||
@@ -173,6 +201,26 @@ public class CosyVoiceClient {
|
||||
return exception0(VOICE_TTS_FAILED.getCode(), body);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从URL中提取原始URL(去除查询参数和锚点)
|
||||
*
|
||||
* @param url 可能包含查询参数的URL
|
||||
* @return 原始URL(去除查询参数和锚点)
|
||||
*/
|
||||
private String extractRawUrl(String url) {
|
||||
if (StrUtil.isBlank(url)) {
|
||||
return url;
|
||||
}
|
||||
try {
|
||||
java.net.URL urlObj = new java.net.URL(url);
|
||||
// 只使用协议、主机、路径部分,忽略查询参数和锚点
|
||||
return urlObj.getProtocol() + "://" + urlObj.getHost() + urlObj.getPath();
|
||||
} catch (Exception e) {
|
||||
// 如果URL解析失败,使用简单方式去除查询参数
|
||||
return url.split("\\?")[0].split("#")[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -20,6 +20,16 @@ public class CosyVoiceTtsRequest {
|
||||
*/
|
||||
private String voiceId;
|
||||
|
||||
/**
|
||||
* 语音文件URL(当使用语音URL合成时使用,替代voiceId)
|
||||
*/
|
||||
private String fileUrl;
|
||||
|
||||
/**
|
||||
* 参考音频文本(当使用fileUrl时,用于提高克隆质量)
|
||||
*/
|
||||
private String referenceText;
|
||||
|
||||
/**
|
||||
* 模型(默认 cosyvoice-v2)
|
||||
*/
|
||||
|
||||
@@ -91,8 +91,8 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
@Resource
|
||||
private StringRedisTemplate stringRedisTemplate;
|
||||
|
||||
/** 预签名URL过期时间(1小时,单位:秒) */
|
||||
private static final int PRESIGN_URL_EXPIRATION_SECONDS = 3600;
|
||||
/** 预签名URL过期时间(24小时,单位:秒) */
|
||||
private static final int PRESIGN_URL_EXPIRATION_SECONDS = 24 * 3600;
|
||||
private static final String PREVIEW_CACHE_PREFIX = "tik:voice:preview:";
|
||||
private static final String SYNTH_CACHE_PREFIX = "tik:voice:tts:";
|
||||
private static final long PREVIEW_CACHE_TTL_SECONDS = 3600;
|
||||
@@ -138,12 +138,18 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
.setTranscription(null); // 初始为空,表示未识别
|
||||
voiceMapper.insert(voice);
|
||||
|
||||
// 4. 如果开启自动识别,异步执行识别
|
||||
// 4. 如果开启自动识别,异步执行识别(添加防重复检查)
|
||||
if (Boolean.TRUE.equals(createReqVO.getAutoTranscribe())) {
|
||||
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
log.info("[createVoice][开启自动识别,配音编号({}),文件ID({}),预签名URL({})]",
|
||||
voice.getId(), fileDO.getId(), fileAccessUrl);
|
||||
asyncTranscribeVoice(voice.getId(), fileAccessUrl);
|
||||
// 再次检查是否已经有识别结果(防止并发重复创建)
|
||||
TikUserVoiceDO checkVoice = voiceMapper.selectById(voice.getId());
|
||||
if (StrUtil.isBlank(checkVoice.getTranscription())) {
|
||||
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
log.info("[createVoice][开启自动识别,配音编号({}),文件ID({}),预签名URL({})]",
|
||||
voice.getId(), fileDO.getId(), fileAccessUrl);
|
||||
asyncTranscribeVoice(voice.getId(), fileAccessUrl);
|
||||
} else {
|
||||
log.info("[createVoice][配音已经有识别结果,跳过自动识别,配音编号({})]", voice.getId());
|
||||
}
|
||||
}
|
||||
|
||||
log.info("[createVoice][用户({})创建配音成功,配音编号({})]", userId, voice.getId());
|
||||
@@ -230,6 +236,10 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
// 查询配音列表
|
||||
PageResult<TikUserVoiceDO> pageResult = voiceMapper.selectPage(pageReqVO);
|
||||
|
||||
// 增加日志:记录查询到的配音数量和用户ID
|
||||
log.info("[getVoicePage][查询配音列表,用户ID={}, 总数={}]",
|
||||
userId, pageResult.getTotal());
|
||||
|
||||
// 批量查询文件信息,避免 N+1 查询
|
||||
Map<Long, FileDO> fileMap = new HashMap<>();
|
||||
if (CollUtil.isNotEmpty(pageResult.getList())) {
|
||||
@@ -237,7 +247,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
.map(TikUserVoiceDO::getFileId)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
|
||||
|
||||
if (CollUtil.isNotEmpty(fileIds)) {
|
||||
List<FileDO> files = fileMapper.selectBatchIds(fileIds);
|
||||
Map<Long, FileDO> tempFileMap = files.stream()
|
||||
@@ -258,6 +268,12 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
vo.setFileUrl(presignedUrl);
|
||||
}
|
||||
|
||||
// 增加日志:记录转换后的VO数据
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug("[getVoicePage][转换VO,配音ID={}, 名称={}]",
|
||||
vo.getId(), vo.getName());
|
||||
}
|
||||
|
||||
return vo;
|
||||
});
|
||||
}
|
||||
@@ -297,28 +313,93 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
throw exception(VOICE_NOT_EXISTS);
|
||||
}
|
||||
|
||||
// 2. 获取文件URL
|
||||
// 2. 检查是否已经有识别结果
|
||||
if (StrUtil.isNotBlank(voice.getTranscription())) {
|
||||
log.info("[transcribeVoice][配音已经识别过,配音编号({}),跳过识别]", id);
|
||||
return;
|
||||
}
|
||||
|
||||
// 3. 获取文件URL
|
||||
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
||||
if (fileDO == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||
}
|
||||
|
||||
// 3. 异步执行识别
|
||||
// 4. 异步执行识别
|
||||
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
asyncTranscribeVoice(id, fileAccessUrl);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AppTikVoiceTtsRespVO synthesizeVoice(AppTikVoiceTtsReqVO reqVO) {
|
||||
Long userId = SecurityFrameworkUtils.getLoginUserId();
|
||||
Long voiceConfigId = reqVO.getVoiceConfigId();
|
||||
|
||||
// 增加请求参数日志
|
||||
log.info("[synthesizeVoice][开始合成,请求参数:voiceConfigId={}, voiceId={}, fileUrl={}, userId={}]",
|
||||
voiceConfigId, reqVO.getVoiceId(), reqVO.getFileUrl(), userId);
|
||||
|
||||
String voiceId = null;
|
||||
String fileUrl = null;
|
||||
String transcriptionText = null;
|
||||
|
||||
// 1. 如果有配置ID,根据配置ID查询配音信息(用户配音)
|
||||
if (voiceConfigId != null) {
|
||||
log.info("[synthesizeVoice][开始合成,配音编号({}),用户({})]", voiceConfigId, userId);
|
||||
|
||||
TikUserVoiceDO voice = voiceMapper.selectById(voiceConfigId);
|
||||
log.info("[synthesizeVoice][查询配音结果:voice={},配音编号={},用户ID={}]",
|
||||
voice != null ? "存在" : "不存在", voiceConfigId, userId);
|
||||
|
||||
if (voice == null) {
|
||||
log.warn("[synthesizeVoice][配音不存在,配音编号({}),用户({})]", voiceConfigId, userId);
|
||||
throw exception(VOICE_NOT_EXISTS, "配音不存在,编号:" + voiceConfigId);
|
||||
}
|
||||
if (!voice.getUserId().equals(userId)) {
|
||||
log.warn("[synthesizeVoice][配音不属于当前用户,配音编号({}),配音用户({}),当前用户({})]",
|
||||
voiceConfigId, voice.getUserId(), userId);
|
||||
throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户");
|
||||
}
|
||||
|
||||
// 获取文件信息,用于获取文件URL
|
||||
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
||||
if (fileDO == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||
}
|
||||
|
||||
// 使用文件URL和识别文本进行合成
|
||||
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
transcriptionText = voice.getTranscription();
|
||||
if (StrUtil.isBlank(transcriptionText)) {
|
||||
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
|
||||
}
|
||||
}
|
||||
// 2. 如果没有配置ID,使用voiceId或fileUrl(系统音色或直接URL方式)
|
||||
else {
|
||||
// 参数验证:如果使用fileUrl,建议提供transcriptionText以提高克隆质量
|
||||
if (StrUtil.isNotBlank(reqVO.getFileUrl()) && StrUtil.isBlank(reqVO.getTranscriptionText())) {
|
||||
log.warn("[synthesizeVoice][使用fileUrl但未提供transcriptionText,可能影响克隆质量]");
|
||||
}
|
||||
|
||||
// 参数验证:必须提供voiceId或fileUrl之一
|
||||
if (StrUtil.isBlank(reqVO.getVoiceId()) && StrUtil.isBlank(reqVO.getFileUrl())) {
|
||||
throw exception(VOICE_NOT_EXISTS, "请提供音色ID(voiceId)或语音文件URL(fileUrl)");
|
||||
}
|
||||
|
||||
voiceId = reqVO.getVoiceId();
|
||||
fileUrl = reqVO.getFileUrl();
|
||||
transcriptionText = reqVO.getTranscriptionText();
|
||||
}
|
||||
|
||||
String finalText = determineSynthesisText(
|
||||
reqVO.getTranscriptionText(),
|
||||
transcriptionText,
|
||||
reqVO.getInputText(),
|
||||
false);
|
||||
finalText = appendEmotion(finalText, reqVO.getEmotion());
|
||||
|
||||
String cacheKey = buildCacheKey(SYNTH_CACHE_PREFIX,
|
||||
reqVO.getVoiceId(),
|
||||
reqVO.getFileUrl(),
|
||||
voiceId,
|
||||
fileUrl,
|
||||
finalText,
|
||||
reqVO.getSpeechRate(),
|
||||
reqVO.getVolume(),
|
||||
@@ -333,7 +414,9 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
|
||||
CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
|
||||
finalText,
|
||||
reqVO.getVoiceId(),
|
||||
voiceId,
|
||||
fileUrl,
|
||||
transcriptionText,
|
||||
reqVO.getModel(),
|
||||
reqVO.getSpeechRate(),
|
||||
reqVO.getVolume(),
|
||||
@@ -343,82 +426,186 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
));
|
||||
|
||||
String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat());
|
||||
String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
|
||||
String finalVoiceId = StrUtil.blankToDefault(voiceId, cosyVoiceProperties.getDefaultVoiceId());
|
||||
ByteArrayMultipartFile multipartFile = new ByteArrayMultipartFile(
|
||||
"file",
|
||||
buildFileName(voiceId, format),
|
||||
buildFileName(finalVoiceId, format),
|
||||
resolveContentType(format),
|
||||
ttsResult.getAudio()
|
||||
);
|
||||
Long fileId = tikUserFileService.uploadFile(multipartFile, "audio", null);
|
||||
Long infraFileId = tikUserFileService.uploadFile(multipartFile, "audio", null);
|
||||
|
||||
// 通过infraFileId查询TikUserFileDO,获取用户文件ID
|
||||
TikUserFileDO userFile = userFileMapper.selectOne(
|
||||
new LambdaQueryWrapperX<TikUserFileDO>()
|
||||
.eq(TikUserFileDO::getFileId, infraFileId)
|
||||
.eq(TikUserFileDO::getUserId, SecurityFrameworkUtils.getLoginUserId())
|
||||
.orderByDesc(TikUserFileDO::getId)
|
||||
.last("LIMIT 1"));
|
||||
if (userFile == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS, "文件上传成功但未找到用户文件记录");
|
||||
}
|
||||
|
||||
AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO();
|
||||
respVO.setFileId(fileId);
|
||||
respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(fileId));
|
||||
respVO.setFileId(infraFileId); // 返回infraFileId,保持与原有逻辑一致
|
||||
respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(userFile.getId())); // 使用TikUserFileDO.id获取播放URL
|
||||
respVO.setFormat(format);
|
||||
respVO.setSampleRate(ttsResult.getSampleRate());
|
||||
respVO.setRequestId(ttsResult.getRequestId());
|
||||
respVO.setVoiceId(voiceId);
|
||||
respVO.setVoiceId(finalVoiceId);
|
||||
|
||||
saveSynthCache(cacheKey, new SynthCacheEntry(
|
||||
Base64.getEncoder().encodeToString(ttsResult.getAudio()),
|
||||
format,
|
||||
ttsResult.getSampleRate(),
|
||||
ttsResult.getRequestId(),
|
||||
voiceId
|
||||
finalVoiceId
|
||||
));
|
||||
return respVO;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AppTikVoicePreviewRespVO previewVoice(AppTikVoicePreviewReqVO reqVO) {
|
||||
String finalText = determineSynthesisText(
|
||||
reqVO.getTranscriptionText(),
|
||||
reqVO.getInputText(),
|
||||
true);
|
||||
finalText = appendEmotion(finalText, reqVO.getEmotion());
|
||||
Long userId = SecurityFrameworkUtils.getLoginUserId();
|
||||
Long voiceConfigId = reqVO.getVoiceConfigId();
|
||||
|
||||
// 增加请求参数日志
|
||||
log.info("[previewVoice][开始试听,请求参数:voiceConfigId={}, voiceId={}, fileUrl={}, userId={}]",
|
||||
voiceConfigId, reqVO.getVoiceId(), reqVO.getFileUrl(), userId);
|
||||
|
||||
String voiceId = null;
|
||||
String fileUrl = null;
|
||||
String transcriptionText = null;
|
||||
String inputText;
|
||||
|
||||
// 1. 如果传入了fileUrl和transcriptionText,直接使用(通过语音URL合成)
|
||||
if (StrUtil.isNotBlank(reqVO.getFileUrl()) && StrUtil.isNotBlank(reqVO.getTranscriptionText())) {
|
||||
log.info("[previewVoice][使用语音URL合成,用户({})]", userId);
|
||||
// 如果传入的是预签名URL,提取原始URL(去除查询参数),避免二次签名
|
||||
String rawFileUrl = extractRawUrl(reqVO.getFileUrl());
|
||||
// 如果提取后的URL与原始URL不同,说明是预签名URL,需要重新生成预签名URL
|
||||
// 否则直接使用(可能是原始URL或公开URL)
|
||||
if (!rawFileUrl.equals(reqVO.getFileUrl())) {
|
||||
// 重新生成预签名URL,确保有效期足够长
|
||||
fileUrl = fileApi.presignGetUrl(rawFileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
log.info("[previewVoice][检测到预签名URL,已提取原始URL并重新生成预签名URL]");
|
||||
} else {
|
||||
fileUrl = reqVO.getFileUrl();
|
||||
}
|
||||
transcriptionText = reqVO.getTranscriptionText();
|
||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(), transcriptionText);
|
||||
}
|
||||
// 2. 如果有配置ID,根据配置ID查询配音信息(用户配音)
|
||||
else if (voiceConfigId != null) {
|
||||
log.info("[previewVoice][开始试听,配音编号({}),用户({})]", voiceConfigId, userId);
|
||||
|
||||
TikUserVoiceDO voice = voiceMapper.selectById(voiceConfigId);
|
||||
log.info("[previewVoice][查询配音结果:voice={},配音编号={},用户ID={}]",
|
||||
voice != null ? "存在" : "不存在", voiceConfigId, userId);
|
||||
|
||||
if (voice == null) {
|
||||
log.warn("[previewVoice][配音不存在,配音编号({}),用户({})]", voiceConfigId, userId);
|
||||
throw exception(VOICE_NOT_EXISTS, "配音不存在,编号:" + voiceConfigId);
|
||||
}
|
||||
if (!voice.getUserId().equals(userId)) {
|
||||
log.warn("[previewVoice][配音不属于当前用户,配音编号({}),配音用户({}),当前用户({})]",
|
||||
voiceConfigId, voice.getUserId(), userId);
|
||||
throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户");
|
||||
}
|
||||
|
||||
// 获取文件信息,用于获取文件URL
|
||||
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
||||
if (fileDO == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||
}
|
||||
|
||||
// 使用文件URL和识别文本进行合成
|
||||
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
transcriptionText = voice.getTranscription();
|
||||
if (StrUtil.isBlank(transcriptionText)) {
|
||||
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
|
||||
}
|
||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(),
|
||||
StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText()));
|
||||
}
|
||||
// 3. 如果没有配置ID,使用系统配音配置(需要前端传voiceId)
|
||||
else {
|
||||
log.info("[previewVoice][开始试听,使用系统配音配置,用户({})]", userId);
|
||||
voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
|
||||
if (StrUtil.isBlank(voiceId)) {
|
||||
throw exception(VOICE_NOT_EXISTS, "系统配音音色ID不能为空");
|
||||
}
|
||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(), cosyVoiceProperties.getPreviewText());
|
||||
}
|
||||
|
||||
String finalText = determineSynthesisText(
|
||||
transcriptionText,
|
||||
inputText,
|
||||
true);
|
||||
|
||||
// 使用请求参数或默认值
|
||||
String emotion = StrUtil.blankToDefault(reqVO.getEmotion(), "neutral");
|
||||
finalText = appendEmotion(finalText, emotion);
|
||||
Float speechRate = reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f;
|
||||
Float volume = reqVO.getVolume() != null ? reqVO.getVolume() : 0f;
|
||||
String audioFormat = StrUtil.blankToDefault(reqVO.getAudioFormat(), "mp3");
|
||||
|
||||
// 构建缓存key(使用fileUrl或voiceId)
|
||||
String cacheKey = buildCacheKey(PREVIEW_CACHE_PREFIX,
|
||||
reqVO.getVoiceId(),
|
||||
reqVO.getFileUrl(),
|
||||
voiceId,
|
||||
fileUrl,
|
||||
finalText,
|
||||
reqVO.getSpeechRate(),
|
||||
reqVO.getVolume(),
|
||||
reqVO.getEmotion(),
|
||||
reqVO.getAudioFormat(),
|
||||
speechRate,
|
||||
volume,
|
||||
emotion,
|
||||
audioFormat,
|
||||
null);
|
||||
PreviewCacheEntry previewCache = getPreviewCache(cacheKey);
|
||||
String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
|
||||
|
||||
if (previewCache != null) {
|
||||
log.info("[previewVoice][使用缓存,配音编号({}),voiceId({}),fileUrl({}),cacheKey({})]",
|
||||
voiceConfigId, voiceId, fileUrl, cacheKey);
|
||||
// 缓存中存储的是原始URL,需要生成预签名URL
|
||||
String cachedUrl = fileApi.presignGetUrl(previewCache.getFileUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
return buildPreviewResp(previewCache, cachedUrl, voiceId);
|
||||
}
|
||||
|
||||
log.info("[previewVoice][调用CosyVoice合成,配音编号({}),voiceId({}),fileUrl({}),文本长度({})]",
|
||||
voiceConfigId, voiceId, fileUrl, finalText.length());
|
||||
CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
|
||||
finalText,
|
||||
reqVO.getVoiceId(),
|
||||
reqVO.getModel(),
|
||||
reqVO.getSpeechRate(),
|
||||
reqVO.getVolume(),
|
||||
voiceId,
|
||||
fileUrl,
|
||||
transcriptionText, // 参考音频文本,用于提高克隆质量
|
||||
null, // 使用默认模型
|
||||
speechRate,
|
||||
volume,
|
||||
null,
|
||||
reqVO.getAudioFormat(),
|
||||
audioFormat,
|
||||
true
|
||||
));
|
||||
|
||||
String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat());
|
||||
voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
|
||||
String objectName = buildFileName(voiceId, format);
|
||||
String fileUrl = fileApi.createFile(ttsResult.getAudio(), objectName, "voice/preview", resolveContentType(format));
|
||||
String presignUrl = fileApi.presignGetUrl(fileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
|
||||
PreviewCacheEntry entry = new PreviewCacheEntry(fileUrl, format, ttsResult.getSampleRate(), ttsResult.getRequestId());
|
||||
String format = defaultFormat(ttsResult.getFormat(), audioFormat);
|
||||
String identifier = StrUtil.isNotBlank(fileUrl) ? "fileUrl" : (StrUtil.isNotBlank(voiceId) ? voiceId : "voice");
|
||||
String objectName = buildFileName(identifier, format);
|
||||
// 上传到OSS,返回原始URL(不是预签名URL)
|
||||
String resultFileUrl = fileApi.createFile(ttsResult.getAudio(), objectName, "voice/preview", resolveContentType(format));
|
||||
log.info("[previewVoice][合成成功,配音编号({}),voiceId({}),fileUrl({}),resultFileUrl({}),format({})]",
|
||||
voiceConfigId, voiceId, fileUrl, resultFileUrl, format);
|
||||
|
||||
// 生成预签名URL用于返回给前端
|
||||
String presignUrl = fileApi.presignGetUrl(resultFileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
|
||||
// 缓存中存储原始URL(不是预签名URL),下次使用时再生成预签名URL
|
||||
PreviewCacheEntry entry = new PreviewCacheEntry(resultFileUrl, format, ttsResult.getSampleRate(), ttsResult.getRequestId());
|
||||
savePreviewCache(cacheKey, entry);
|
||||
return buildPreviewResp(entry, presignUrl, voiceId);
|
||||
}
|
||||
|
||||
private CosyVoiceTtsRequest buildTtsRequest(String text,
|
||||
String voiceId,
|
||||
String fileUrl,
|
||||
String referenceText,
|
||||
String model,
|
||||
Float speechRate,
|
||||
Float volume,
|
||||
@@ -428,6 +615,8 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
return CosyVoiceTtsRequest.builder()
|
||||
.text(text)
|
||||
.voiceId(voiceId)
|
||||
.fileUrl(fileUrl)
|
||||
.referenceText(referenceText)
|
||||
.model(model)
|
||||
.speechRate(speechRate)
|
||||
.volume(volume)
|
||||
@@ -500,6 +689,26 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
return "【情感:" + emotionLabel + "】" + text;
|
||||
}
|
||||
|
||||
/**
|
||||
* 从URL中提取原始URL(去除查询参数和锚点)
|
||||
*
|
||||
* @param url 可能包含查询参数的URL
|
||||
* @return 原始URL(去除查询参数和锚点)
|
||||
*/
|
||||
private String extractRawUrl(String url) {
|
||||
if (StrUtil.isBlank(url)) {
|
||||
return url;
|
||||
}
|
||||
try {
|
||||
java.net.URL urlObj = new java.net.URL(url);
|
||||
// 只使用协议、主机、路径部分,忽略查询参数和锚点
|
||||
return urlObj.getProtocol() + "://" + urlObj.getHost() + urlObj.getPath();
|
||||
} catch (Exception e) {
|
||||
// 如果URL解析失败,使用简单方式去除查询参数
|
||||
return url.split("\\?")[0].split("#")[0];
|
||||
}
|
||||
}
|
||||
|
||||
private String buildCacheKey(String prefix,
|
||||
String voiceId,
|
||||
String fileUrl,
|
||||
@@ -509,9 +718,17 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
String emotion,
|
||||
String audioFormat,
|
||||
Integer sampleRate) {
|
||||
String identifier = StrUtil.isNotBlank(voiceId)
|
||||
? voiceId
|
||||
: StrUtil.blankToDefault(fileUrl, "no-voice");
|
||||
// 构建标识符:优先使用voiceId,如果没有则使用fileUrl的稳定部分(去除查询参数)
|
||||
String identifier;
|
||||
if (StrUtil.isNotBlank(voiceId)) {
|
||||
identifier = voiceId;
|
||||
} else if (StrUtil.isNotBlank(fileUrl)) {
|
||||
// 对于fileUrl,提取稳定部分(去除预签名URL的查询参数,避免缓存key不稳定)
|
||||
identifier = extractRawUrl(fileUrl);
|
||||
} else {
|
||||
identifier = "no-voice";
|
||||
}
|
||||
|
||||
String payload = StrUtil.join("|",
|
||||
identifier,
|
||||
text,
|
||||
@@ -584,11 +801,22 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
resolveContentType(format),
|
||||
audioBytes
|
||||
);
|
||||
Long fileId = tikUserFileService.uploadFile(multipartFile, "audio", null);
|
||||
Long infraFileId = tikUserFileService.uploadFile(multipartFile, "audio", null);
|
||||
|
||||
// 通过infraFileId查询TikUserFileDO,获取用户文件ID
|
||||
TikUserFileDO userFile = userFileMapper.selectOne(
|
||||
new LambdaQueryWrapperX<TikUserFileDO>()
|
||||
.eq(TikUserFileDO::getFileId, infraFileId)
|
||||
.eq(TikUserFileDO::getUserId, SecurityFrameworkUtils.getLoginUserId())
|
||||
.orderByDesc(TikUserFileDO::getId)
|
||||
.last("LIMIT 1"));
|
||||
if (userFile == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS, "文件上传成功但未找到用户文件记录");
|
||||
}
|
||||
|
||||
AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO();
|
||||
respVO.setFileId(fileId);
|
||||
respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(fileId));
|
||||
respVO.setFileId(infraFileId); // 返回infraFileId,保持与原有逻辑一致
|
||||
respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(userFile.getId())); // 使用TikUserFileDO.id获取播放URL
|
||||
respVO.setFormat(format);
|
||||
respVO.setSampleRate(cache.getSampleRate());
|
||||
respVO.setRequestId(cache.getRequestId());
|
||||
@@ -685,21 +913,40 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
@Async
|
||||
public void asyncTranscribeVoice(Long voiceId, String fileUrl) {
|
||||
try {
|
||||
// 1. 检查是否已经识别过(防重复)
|
||||
TikUserVoiceDO existingVoice = voiceMapper.selectById(voiceId);
|
||||
if (existingVoice == null) {
|
||||
log.warn("[asyncTranscribeVoice][配音记录不存在,配音编号({})]", voiceId);
|
||||
return;
|
||||
}
|
||||
|
||||
// 如果已经有识别结果,不再重复识别
|
||||
if (StrUtil.isNotBlank(existingVoice.getTranscription())) {
|
||||
log.info("[asyncTranscribeVoice][配音已经识别过,配音编号({}),跳过识别]", voiceId);
|
||||
return;
|
||||
}
|
||||
|
||||
log.info("[asyncTranscribeVoice][开始识别,配音编号({}),文件URL({})]", voiceId, fileUrl);
|
||||
Object result = tikHupService.videoToCharacters2(Collections.singletonList(fileUrl));
|
||||
|
||||
|
||||
// 解析识别结果
|
||||
String transcription = extractTranscription(result);
|
||||
|
||||
|
||||
if (StrUtil.isNotBlank(transcription)) {
|
||||
// 更新识别结果
|
||||
TikUserVoiceDO updateObj = new TikUserVoiceDO()
|
||||
.setId(voiceId)
|
||||
.setTranscription(transcription);
|
||||
voiceMapper.updateById(updateObj);
|
||||
log.info("[asyncTranscribeVoice][识别成功,配音编号({}),文本长度({})]", voiceId, transcription.length());
|
||||
// 二次检查:解析后再次检查是否已经有识别结果(避免并发重复)
|
||||
TikUserVoiceDO currentVoice = voiceMapper.selectById(voiceId);
|
||||
if (currentVoice != null && StrUtil.isBlank(currentVoice.getTranscription())) {
|
||||
// 更新识别结果
|
||||
TikUserVoiceDO updateObj = new TikUserVoiceDO()
|
||||
.setId(voiceId)
|
||||
.setTranscription(transcription);
|
||||
voiceMapper.updateById(updateObj);
|
||||
log.info("[asyncTranscribeVoice][识别成功,配音编号({}),文本长度({})]", voiceId, transcription.length());
|
||||
} else {
|
||||
log.info("[asyncTranscribeVoice][并发跳过更新,配音编号({})已经有识别结果]", voiceId);
|
||||
}
|
||||
} else {
|
||||
log.warn("[asyncTranscribeVoice][识别结果为空,配音编号({}),返回码({})]",
|
||||
log.warn("[asyncTranscribeVoice][识别结果为空,配音编号({}),返回码({})]",
|
||||
voiceId, result instanceof CommonResult ? ((CommonResult<?>) result).getCode() : "未知");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
@@ -773,17 +1020,28 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
if (CollUtil.isEmpty(results)) {
|
||||
return null;
|
||||
}
|
||||
Object lastObj = results.get(results.size() - 1);
|
||||
if (!(lastObj instanceof JSONObject lastResult)) {
|
||||
|
||||
// 阿里云语音识别:取第一个结果即可
|
||||
Object firstObj = results.get(0);
|
||||
if (!(firstObj instanceof JSONObject firstResult)) {
|
||||
return null;
|
||||
}
|
||||
String transcriptionUrl = lastResult.getStr("transcription_url");
|
||||
if (StrUtil.isBlank(transcriptionUrl)) {
|
||||
return null;
|
||||
|
||||
// 先从第一个结果中直接提取文本
|
||||
String directText = extractTextFromJson(firstResult);
|
||||
if (StrUtil.isNotBlank(directText)) {
|
||||
return directText;
|
||||
}
|
||||
StringBuilder builder = new StringBuilder();
|
||||
appendRemoteTranscription(builder, transcriptionUrl);
|
||||
return builder.length() > 0 ? builder.toString().trim() : null;
|
||||
|
||||
// 如果没有直接文本,尝试获取 transcription_url
|
||||
String transcriptionUrl = firstResult.getStr("transcription_url");
|
||||
if (StrUtil.isNotBlank(transcriptionUrl)) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
appendRemoteTranscription(builder, transcriptionUrl);
|
||||
return builder.length() > 0 ? builder.toString().trim() : null;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("[parseTranscriptionText][解析Paraformer结果失败]", e);
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.vo;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import jakarta.validation.constraints.NotNull;
|
||||
import jakarta.validation.constraints.Size;
|
||||
import lombok.Data;
|
||||
|
||||
@@ -10,33 +11,33 @@ import lombok.Data;
|
||||
@Data
|
||||
public class AppTikVoicePreviewReqVO {
|
||||
|
||||
@Schema(description = "输入文本")
|
||||
@Schema(description = "配音编号(tik_user_voice.id),用户配音必传,系统配音可不传")
|
||||
private Long voiceConfigId;
|
||||
|
||||
@Schema(description = "CosyVoice音色ID(系统配音必传,用户配音可不传)")
|
||||
private String voiceId;
|
||||
|
||||
@Schema(description = "语音文件URL(当使用语音URL合成时必传,替代voiceId)")
|
||||
private String fileUrl;
|
||||
|
||||
@Schema(description = "语音文本/识别文本(当使用fileUrl时必传)")
|
||||
@Size(max = 4000, message = "语音文本不能超过 4000 个字符")
|
||||
private String transcriptionText;
|
||||
|
||||
@Schema(description = "输入文本(可选,如果不传则使用配音的识别文本或默认文本)")
|
||||
@Size(max = 4000, message = "输入文本不能超过 4000 个字符")
|
||||
private String inputText;
|
||||
|
||||
@Schema(description = "识别文本,用于拼接")
|
||||
@Size(max = 4000, message = "识别文本不能超过 4000 个字符")
|
||||
private String transcriptionText;
|
||||
|
||||
@Schema(description = "音色 ID(CosyVoice voiceId)")
|
||||
private String voiceId;
|
||||
|
||||
@Schema(description = "音色源音频 OSS 地址(当没有 voiceId 时必传)")
|
||||
private String fileUrl;
|
||||
|
||||
@Schema(description = "模型名称,默认 cosyvoice-v2")
|
||||
private String model;
|
||||
|
||||
@Schema(description = "语速", example = "1.0")
|
||||
@Schema(description = "语速(可选,默认1.0)", example = "1.0")
|
||||
private Float speechRate;
|
||||
|
||||
@Schema(description = "音量", example = "0")
|
||||
@Schema(description = "音量(可选,默认0)", example = "0")
|
||||
private Float volume;
|
||||
|
||||
@Schema(description = "情感", example = "neutral")
|
||||
@Schema(description = "情感(可选,默认neutral)", example = "neutral")
|
||||
private String emotion;
|
||||
|
||||
@Schema(description = "音频格式,默认 wav")
|
||||
@Schema(description = "音频格式(可选,默认mp3)", example = "mp3")
|
||||
private String audioFormat;
|
||||
}
|
||||
|
||||
|
||||
@@ -14,6 +14,9 @@ public class AppTikVoiceTtsReqVO {
|
||||
@Size(max = 4000, message = "输入文本不能超过 4000 个字符")
|
||||
private String inputText;
|
||||
|
||||
@Schema(description = "配音编号(tik_user_voice.id),用户配音必传,系统配音可不传")
|
||||
private Long voiceConfigId;
|
||||
|
||||
@Schema(description = "识别文本,用于拼接")
|
||||
@Size(max = 4000, message = "识别文本不能超过 4000 个字符")
|
||||
private String transcriptionText;
|
||||
|
||||
Reference in New Issue
Block a user