feat: 功能优化
This commit is contained in:
@@ -120,7 +120,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
|
|||||||
.input(request.getText())
|
.input(request.getText())
|
||||||
.voice(request.getVoiceId())
|
.voice(request.getVoiceId())
|
||||||
.speed(request.getSpeechRate() != null ? request.getSpeechRate() : 1.0f)
|
.speed(request.getSpeechRate() != null ? request.getSpeechRate() : 1.0f)
|
||||||
.sampleRate(request.getSampleRate() != null ? request.getSampleRate() : config.getSampleRate())
|
|
||||||
.responseFormat(getOrDefault(request.getAudioFormat(), config.getAudioFormat()))
|
.responseFormat(getOrDefault(request.getAudioFormat(), config.getAudioFormat()))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
@@ -150,7 +149,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
|
|||||||
VoiceTtsResult result = new VoiceTtsResult();
|
VoiceTtsResult result = new VoiceTtsResult();
|
||||||
result.setAudio(Base64.getDecoder().decode(base64Audio));
|
result.setAudio(Base64.getDecoder().decode(base64Audio));
|
||||||
result.setFormat(sfRequest.getResponseFormat());
|
result.setFormat(sfRequest.getResponseFormat());
|
||||||
result.setSampleRate(sfRequest.getSampleRate());
|
|
||||||
result.setVoiceId(request.getVoiceId());
|
result.setVoiceId(request.getVoiceId());
|
||||||
|
|
||||||
log.info("[SiliconFlowProvider][语音合成成功][format={}, audioSize={}]",
|
log.info("[SiliconFlowProvider][语音合成成功][format={}, audioSize={}]",
|
||||||
|
|||||||
@@ -21,9 +21,9 @@ public class SiliconFlowTtsRequest {
|
|||||||
private String model;
|
private String model;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 待合成文本(API 参数名:text)
|
* 待合成文本(API 参数名:input)
|
||||||
*/
|
*/
|
||||||
@JsonProperty("text")
|
@JsonProperty("input")
|
||||||
private String input;
|
private String input;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -36,16 +36,12 @@ public class SiliconFlowTtsRequest {
|
|||||||
*/
|
*/
|
||||||
private Float speed;
|
private Float speed;
|
||||||
|
|
||||||
/**
|
|
||||||
* 采样率(如 24000)
|
|
||||||
*/
|
|
||||||
@JsonProperty("sample_rate")
|
|
||||||
private Integer sampleRate;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 响应格式(mp3, wav, pcm)(API 参数名:format)
|
* 响应格式(mp3, opus, wav, pcm)(API 参数名:response_format)
|
||||||
*/
|
*/
|
||||||
@JsonProperty("format")
|
@JsonProperty("response_format")
|
||||||
private String responseFormat;
|
private String responseFormat;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -427,7 +427,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
|||||||
transcriptionText,
|
transcriptionText,
|
||||||
reqVO.getInputText(),
|
reqVO.getInputText(),
|
||||||
false);
|
false);
|
||||||
// 移除appendEmotion调用,情感通过instruction参数传递
|
|
||||||
|
|
||||||
String cacheKey = buildCacheKey(SYNTH_CACHE_PREFIX,
|
String cacheKey = buildCacheKey(SYNTH_CACHE_PREFIX,
|
||||||
voiceId,
|
voiceId,
|
||||||
@@ -493,128 +492,75 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
|||||||
Long userId = SecurityFrameworkUtils.getLoginUserId();
|
Long userId = SecurityFrameworkUtils.getLoginUserId();
|
||||||
Long voiceConfigId = reqVO.getVoiceConfigId();
|
Long voiceConfigId = reqVO.getVoiceConfigId();
|
||||||
|
|
||||||
// 增加请求参数日志
|
log.info("[previewVoice][试听,voiceConfigId={}, voiceId={}, userId={}]",
|
||||||
log.info("[previewVoice][开始试听,请求参数:voiceConfigId={}, voiceId={}, fileUrl={}, userId={}]",
|
voiceConfigId, reqVO.getVoiceId(), userId);
|
||||||
voiceConfigId, reqVO.getVoiceId(), reqVO.getFileUrl(), userId);
|
|
||||||
|
|
||||||
String voiceId = null;
|
String voiceId = null;
|
||||||
String fileUrl = null;
|
String fileUrl = null;
|
||||||
String transcriptionText = null;
|
String referenceText = null;
|
||||||
String inputText;
|
|
||||||
|
|
||||||
// 1. 如果传入了fileUrl和transcriptionText,直接使用(通过语音URL合成)
|
// 1. 通过语音URL合成
|
||||||
if (StrUtil.isNotBlank(reqVO.getFileUrl()) && StrUtil.isNotBlank(reqVO.getTranscriptionText())) {
|
if (StrUtil.isNotBlank(reqVO.getFileUrl()) && StrUtil.isNotBlank(reqVO.getTranscriptionText())) {
|
||||||
log.info("[previewVoice][使用语音URL合成,用户({})]", userId);
|
|
||||||
// 如果传入的是预签名URL,提取原始URL(去除查询参数),避免二次签名
|
|
||||||
String rawFileUrl = extractRawUrl(reqVO.getFileUrl());
|
String rawFileUrl = extractRawUrl(reqVO.getFileUrl());
|
||||||
// 如果提取后的URL与原始URL不同,说明是预签名URL,需要重新生成预签名URL
|
fileUrl = rawFileUrl.equals(reqVO.getFileUrl())
|
||||||
// 否则直接使用(可能是原始URL或公开URL)
|
? reqVO.getFileUrl()
|
||||||
if (!rawFileUrl.equals(reqVO.getFileUrl())) {
|
: fileApi.presignGetUrl(rawFileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
|
||||||
// 重新生成预签名URL,确保有效期足够长
|
referenceText = reqVO.getTranscriptionText();
|
||||||
fileUrl = fileApi.presignGetUrl(rawFileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
|
|
||||||
log.info("[previewVoice][检测到预签名URL,已提取原始URL并重新生成预签名URL]");
|
|
||||||
} else {
|
|
||||||
fileUrl = reqVO.getFileUrl();
|
|
||||||
}
|
|
||||||
transcriptionText = reqVO.getTranscriptionText();
|
|
||||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(), transcriptionText);
|
|
||||||
}
|
}
|
||||||
// 2. 如果有配置ID,根据配置ID查询配音信息(用户配音)
|
// 2. 用户配音
|
||||||
else if (voiceConfigId != null) {
|
else if (voiceConfigId != null) {
|
||||||
log.info("[previewVoice][开始试听,配音编号({}),用户({})]", voiceConfigId, userId);
|
|
||||||
|
|
||||||
TikUserVoiceDO voice = voiceMapper.selectById(voiceConfigId);
|
TikUserVoiceDO voice = voiceMapper.selectById(voiceConfigId);
|
||||||
log.info("[previewVoice][查询配音结果:voice={},配音编号={},用户ID={}]",
|
if (voice == null || !voice.getUserId().equals(userId)) {
|
||||||
voice != null ? "存在" : "不存在", voiceConfigId, userId);
|
throw exception(VOICE_NOT_EXISTS, "配音不存在");
|
||||||
|
|
||||||
if (voice == null) {
|
|
||||||
log.warn("[previewVoice][配音不存在,配音编号({}),用户({})]", voiceConfigId, userId);
|
|
||||||
throw exception(VOICE_NOT_EXISTS, "配音不存在,编号:" + voiceConfigId);
|
|
||||||
}
|
|
||||||
if (!voice.getUserId().equals(userId)) {
|
|
||||||
log.warn("[previewVoice][配音不属于当前用户,配音编号({}),配音用户({}),当前用户({})]",
|
|
||||||
voiceConfigId, voice.getUserId(), userId);
|
|
||||||
throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 优先使用复刻的 voice_id,如果不存在则使用文件URL(兼容旧数据)
|
|
||||||
if (StrUtil.isNotBlank(voice.getVoiceId())) {
|
if (StrUtil.isNotBlank(voice.getVoiceId())) {
|
||||||
log.info("[previewVoice][使用复刻音色ID试听,配音编号({}),voice_id({})]", voiceConfigId, voice.getVoiceId());
|
|
||||||
voiceId = voice.getVoiceId();
|
voiceId = voice.getVoiceId();
|
||||||
// 注意:使用 voiceId 时,不依赖 transcriptionText,直接使用前端传入的 inputText
|
|
||||||
transcriptionText = null; // 清除 transcriptionText
|
|
||||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
|
|
||||||
} else {
|
} else {
|
||||||
log.info("[previewVoice][使用文件URL试听,配音编号({})]", voiceConfigId);
|
|
||||||
// 获取文件信息,用于获取文件URL
|
|
||||||
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
||||||
if (fileDO == null) {
|
if (fileDO == null) {
|
||||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 使用文件URL和识别文本进行合成
|
|
||||||
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||||
transcriptionText = voice.getTranscription();
|
referenceText = voice.getTranscription();
|
||||||
if (StrUtil.isBlank(transcriptionText)) {
|
if (StrUtil.isBlank(referenceText)) {
|
||||||
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
|
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
|
||||||
}
|
}
|
||||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(),
|
|
||||||
StrUtil.blankToDefault(transcriptionText, getPreviewText()));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 3. 如果没有配置ID,使用系统配音配置(需要前端传voiceId)
|
// 3. 系统配音
|
||||||
else {
|
else {
|
||||||
log.info("[previewVoice][开始试听,使用系统配音配置,用户({})]", userId);
|
|
||||||
voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), getDefaultVoiceId());
|
voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), getDefaultVoiceId());
|
||||||
if (StrUtil.isBlank(voiceId)) {
|
if (StrUtil.isBlank(voiceId)) {
|
||||||
throw exception(VOICE_NOT_EXISTS, "系统配音音色ID不能为空");
|
throw exception(VOICE_NOT_EXISTS, "系统配音音色ID不能为空");
|
||||||
}
|
}
|
||||||
inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
String finalText = determineSynthesisText(
|
// 统一处理:使用前端传入的 inputText,否则使用默认试听文本
|
||||||
transcriptionText,
|
String finalText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
|
||||||
inputText,
|
|
||||||
true);
|
|
||||||
|
|
||||||
// 使用请求参数或默认值
|
|
||||||
String instruction = reqVO.getInstruction();
|
String instruction = reqVO.getInstruction();
|
||||||
// 注意:instruction参数现在直接传递给CosyVoice,不再添加到文本中
|
|
||||||
Float speechRate = reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f;
|
Float speechRate = reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f;
|
||||||
Float volume = reqVO.getVolume() != null ? reqVO.getVolume() : 0f;
|
Float volume = reqVO.getVolume() != null ? reqVO.getVolume() : 0f;
|
||||||
String audioFormat = StrUtil.blankToDefault(reqVO.getAudioFormat(), "mp3");
|
String audioFormat = StrUtil.blankToDefault(reqVO.getAudioFormat(), "mp3");
|
||||||
|
|
||||||
// 构建缓存key(使用fileUrl或voiceId)
|
// 缓存
|
||||||
String cacheKey = buildCacheKey(PREVIEW_CACHE_PREFIX,
|
String cacheKey = buildCacheKey(PREVIEW_CACHE_PREFIX, voiceId, fileUrl, finalText,
|
||||||
voiceId,
|
speechRate, volume, instruction, audioFormat, null);
|
||||||
fileUrl,
|
|
||||||
finalText,
|
|
||||||
speechRate,
|
|
||||||
volume,
|
|
||||||
instruction,
|
|
||||||
audioFormat,
|
|
||||||
null);
|
|
||||||
PreviewCacheEntry previewCache = getPreviewCache(cacheKey);
|
PreviewCacheEntry previewCache = getPreviewCache(cacheKey);
|
||||||
|
|
||||||
if (previewCache != null) {
|
if (previewCache != null) {
|
||||||
log.info("[previewVoice][使用缓存,配音编号({}),voiceId({}),cacheKey({})]",
|
return buildPreviewResp(previewCache.getAudioBase64(), previewCache.getFormat(), voiceId);
|
||||||
voiceConfigId, voiceId, cacheKey);
|
|
||||||
// 缓存命中,直接返回缓存的数据(Base64)
|
|
||||||
String cachedBase64 = previewCache.getAudioBase64();
|
|
||||||
return buildPreviewResp(cachedBase64, previewCache.getFormat(), voiceId);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
log.info("[previewVoice][调用语音合成服务,配音编号({}),voiceId({}),fileUrl({}),文本长度({}),供应商({})]",
|
// TTS 合成
|
||||||
voiceConfigId, voiceId, fileUrl, finalText.length(), reqVO.getProviderType());
|
log.info("[previewVoice][TTS,voiceId={}, textLen={}]", voiceId, finalText.length());
|
||||||
|
|
||||||
// 使用 Provider 接口进行 TTS 合成(支持前端选择供应商,不传则使用默认)
|
|
||||||
VoiceCloneProvider provider = voiceProviderFactory.getProvider(reqVO.getProviderType());
|
VoiceCloneProvider provider = voiceProviderFactory.getProvider(reqVO.getProviderType());
|
||||||
VoiceTtsRequest ttsRequest = VoiceTtsRequest.builder()
|
VoiceTtsRequest ttsRequest = VoiceTtsRequest.builder()
|
||||||
.text(finalText)
|
.text(finalText)
|
||||||
.voiceId(voiceId)
|
.voiceId(voiceId)
|
||||||
.fileUrl(fileUrl)
|
.fileUrl(fileUrl)
|
||||||
.referenceText(transcriptionText)
|
.referenceText(referenceText)
|
||||||
.model(null) // 使用默认模型
|
.model(null)
|
||||||
.speechRate(speechRate)
|
.speechRate(speechRate)
|
||||||
.volume(volume)
|
.volume(volume)
|
||||||
.instruction(instruction)
|
.instruction(instruction)
|
||||||
@@ -624,22 +570,13 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
|||||||
.build();
|
.build();
|
||||||
|
|
||||||
VoiceTtsResult ttsResult = provider.synthesize(ttsRequest);
|
VoiceTtsResult ttsResult = provider.synthesize(ttsRequest);
|
||||||
|
|
||||||
String format = defaultFormat(ttsResult.getFormat(), audioFormat);
|
String format = defaultFormat(ttsResult.getFormat(), audioFormat);
|
||||||
String identifier = StrUtil.isNotBlank(voiceId) ? voiceId : "voice";
|
|
||||||
String objectName = buildFileName(identifier, format);
|
|
||||||
|
|
||||||
// 【安全方案】不暴露OSS链接,直接返回Base64编码的音频数据
|
|
||||||
// 这样前端可直接播放,无需额外请求,也不会暴露OSS存储信息
|
|
||||||
String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio());
|
String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio());
|
||||||
log.info("[previewVoice][合成成功,配音编号({}),voiceId({}),format({}),audioSize={}]",
|
|
||||||
voiceConfigId, voiceId, format, ttsResult.getAudio().length);
|
|
||||||
|
|
||||||
// 缓存Base64数据(用于提升响应速度)
|
savePreviewCache(cacheKey, new PreviewCacheEntry(audioBase64, format,
|
||||||
PreviewCacheEntry entry = new PreviewCacheEntry(audioBase64, format, ttsResult.getSampleRate(), ttsResult.getRequestId());
|
ttsResult.getSampleRate(), ttsResult.getRequestId()));
|
||||||
savePreviewCache(cacheKey, entry);
|
|
||||||
|
|
||||||
// 返回Base64数据,前端使用 data:audio/...;base64,... 格式播放
|
log.info("[previewVoice][成功,voiceId={}, format={}, size={}]", voiceId, format, ttsResult.getAudio().length);
|
||||||
return buildPreviewResp(audioBase64, format, voiceId);
|
return buildPreviewResp(audioBase64, format, voiceId);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -716,25 +653,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
|||||||
throw exception(VOICE_TTS_FAILED, "请提供需要合成的文本内容");
|
throw exception(VOICE_TTS_FAILED, "请提供需要合成的文本内容");
|
||||||
}
|
}
|
||||||
|
|
||||||
private String appendEmotion(String text, String emotion) {
|
|
||||||
if (StrUtil.isBlank(text)) {
|
|
||||||
return text;
|
|
||||||
}
|
|
||||||
if (StrUtil.isBlank(emotion) || "neutral".equalsIgnoreCase(emotion)) {
|
|
||||||
return text;
|
|
||||||
}
|
|
||||||
String emotionLabel = switch (emotion.toLowerCase()) {
|
|
||||||
case "happy" -> "高兴";
|
|
||||||
case "angry" -> "愤怒";
|
|
||||||
case "sad" -> "悲伤";
|
|
||||||
case "scared" -> "害怕";
|
|
||||||
case "disgusted" -> "厌恶";
|
|
||||||
case "surprised" -> "惊讶";
|
|
||||||
default -> emotion;
|
|
||||||
};
|
|
||||||
return "【情感:" + emotionLabel + "】" + text;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 从URL中提取原始URL(去除查询参数和锚点)
|
* 从URL中提取原始URL(去除查询参数和锚点)
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ public class AppTikVoicePreviewReqVO {
|
|||||||
@Size(max = 4000, message = "语音文本不能超过 4000 个字符")
|
@Size(max = 4000, message = "语音文本不能超过 4000 个字符")
|
||||||
private String transcriptionText;
|
private String transcriptionText;
|
||||||
|
|
||||||
@Schema(description = "输入文本(可选,如果不传则使用配音的识别文本或默认文本)")
|
@Schema(description = "输入文本(可选,不传则使用默认试听文本)")
|
||||||
@Size(max = 4000, message = "输入文本不能超过 4000 个字符")
|
@Size(max = 4000, message = "输入文本不能超过 4000 个字符")
|
||||||
private String inputText;
|
private String inputText;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user