优化功能

This commit is contained in:
2025-11-22 00:25:29 +08:00
parent bd367c645b
commit a3cc6c6db0
22 changed files with 595 additions and 258 deletions

View File

@@ -8,7 +8,7 @@ import request from './http'
*/
/**
 * Create a digital-human generation task.
 *
 * @param {Object} data - Task creation payload (task name, video file id,
 *   voice/TTS parameters); forwarded as the POST body. Exact schema is
 *   defined by the backend — confirm against the server-side VO.
 * @returns {*} Whatever the `request` wrapper returns (presumably a Promise
 *   of the HTTP response — verify against './http').
 */
export function createDigitalHumanTask(data) {
  return request({
    // Gateway-prefixed endpoint; the stale pre-migration '/api/...' duplicate
    // key (dead code — last key wins in an object literal) has been removed.
    url: '/webApi/api/tik/digital-human/task/create',
    method: 'post',
    data
  })
}
@@ -19,7 +19,7 @@ export function createDigitalHumanTask(data) {
*/
/**
 * Fetch a single digital-human task by id.
 *
 * @param {number|string} taskId - Task identifier, sent as a query parameter.
 * @returns {*} Result of the `request` wrapper (presumably a Promise —
 *   verify against './http').
 */
export function getDigitalHumanTask(taskId) {
  return request({
    // Gateway-prefixed endpoint; stale duplicate '/api/...' url key removed.
    url: '/webApi/api/tik/digital-human/task/get',
    method: 'get',
    params: { taskId }
  })
}
@@ -30,7 +30,7 @@ export function getDigitalHumanTask(taskId) {
*/
/**
 * Fetch a paginated list of digital-human tasks.
 *
 * @param {Object} params - Paging/filter query parameters (e.g. pageNo,
 *   pageSize — confirm against the backend page VO).
 * @returns {*} Result of the `request` wrapper (presumably a Promise —
 *   verify against './http').
 */
export function getDigitalHumanTaskPage(params) {
  return request({
    // Gateway-prefixed endpoint; stale duplicate '/api/...' url key removed.
    url: '/webApi/api/tik/digital-human/task/page',
    method: 'get',
    params
  })
}
@@ -41,7 +41,7 @@ export function getDigitalHumanTaskPage(params) {
*/
/**
 * Fetch aggregate statistics for the current user's digital-human tasks.
 *
 * Takes no parameters; the backend scopes results by the authenticated user.
 *
 * @returns {*} Result of the `request` wrapper (presumably a Promise —
 *   verify against './http').
 */
export function getTaskStatistics() {
  return request({
    // Gateway-prefixed endpoint; stale duplicate '/api/...' url key removed.
    url: '/webApi/api/tik/digital-human/task/statistics',
    method: 'get'
  })
}
@@ -51,7 +51,7 @@ export function getTaskStatistics() {
*/
/**
 * Cancel a running digital-human task.
 *
 * @param {number|string} taskId - Task identifier, interpolated into the
 *   REST path.
 * @returns {*} Result of the `request` wrapper (presumably a Promise —
 *   verify against './http').
 */
export function cancelTask(taskId) {
  return request({
    // Gateway-prefixed endpoint; stale duplicate '/api/...' url key removed.
    url: `/webApi/api/tik/digital-human/task/${taskId}/cancel`,
    method: 'post'
  })
}
@@ -61,7 +61,7 @@ export function cancelTask(taskId) {
*/
/**
 * Retry a failed or canceled digital-human task.
 *
 * @param {number|string} taskId - Task identifier, interpolated into the
 *   REST path.
 * @returns {*} Result of the `request` wrapper (presumably a Promise —
 *   verify against './http').
 */
export function retryTask(taskId) {
  return request({
    // Gateway-prefixed endpoint; stale duplicate '/api/...' url key removed.
    url: `/webApi/api/tik/digital-human/task/${taskId}/retry`,
    method: 'post'
  })
}
@@ -71,7 +71,7 @@ export function retryTask(taskId) {
*/
/**
 * Delete a digital-human task.
 *
 * @param {number|string} taskId - Task identifier, interpolated into the
 *   REST path.
 * @returns {*} Result of the `request` wrapper (presumably a Promise —
 *   verify against './http').
 */
export function deleteTask(taskId) {
  return request({
    // Gateway-prefixed endpoint; stale duplicate '/api/...' url key removed.
    url: `/webApi/api/tik/digital-human/task/${taskId}`,
    method: 'delete'
  })
}

View File

@@ -26,19 +26,23 @@ const isPlayingPreview = ref(false) // 是否正在播放试听音频
const isPlayingSynthesized = ref(false) // 是否正在播放已合成的音频
const pollingInterval = ref(null) // 轮询间隔ID
// Base64音频缓存
const audioBase64Cache = new Map()
const AUDIO_CACHE_MAX_SIZE = 10 // 最多缓存10个音频
// TTS 配置
const ttsText = ref('')
const selectedTtsVoice = ref('')
const speechRate = ref(1.0)
const emotion = ref('neutral')
const instruction = ref('neutral') // 指令参数,用于控制音色风格
const voiceSource = ref('user')
// 系统音色库
// 系统音色库使用CosyVoice v3-flash模型
const SYSTEM_VOICES = [
{ id: 'sys-pro-01', name: '星悦·知性女声', gender: 'female', category: '职业', description: '温柔专业', voiceId: 'cosyvoice-v2-sys-pro-01' },
{ id: 'sys-boy-01', name: '澄澄·少男音', gender: 'male', category: '少男', description: '年轻清爽', voiceId: 'cosyvoice-v2-sys-boy-01' },
{ id: 'sys-girl-01', name: '沁雪·少女音', gender: 'female', category: '少女', description: '活泼甜美', voiceId: 'cosyvoice-v2-sys-girl-01' },
{ id: 'sys-man-01', name: '寰宇·男青年', gender: 'male', category: '男青年', description: '磁性沉稳', voiceId: 'cosyvoice-v2-sys-man-01' }
{ id: 'sys-pro-01', name: '星悦·知性女声', gender: 'female', category: '职业', description: '温柔专业', voiceId: 'cosyvoice-v3-flash-sys-pro-01', defaultInstruction: '请用温柔专业的语调朗读' },
{ id: 'sys-boy-01', name: '澄澄·少男音', gender: 'male', category: '少男', description: '年轻清爽', voiceId: 'cosyvoice-v3-flash-sys-boy-01', defaultInstruction: '请用年轻清爽的语调朗读' },
{ id: 'sys-girl-01', name: '沁雪·少女音', gender: 'female', category: '少女', description: '活泼甜美', voiceId: 'cosyvoice-v3-flash-sys-girl-01', defaultInstruction: '请用活泼甜美的语调朗读' },
{ id: 'sys-man-01', name: '寰宇·男青年', gender: 'male', category: '男青年', description: '磁性沉稳', voiceId: 'cosyvoice-v3-flash-sys-man-01', defaultInstruction: '请用磁性沉稳的语调朗读' }
]
// 用户音色列表
@@ -57,7 +61,15 @@ const userVoiceCards = computed(() =>
}))
)
const displayedVoices = computed(() => userVoiceCards.value)
const displayedVoices = computed(() => {
if (voiceSource.value === 'system') {
return SYSTEM_VOICES.map(voice => ({
...voice,
source: 'system'
}))
}
return userVoiceCards.value
})
const selectedVoiceMeta = computed(() =>
displayedVoices.value.find(voice => `${voice.source}-${voice.id}` === selectedTtsVoice.value)
@@ -66,7 +78,16 @@ const selectedVoiceMeta = computed(() =>
// UI 状态
const speechRateMarks = { 0.5: '0.5x', 1: '1x', 1.5: '1.5x', 2: '2x' }
const speechRateDisplay = computed(() => `${speechRate.value.toFixed(1)}x`)
const canGenerate = computed(() => !!(synthesizedAudio.value?.fileId && uploadedVideo.value && !isGenerating.value))
// 生成数字人的条件:选中了音色 + 上传了视频 + 没有正在生成
// 注意:不需要先合成语音,可以直接使用音色配置
const canGenerate = computed(() => {
const hasText = ttsText.value.trim() // 文案必填
const hasVoice = selectedVoiceMeta.value // 必须选中音色
const hasVideo = uploadedVideo.value // 必须上传视频
const notGenerating = !isGenerating.value // 不能正在生成
return !!(hasText && hasVoice && hasVideo && notGenerating)
})
// 音色选择
const setVoiceSource = (source) => {
@@ -75,6 +96,8 @@ const setVoiceSource = (source) => {
selectedTtsVoice.value = ''
if (source === 'user' && userVoiceCards.value.length > 0) {
selectVoiceProfile(userVoiceCards.value[0])
} else if (source === 'system' && SYSTEM_VOICES.length > 0) {
selectVoiceProfile({ ...SYSTEM_VOICES[0], source: 'system' })
}
}
@@ -89,12 +112,8 @@ const playVoiceSample = async (voice) => {
if (previewLoadingVoiceId.value === voice.id || isPlayingPreview.value) {
return
}
if (voice.source === 'user' || (voice.source === 'system' && voice.voiceId)) {
return triggerVoicePreview(voice)
}
const url = voice.previewUrl || voice.fileUrl
if (!url) return message.warning('暂无可试听的音频')
playAudioPreview(url)
// 用户音色和系统音色都走实时试听流程
return triggerVoicePreview(voice)
}
const triggerVoicePreview = async (voice) => {
@@ -137,6 +156,7 @@ const triggerVoicePreview = async (voice) => {
const buildPreviewParams = (voice) => {
if (voice.source === 'user') {
// 使用voiceConfigId让后端查询数据库获取文件URL和transcriptionText
// 用户音色不传instruction
const configId = voice.rawId || extractIdFromString(voice.id)
if (!configId) {
message.error('配音配置无效')
@@ -145,15 +165,15 @@ const buildPreviewParams = (voice) => {
return {
voiceConfigId: configId,
inputText: ttsText.value, // 传递用户输入的文本
emotion: emotion.value || 'neutral',
speechRate: speechRate.value || 1.0,
audioFormat: 'mp3'
}
} else {
// 系统音色使用用户选择的instruction
return {
voiceId: voice.voiceId,
inputText: ttsText.value, // 传递用户输入的文本
emotion: emotion.value || 'neutral',
instruction: instruction.value && instruction.value !== 'neutral' ? instruction.value : (voice.defaultInstruction || '请用自然流畅的语调朗读'),
speechRate: speechRate.value || 1.0,
audioFormat: 'mp3'
}
@@ -177,11 +197,10 @@ const handleSynthesizeVoice = async () => {
const params = {
inputText: ttsText.value,
speechRate: speechRate.value,
emotion: emotion.value,
audioFormat: 'mp3'
}
// 如果是用户配音使用voiceConfigId让后端查询
// 如果是用户配音使用voiceConfigId让后端查询不传instruction
if (voice.source === 'user') {
const configId = voice.rawId || extractIdFromString(voice.id)
if (!configId) {
@@ -190,14 +209,14 @@ const handleSynthesizeVoice = async () => {
}
params.voiceConfigId = configId
} else {
// 使用系统音色voiceId
// 使用系统音色voiceId和用户选择的instruction
const voiceId = voice.voiceId || voice.rawId
if (!voiceId) {
message.warning('音色配置无效')
return
}
params.voiceId = voiceId
params.model = voice.model
params.instruction = instruction.value && instruction.value !== 'neutral' ? instruction.value : (voice.defaultInstruction || '请用自然流畅的语调朗读')
}
const res = await VoiceService.synthesize(params)
@@ -206,7 +225,7 @@ const handleSynthesizeVoice = async () => {
synthesizedAudio.value = res.data
message.success('语音合成成功')
} else {
message.error(res.msg || '合成失败')
message.error(res.message || '合成失败')
}
} catch (error) {
console.error('synthesize error:', error)
@@ -289,22 +308,8 @@ const generateVideo = async () => {
currentTaskStep.value = 'prepare_files'
try {
// 1. 首先上传音频和视频文件到后端
message.loading('正在上传文件...', 0)
// 上传音频(使用合成后的音频或原始音频)
let audioFileId = null
let audioUrl = null
if (synthesizedAudio.value?.fileId) {
// 如果有已合成的音频使用其fileId
audioFileId = synthesizedAudio.value.fileId
} else {
// 否则使用voiceConfigId让后端处理
audioFileId = voice.rawId || extractIdFromString(voice.id)
}
// 上传视频文件
// 1. 上传视频文件(只上传视频,音频由后端实时合成)
message.loading('正在上传视频...', 0)
const videoFileId = await uploadVideoFile(uploadedVideoFile.value)
if (!videoFileId) {
throw new Error('视频上传失败')
@@ -312,13 +317,15 @@ const generateVideo = async () => {
message.destroy()
// 2. 创建数字人任务
// 2. 创建数字人任务简化只使用voiceId后端实时TTS
const taskData = {
taskName: `数字人任务_${Date.now()}`,
audioFileId: audioFileId,
videoFileId: videoFileId,
// 音频由后端实时合成使用voiceId
voiceId: voice.voiceId || voice.rawId,
inputText: ttsText.value, // 文本内容用于TTS合成
speechRate: speechRate.value,
emotion: emotion.value,
instruction: voice.source === 'user' ? undefined : (instruction.value && instruction.value !== 'neutral' ? instruction.value : (voice.defaultInstruction || '请用自然流畅的语调朗读')),
guidanceScale: 1,
seed: 8888
}
@@ -350,10 +357,10 @@ const generateVideo = async () => {
const uploadVideoFile = async (file) => {
try {
const res = await MaterialService.uploadFile(file, 'video')
if (res.code === 0 && res.data?.id) {
return res.data.id
if (res.code === 0) {
return res.data // res.data就是文件ID
} else {
throw new Error(res.msg || '上传失败')
throw new Error(res.message || '上传失败')
}
} catch (error) {
console.error('uploadVideoFile error:', error)
@@ -528,17 +535,42 @@ const playAudioPreview = (url, options = {}) => {
const playAudioFromBase64 = (audioBase64, format = 'mp3', onEnded = null) => {
try {
previewObjectUrl && URL.revokeObjectURL(previewObjectUrl)
const byteCharacters = window.atob(audioBase64)
const byteNumbers = new Array(byteCharacters.length)
for (let i = 0; i < byteCharacters.length; i++) {
byteNumbers[i] = byteCharacters.charCodeAt(i)
// 检查缓存
const cacheKey = `${audioBase64.substring(0, 32)}_${format}` // 使用base64前32位作为缓存键
let objectUrl = audioBase64Cache.get(cacheKey)
if (!objectUrl) {
// 解码base64并创建blob
const byteCharacters = window.atob(audioBase64)
const byteNumbers = new Array(byteCharacters.length)
for (let i = 0; i < byteCharacters.length; i++) {
byteNumbers[i] = byteCharacters.charCodeAt(i)
}
const mime = format === 'mp3' ? 'audio/mpeg' : `audio/${format}`
const blob = new Blob([new Uint8Array(byteNumbers)], { type: mime })
objectUrl = URL.createObjectURL(blob)
// 管理缓存大小
if (audioBase64Cache.size >= AUDIO_CACHE_MAX_SIZE) {
// 清理最早的缓存
const firstKey = audioBase64Cache.keys().next().value
const oldUrl = audioBase64Cache.get(firstKey)
URL.revokeObjectURL(oldUrl)
audioBase64Cache.delete(firstKey)
}
// 存储到缓存
audioBase64Cache.set(cacheKey, objectUrl)
}
const mime = format === 'mp3' ? 'audio/mpeg' : `audio/${format}`
const blob = new Blob([new Uint8Array(byteNumbers)], { type: mime })
previewObjectUrl = URL.createObjectURL(blob)
// 清理旧的previewObjectUrl
if (previewObjectUrl && previewObjectUrl !== objectUrl) {
URL.revokeObjectURL(previewObjectUrl)
}
previewObjectUrl = objectUrl
playAudioPreview(previewObjectUrl, {
revokeOnEnd: true,
revokeOnEnd: false, // 缓存模式下不立即释放
onEnded: () => {
isPlayingPreview.value = false
onEnded && onEnded()
@@ -555,12 +587,20 @@ const playAudioFromBase64 = (audioBase64, format = 'mp3', onEnded = null) => {
// 生命周期
onMounted(async () => {
await voiceStore.load()
userVoiceCards.value.length > 0 && selectVoiceProfile(userVoiceCards.value[0])
// 默认选择第一个音色
if (voiceSource.value === 'user' && userVoiceCards.value.length > 0) {
selectVoiceProfile(userVoiceCards.value[0])
} else if (voiceSource.value === 'system' && SYSTEM_VOICES.length > 0) {
selectVoiceProfile({ ...SYSTEM_VOICES[0], source: 'system' })
}
})
onUnmounted(() => {
previewAudio?.pause?.()
previewAudio = null
// 清理所有缓存的ObjectURL
audioBase64Cache.forEach(url => URL.revokeObjectURL(url))
audioBase64Cache.clear()
previewObjectUrl && URL.revokeObjectURL(previewObjectUrl)
// 重置播放状态
isPlayingPreview.value = false
@@ -575,12 +615,17 @@ onUnmounted(() => {
// 监听器
watch(voiceSource, () => {
selectedTtsVoice.value = ''
userVoiceCards.value.length > 0 && selectVoiceProfile(userVoiceCards.value[0])
if (voiceSource.value === 'user' && userVoiceCards.value.length > 0) {
selectVoiceProfile(userVoiceCards.value[0])
} else if (voiceSource.value === 'system' && SYSTEM_VOICES.length > 0) {
selectVoiceProfile({ ...SYSTEM_VOICES[0], source: 'system' })
}
})
watch(() => voiceStore.profiles, () => {
voiceSource.value === 'user' && userVoiceCards.value.length > 0 &&
!selectedTtsVoice.value && selectVoiceProfile(userVoiceCards.value[0])
if (voiceSource.value === 'user' && userVoiceCards.value.length > 0 && !selectedTtsVoice.value) {
selectVoiceProfile(userVoiceCards.value[0])
}
})
watch([ttsText, selectedTtsVoice], () => {
@@ -613,7 +658,7 @@ let previewObjectUrl = ''
<div class="voice-source-toggle">
<button
v-for="source in ['user']"
v-for="source in ['user', 'system']"
:key="source"
class="source-btn"
:class="{ active: voiceSource === source }"
@@ -623,8 +668,8 @@ let previewObjectUrl = ''
</button>
</div>
<div v-if="userVoiceCards.length === 0" class="empty-voices">
还没有配音可先在"配音管理"中上传
<div v-if="displayedVoices.length === 0" class="empty-voices">
{{ voiceSource === 'user' ? '还没有配音可先在"配音管理"中上传' : '暂无可用的系统音色' }}
</div>
<div class="voice-list">
@@ -678,25 +723,17 @@ let previewObjectUrl = ''
</div>
</div>
<div class="control-group">
<div class="control-label">情感</div>
<div v-if="voiceSource === 'system'" class="control-group">
<div class="control-label">指令</div>
<div class="emotion-buttons">
<button
v-for="em in ['neutral', 'happy', 'angry', 'sad', 'scared', 'disgusted', 'surprised']"
:key="em"
v-for="inst in ['neutral', '请用自然流畅的语调朗读', '请用温柔专业的语调朗读', '请用热情洋溢的语调朗读', '请用低沉磁性的语调朗读', '请用活泼生动的语调朗读']"
:key="inst"
class="emotion-btn"
:class="{ active: emotion === em }"
@click="emotion = em"
:class="{ active: instruction === inst }"
@click="instruction = inst"
>
{{ {
neutral: '中性',
happy: '高兴',
angry: '愤怒',
sad: '悲伤',
scared: '害怕',
disgusted: '厌恶',
surprised: '惊讶'
}[em] }}
{{ inst === 'neutral' ? '中性' : inst }}
</button>
</div>
</div>

View File

@@ -238,7 +238,7 @@ public class TikUserFileServiceImpl implements TikUserFileService {
quotaService.increaseUsedStorage(userId, file.getSize());
log.info("[saveFileRecord][用户({})保存文件记录成功,文件编号({})infra文件编号({})]", userId, userFile.getId(), infraFileId);
// 返回 infra_file.id因为创建配音等操作需要使用 infra_file.id
// 返回 infra_file.id保持与现有配音功能的兼容性
return infraFileId;
}

View File

@@ -66,12 +66,12 @@ public class CosyVoiceClient {
SpeechSynthesizer synthesizer = null;
try {
log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}, speechRate={}, emotion={}]",
log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}, speechRate={}, instruction={}]",
request.getVoiceId(),
request.getText().length(),
StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()),
request.getSpeechRate(),
request.getEmotion());
request.getInstruction());
// 使用 DashScope SDK 构建参数(严格按文档)
// 注意speechRate 和 volume 需要转换为 int 类型
@@ -83,6 +83,10 @@ public class CosyVoiceClient {
.volume(request.getVolume() != null ? request.getVolume().intValue() : 0)
.build();
if (StrUtil.isNotBlank(request.getInstruction())) {
param.setInstruction(request.getInstruction());
}
// 初始化合成器(同步调用传 null
synthesizer = new SpeechSynthesizer(param, null);

View File

@@ -9,6 +9,7 @@ import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import okhttp3.HttpUrl;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
@@ -42,9 +43,7 @@ public class LatentsyncClient {
private volatile OkHttpClient httpClient;
public LatentsyncSubmitResponse submitTask(LatentsyncSubmitRequest request) {
if (!properties.isEnabled()) {
throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "未配置 Latentsync API Key");
}
validateEnabled();
validateRequest(request);
Map<String, Object> payload = buildPayload(request);
@@ -57,28 +56,37 @@ public class LatentsyncClient {
.post(RequestBody.create(body.getBytes(StandardCharsets.UTF_8), JSON))
.build();
try (Response response = getHttpClient().newCall(httpRequest).execute()) {
String responseBody = response.body() != null ? response.body().string() : "";
if (!response.isSuccessful()) {
log.error("[Latentsync][submit failed][status={}, body={}]", response.code(), responseBody);
throw buildException(responseBody);
}
LatentsyncSubmitResponse submitResponse =
objectMapper.readValue(responseBody, LatentsyncSubmitResponse.class);
try {
LatentsyncSubmitResponse submitResponse = executeRequest(httpRequest, "submit");
// 验证requestId
if (StrUtil.isBlank(submitResponse.getRequestId())) {
log.error("[Latentsync][submit failed][response={}]", responseBody);
throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "Latentsync 返回 requestId 为空");
}
return submitResponse;
} catch (ServiceException ex) {
throw ex;
} catch (Exception ex) {
log.error("[Latentsync][submit exception]", ex);
throw exception(LATENTSYNC_SUBMIT_FAILED);
}
} catch (ServiceException ex) {
throw ex;
} catch (Exception ex) {
log.error("[Latentsync][submit exception]", ex);
log.error("[Latentsync][build request exception]", ex);
throw exception(LATENTSYNC_SUBMIT_FAILED);
}
}
private void validateEnabled() {
if (!properties.isEnabled()) {
throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "未配置 Latentsync API Key");
}
}
private void validateRequestId(String requestId) {
if (StrUtil.isBlank(requestId)) {
throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "requestId 不能为空");
}
}
private void validateRequest(LatentsyncSubmitRequest request) {
if (request == null) {
throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "请求体不能为空");
@@ -107,6 +115,64 @@ public class LatentsyncClient {
return payload;
}
/**
* 获取任务结果
*/
public LatentsyncSubmitResponse getTaskResult(String requestId) {
validateEnabled();
validateRequestId(requestId);
try {
// 构建GET请求URL使用HttpUrl确保参数正确编码
HttpUrl url = HttpUrl.parse(properties.getSubmitUrl())
.newBuilder()
.addQueryParameter("request_id", requestId)
.build();
Request httpRequest = new Request.Builder()
.url(url)
.addHeader("Authorization", "Bearer " + properties.getApiKey())
.get()
.build();
try {
return executeRequest(httpRequest, "get result", requestId);
} catch (ServiceException ex) {
throw ex;
} catch (Exception ex) {
log.error("[Latentsync][get result exception]", ex);
throw exception(LATENTSYNC_SUBMIT_FAILED);
}
} catch (Exception ex) {
log.error("[Latentsync][build request exception]", ex);
throw exception(LATENTSYNC_SUBMIT_FAILED);
}
}
/**
* 执行HTTP请求的通用方法
*/
private LatentsyncSubmitResponse executeRequest(Request httpRequest, String operation) {
return executeRequest(httpRequest, operation, null);
}
private LatentsyncSubmitResponse executeRequest(Request httpRequest, String operation, String requestId) {
try (Response response = getHttpClient().newCall(httpRequest).execute()) {
String responseBody = response.body() != null ? response.body().string() : "";
if (!response.isSuccessful()) {
log.error("[Latentsync][{} failed][status={}, body={}]", operation, response.code(), responseBody);
throw buildException(responseBody);
}
log.info("[Latentsync][{} success][requestId={}, responseBody={}]",
operation, requestId, responseBody);
return objectMapper.readValue(responseBody, LatentsyncSubmitResponse.class);
} catch (Exception ex) {
log.error("[Latentsync][{} exception]", operation, ex);
throw exception(LATENTSYNC_SUBMIT_FAILED);
}
}
private OkHttpClient getHttpClient() {
if (httpClient == null) {
synchronized (this) {

View File

@@ -9,7 +9,7 @@ import lombok.Data;
public class CosyVoiceCloneRequest {
/**
* 复刻模型cosyvoice-v1 或 cosyvoice-v2
* 复刻模型cosyvoice-v3-flash 等
*/
private String targetModel;

View File

@@ -31,7 +31,7 @@ public class CosyVoiceTtsRequest {
private String referenceText;
/**
* 模型(默认 cosyvoice-v2
* 模型(默认 cosyvoice-v3-flash
*/
private String model;
@@ -46,9 +46,9 @@ public class CosyVoiceTtsRequest {
private Float volume;
/**
* 情感,可选
* 指令(用于控制音色风格),可选
*/
private String emotion;
private String instruction;
/**
* 采样率

View File

@@ -1,5 +1,6 @@
package cn.iocoder.yudao.module.tik.voice.client.dto;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;
import java.util.Map;
@@ -28,12 +29,33 @@ public class LatentsyncSubmitResponse {
/**
* 任务 ID
*/
@JsonProperty("request_id")
private String requestId;
/**
* 当前状态
*/
private String status;
/**
* 视频信息
*/
private VideoInfo video;
/**
* 种子值
*/
private Integer seed;
/**
* 视频信息
*/
@Data
public static class VideoInfo {
private String url;
private String contentType;
private Integer fileSize;
}
}

View File

@@ -23,7 +23,7 @@ public class CosyVoiceProperties {
/**
* 默认模型
*/
private String defaultModel = "cosyvoice-v2";
private String defaultModel = "cosyvoice-v3-flash";
/**
* 默认 voiceId可选
@@ -38,7 +38,7 @@ public class CosyVoiceProperties {
/**
* 默认音频格式
*/
private String audioFormat = "wav";
private String audioFormat = "mp3";
/**
* 试听默认示例文本

View File

@@ -18,7 +18,7 @@ public class LatentsyncProperties {
/**
* 302AI API Key可通过配置覆盖
*/
private String apiKey = "ab900d8c94094a90aed3e88cdba785c1";
private String apiKey = "sk-0IZJ2oo7VCkegFuF3JRsSRtyFUsIvLoHNK8OpulnlsStFN78";
/**
* 默认海外网关

View File

@@ -43,32 +43,30 @@ public class TikDigitalHumanTaskDO extends TenantBaseDO {
private String taskName;
// ========== 文件信息 ==========
/**
* 音频文件IDtik_user_file.id
*/
private Long audioFileId;
/**
* 视频文件IDtik_user_file.id
*/
private Long videoFileId;
/**
* 音频文件URL公网可访问用于Latentsync调用
*/
private String audioUrl;
/**
* 视频文件URL公网可访问用于Latentsync调用
*/
private String videoUrl;
// ========== 生成参数 ==========
// ========== TTS参数 ==========
/**
* 配音配置IDtik_user_voice.id
*/
private Long voiceConfigId;
/**
* CosyVoice生成的voice_id
* 音色IDCosyVoice voiceId
*/
private String voiceId;
/**
* 输入文本(用于语音合成)
*/
private String inputText;
/**
* 音频文件URL公网可访问用于Latentsync调用
*/
private String audioUrl;
// ========== 生成参数 ==========
/**
* 语速0.5-2.0
*/
@@ -81,6 +79,10 @@ public class TikDigitalHumanTaskDO extends TenantBaseDO {
* 情感neutral/happy/sad等
*/
private String emotion;
/**
* 指令(用于控制音色风格)
*/
private String instruction;
/**
* Latentsync guidance_scale1-2
*/

View File

@@ -0,0 +1,38 @@
package cn.iocoder.yudao.module.tik.voice.enums;
import cn.hutool.core.util.StrUtil;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
* CosyVoice情感枚举
* 根据阿里云DashScope官方文档定义
* 参考https://help.aliyun.com/zh/dashscope/developer-reference/tts-api
*/
@Getter
@AllArgsConstructor
public enum CosyVoiceEmotionEnum {
NEUTRAL("neutral", "中性"),
HAPPY("happy", "高兴"),
SAD("sad", "悲伤"),
ANGRY("angry", "愤怒"),
SURPRISED("surprised", "惊讶"),
DISGUSTED("disgusted", "厌恶"),
SCARED("scared", "害怕");
private final String code;
private final String description;
public static CosyVoiceEmotionEnum getByCode(String code) {
if (StrUtil.isBlank(code)) {
return NEUTRAL;
}
for (CosyVoiceEmotionEnum emotion : values()) {
if (emotion.getCode().equalsIgnoreCase(code)) {
return emotion;
}
}
return NEUTRAL;
}
}

View File

@@ -1,10 +1,11 @@
package cn.iocoder.yudao.module.tik.voice.service;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import cn.iocoder.yudao.framework.common.pojo.CommonResult;
import cn.hutool.http.HttpRequest;
import cn.hutool.http.HttpResponse;
import cn.iocoder.yudao.framework.common.pojo.PageResult;
import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils;
import cn.iocoder.yudao.framework.common.util.http.HttpUtils;
import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
import cn.iocoder.yudao.framework.security.core.util.SecurityFrameworkUtils;
import cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil;
@@ -13,21 +14,15 @@ import cn.iocoder.yudao.module.infra.dal.dataobject.file.FileDO;
import cn.iocoder.yudao.module.infra.dal.mysql.file.FileMapper;
import cn.iocoder.yudao.module.tik.file.dal.dataobject.TikUserFileDO;
import cn.iocoder.yudao.module.tik.file.dal.mysql.TikUserFileMapper;
import cn.iocoder.yudao.module.tik.file.service.TikOssInitService;
import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikDigitalHumanTaskDO;
import cn.iocoder.yudao.module.tik.voice.dal.mysql.TikDigitalHumanTaskMapper;
import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikUserVoiceDO;
import cn.iocoder.yudao.module.tik.voice.dal.mysql.TikUserVoiceMapper;
import cn.iocoder.yudao.module.tik.voice.enums.DigitalHumanTaskStatusEnum;
import cn.iocoder.yudao.module.tik.voice.enums.DigitalHumanTaskStepEnum;
import cn.iocoder.yudao.module.tik.voice.service.TikUserVoiceService;
import cn.iocoder.yudao.module.tik.voice.vo.*;
import cn.iocoder.yudao.module.tik.voice.client.LatentsyncClient;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncResultRespVO;
import cn.iocoder.yudao.module.tik.voice.service.LatentsyncService;
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
import cn.iocoder.yudao.framework.common.exception.ServiceException;
import cn.iocoder.yudao.module.tik.enums.ErrorCodeConstants;
import cn.iocoder.yudao.framework.common.util.date.DateUtils;
import cn.iocoder.yudao.framework.common.util.string.StrUtils;
import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@@ -39,11 +34,6 @@ import org.springframework.transaction.support.TransactionSynchronizationManager
import org.springframework.validation.annotation.Validated;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
/**
* 数字人任务 Service 实现
@@ -57,12 +47,12 @@ import java.util.concurrent.TimeUnit;
public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
private final TikDigitalHumanTaskMapper taskMapper;
private final TikUserVoiceMapper voiceMapper;
private final TikUserFileMapper userFileMapper;
private final FileMapper fileMapper;
private final FileApi fileApi;
private final TikUserVoiceService userVoiceService;
private final LatentsyncService latentsyncService;
private final TikOssInitService ossInitService;
/**
* 预签名URL过期时间24小时
@@ -97,16 +87,7 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
@Override
public AppTikDigitalHumanRespVO getTask(Long taskId) {
TikDigitalHumanTaskDO task = taskMapper.selectById(taskId);
if (task == null) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_NOT_EXISTS);
}
Long userId = SecurityFrameworkUtils.getLoginUserId();
if (!task.getUserId().equals(userId)) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_FORBIDDEN);
}
TikDigitalHumanTaskDO task = getCurrentUserTask(taskId);
return convertToRespVO(task);
}
@@ -157,15 +138,7 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
@Override
@Transactional(rollbackFor = Exception.class)
public void cancelTask(Long taskId) {
TikDigitalHumanTaskDO task = taskMapper.selectById(taskId);
if (task == null) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_NOT_EXISTS);
}
Long userId = SecurityFrameworkUtils.getLoginUserId();
if (!task.getUserId().equals(userId)) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_FORBIDDEN);
}
TikDigitalHumanTaskDO task = getCurrentUserTask(taskId);
if (!"PROCESSING".equals(task.getStatus())) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.DIGITAL_HUMAN_TASK_CANNOT_CANCEL);
@@ -179,21 +152,13 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
updateObj.setCurrentStep("canceled");
taskMapper.updateById(updateObj);
log.info("[cancelTask][用户({})取消任务({})成功]", userId, taskId);
log.info("[cancelTask][用户({})取消任务({})成功]", task.getUserId(), taskId);
}
@Override
@Transactional(rollbackFor = Exception.class)
public void retryTask(Long taskId) {
TikDigitalHumanTaskDO task = taskMapper.selectById(taskId);
if (task == null) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_NOT_EXISTS);
}
Long userId = SecurityFrameworkUtils.getLoginUserId();
if (!task.getUserId().equals(userId)) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_FORBIDDEN);
}
TikDigitalHumanTaskDO task = getCurrentUserTask(taskId);
if (!"FAILED".equals(task.getStatus()) && !"CANCELED".equals(task.getStatus())) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.DIGITAL_HUMAN_TASK_CANNOT_RETRY);
@@ -212,26 +177,18 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
// 重新开始异步处理
processTaskAsync(taskId);
log.info("[retryTask][用户({})重试任务({})成功]", userId, taskId);
log.info("[retryTask][用户({})重试任务({})成功]", task.getUserId(), taskId);
}
@Override
@Transactional(rollbackFor = Exception.class)
public void deleteTask(Long taskId) {
TikDigitalHumanTaskDO task = taskMapper.selectById(taskId);
if (task == null) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_NOT_EXISTS);
}
Long userId = SecurityFrameworkUtils.getLoginUserId();
if (!task.getUserId().equals(userId)) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_FORBIDDEN);
}
TikDigitalHumanTaskDO task = getCurrentUserTask(taskId);
// 删除任务
taskMapper.deleteById(taskId);
log.info("[deleteTask][用户({})删除任务({})成功]", userId, taskId);
log.info("[deleteTask][用户({})删除任务({})成功]", task.getUserId(), taskId);
}
@Override
@@ -242,36 +199,46 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
// ========== 私有方法 ==========
/**
* 获取当前用户拥有的任务
*/
private TikDigitalHumanTaskDO getCurrentUserTask(Long taskId) {
TikDigitalHumanTaskDO task = taskMapper.selectById(taskId);
if (task == null) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_NOT_EXISTS);
}
Long userId = SecurityFrameworkUtils.getLoginUserId();
if (!task.getUserId().equals(userId)) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_FORBIDDEN);
}
return task;
}
/**
* 验证任务输入参数
*/
private void validateTaskInput(AppTikDigitalHumanCreateReqVO reqVO, Long userId) {
// 验证文件信息:必须提供音频和视频文件之一
boolean hasAudio = reqVO.getAudioFileId() != null || StrUtil.isNotBlank(reqVO.getAudioUrl());
boolean hasVideo = reqVO.getVideoFileId() != null || StrUtil.isNotBlank(reqVO.getVideoUrl());
if (!hasAudio) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.DIGITAL_HUMAN_TASK_AUDIO_REQUIRED);
// 验证文本内容(必填)
if (StrUtil.isBlank(reqVO.getInputText())) {
throw new IllegalArgumentException("文案不能为空");
}
// 验证音色ID必填
if (StrUtil.isBlank(reqVO.getVoiceId())) {
throw new IllegalArgumentException("音色ID不能为空");
}
// 验证视频文件(必填)
boolean hasVideo = reqVO.getVideoFileId() != null || StrUtil.isNotBlank(reqVO.getVideoUrl());
if (!hasVideo) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.DIGITAL_HUMAN_TASK_VIDEO_REQUIRED);
}
// 如果提供了fileId验证文件是否存在且属于用户
if (reqVO.getAudioFileId() != null) {
validateUserFile(reqVO.getAudioFileId(), userId, "音频");
}
// 如果提供了videoFileId验证文件是否存在且属于用户
if (reqVO.getVideoFileId() != null) {
validateUserFile(reqVO.getVideoFileId(), userId, "视频");
}
// 验证配音配置
if (reqVO.getVoiceConfigId() != null) {
TikUserVoiceDO voice = voiceMapper.selectById(reqVO.getVoiceConfigId());
if (voice == null || !voice.getUserId().equals(userId)) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.VOICE_NOT_EXISTS);
}
}
}
/**
@@ -279,7 +246,7 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
*/
private void validateUserFile(Long fileId, Long userId, String fileType) {
TikUserFileDO userFile = userFileMapper.selectOne(new LambdaQueryWrapperX<TikUserFileDO>()
.eq(TikUserFileDO::getId, fileId)
.eq(TikUserFileDO::getFileId, fileId) // 查询fileId字段指向infra_file.id
.eq(TikUserFileDO::getUserId, userId));
if (userFile == null) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.FILE_NOT_EXISTS, fileType + "文件不存在");
@@ -294,14 +261,14 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
.userId(userId)
.taskName(reqVO.getTaskName())
.aiProvider(StrUtil.blankToDefault(reqVO.getAiProvider(), "302ai"))
.audioFileId(reqVO.getAudioFileId())
.videoFileId(reqVO.getVideoFileId())
.audioUrl(reqVO.getAudioUrl())
.videoUrl(reqVO.getVideoUrl())
.voiceConfigId(reqVO.getVoiceConfigId())
.voiceId(reqVO.getVoiceId())
.inputText(reqVO.getInputText())
.speechRate(reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f)
.volume(reqVO.getVolume() != null ? reqVO.getVolume() : 0f)
.emotion(StrUtil.blankToDefault(reqVO.getEmotion(), "neutral"))
.instruction(reqVO.getInstruction())
.guidanceScale(reqVO.getGuidanceScale() != null ? reqVO.getGuidanceScale() : 1)
.seed(reqVO.getSeed() != null ? reqVO.getSeed() : 8888)
.status("PENDING")
@@ -388,14 +355,7 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
private void prepareFiles(TikDigitalHumanTaskDO task) throws Exception {
log.info("[prepareFiles][任务({})开始准备文件]", task.getId());
// 如果提供了fileId生成预签名URL
if (task.getAudioFileId() != null) {
FileDO audioFile = fileMapper.selectById(task.getAudioFileId());
if (audioFile != null) {
task.setAudioUrl(fileApi.presignGetUrl(audioFile.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS));
}
}
// 处理视频文件音频由实时TTS生成无需准备
if (task.getVideoFileId() != null) {
FileDO videoFile = fileMapper.selectById(task.getVideoFileId());
if (videoFile != null) {
@@ -403,10 +363,7 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
}
}
// 验证文件URL
if (StrUtil.isBlank(task.getAudioUrl())) {
throw new Exception("音频文件URL生成失败");
}
// 验证视频文件URL(音频是实时生成,无需验证)
if (StrUtil.isBlank(task.getVideoUrl())) {
throw new Exception("视频文件URL生成失败");
}
@@ -414,7 +371,6 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
// 更新任务记录
TikDigitalHumanTaskDO updateObj = new TikDigitalHumanTaskDO();
updateObj.setId(task.getId());
updateObj.setAudioUrl(task.getAudioUrl());
updateObj.setVideoUrl(task.getVideoUrl());
taskMapper.updateById(updateObj);
@@ -422,18 +378,49 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
}
/**
* 语音合成
* 语音合成使用CosyVoice v3 Flash
*/
private String synthesizeVoice(TikDigitalHumanTaskDO task) throws Exception {
log.info("[synthesizeVoice][任务({})开始语音合成]", task.getId());
// 参数验证
if (StrUtil.isBlank(task.getVoiceId())) {
throw new Exception("音色ID不能为空");
}
if (StrUtil.isBlank(task.getInputText())) {
throw new Exception("输入文本不能为空");
}
// TODO: 调用现有的语音合成服务
// 这里需要根据实际的语音合成API进行集成
log.info("[synthesizeVoice][任务({})开始语音合成][voiceId={}, textLength={}]",
task.getId(), task.getVoiceId(), task.getInputText().length());
// 临时返回音频URL实际应该调用语音合成服务
String audioUrl = task.getAudioUrl();
// 构建TTS请求参数
AppTikVoiceTtsReqVO ttsReqVO = new AppTikVoiceTtsReqVO();
ttsReqVO.setInputText(task.getInputText());
ttsReqVO.setVoiceId(task.getVoiceId());
ttsReqVO.setSpeechRate(task.getSpeechRate() != null ? task.getSpeechRate() : 1.0f);
ttsReqVO.setVolume(task.getVolume() != null ? task.getVolume() : 0f);
ttsReqVO.setInstruction(task.getInstruction());
ttsReqVO.setAudioFormat("mp3");
log.info("[synthesizeVoice][任务({})语音合成完成]", task.getId());
// 调用语音合成服务
AppTikVoiceTtsRespVO ttsRespVO = userVoiceService.synthesizeVoice(ttsReqVO);
if (ttsRespVO == null) {
throw new Exception("语音合成失败,返回结果为空");
}
// 支持Base64和AudioUrl两种返回方式
String audioUrl = null;
if (StrUtil.isNotBlank(ttsRespVO.getAudioUrl())) {
// 优先使用AudioUrl
audioUrl = ttsRespVO.getAudioUrl();
} else if (StrUtil.isNotBlank(ttsRespVO.getAudioBase64())) {
// 如果是Base64需要保存为临时文件并获取URL
audioUrl = saveTempAudioFile(ttsRespVO.getAudioBase64(), ttsRespVO.getFormat());
} else {
throw new Exception("语音合成失败,未返回音频数据");
}
log.info("[synthesizeVoice][任务({})语音合成完成][audioUrl={}]", task.getId(), audioUrl);
return audioUrl;
}
@@ -482,12 +469,54 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
reqVO.setGuidanceScale(task.getGuidanceScale());
reqVO.setSeed(task.getSeed());
// 调用Latentsync服务
// 调用Latentsync服务提交任务
AppTikLatentsyncSubmitRespVO response = latentsyncService.submitTask(reqVO);
String requestId = response.getRequestId();
// 等待处理完成这里需要根据实际的Latentsync API调整
// 临时返回处理后的视频URL
return task.getVideoUrl();
log.info("[syncWithLatentsync][任务({})提交成功requestId={}]", task.getId(), requestId);
// 轮询等待任务完成
int maxAttempts = 60; // 最多轮询60次
int attempt = 0;
while (attempt < maxAttempts) {
attempt++;
try {
// 获取任务结果
AppTikLatentsyncResultRespVO result = latentsyncService.getTaskResult(requestId);
String status = result.getStatus();
log.info("[syncWithLatentsync][任务({})轮询结果: 第{}次, status={}]", task.getId(), attempt, status);
if ("COMPLETED".equals(status)) {
// 任务完成获取视频URL
String videoUrl = result.getVideo().getUrl();
if (StrUtil.isNotBlank(videoUrl)) {
log.info("[syncWithLatentsync][任务({})口型同步完成videoUrl={}]", task.getId(), videoUrl);
return videoUrl;
} else {
throw new Exception("Latentsync 返回视频URL为空");
}
} else if ("FAILED".equals(status)) {
throw new Exception("Latentsync 任务处理失败");
}
// 等待5秒后再次轮询
Thread.sleep(5000);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new Exception("等待Latentsync结果时被中断", e);
} catch (Exception e) {
log.error("[syncWithLatentsync][任务({})轮询异常: {}]", task.getId(), e.getMessage(), e);
// 如果是最后一次尝试,抛出异常
if (attempt >= maxAttempts) {
throw new Exception("等待Latentsync结果超时: " + e.getMessage(), e);
}
// 否则等待后重试
Thread.sleep(5000);
}
}
throw new Exception("等待Latentsync结果超时");
}
/**
@@ -498,8 +527,8 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
// TODO: 这里可以添加视频后处理逻辑,比如添加字幕、特效等
// 临时返回同步后的视频URL
String resultVideoUrl = syncedVideoUrl;
// 保存同步后的视频到OSS
String resultVideoUrl = saveVideoToOss(task, syncedVideoUrl);
log.info("[generateVideo][任务({})视频生成完成]", task.getId());
return resultVideoUrl;
@@ -539,6 +568,25 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
log.info("[updateTaskStatus][任务({})状态更新: {}]", taskId, updateObj);
}
/**
* 保存Base64音频数据为临时文件并返回访问URL
*/
private String saveTempAudioFile(String audioBase64, String format) throws Exception {
try {
// 解码Base64为字节数组
byte[] audioBytes = java.util.Base64.getDecoder().decode(audioBase64);
// 生成临时文件名
String fileName = "temp_audio_" + System.currentTimeMillis() + "." + (StrUtil.isNotBlank(format) ? format : "mp3");
// 保存到临时目录
String directory = "temp/audio";
// 保存文件并返回访问URL
return fileApi.createFile(audioBytes, fileName, directory, "audio/" + format);
} catch (Exception e) {
log.error("[saveTempAudioFile][保存音频文件失败]", e);
throw new Exception("保存音频文件失败:" + e.getMessage());
}
}
/**
* 更新任务进度
*/
@@ -546,4 +594,57 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
updateTaskStatus(taskId, "PROCESSING", step.getStep(), step.getProgress(), message, null);
}
/**
* 保存视频到OSS
*/
private String saveVideoToOss(TikDigitalHumanTaskDO task, String remoteVideoUrl) throws Exception {
log.info("[saveVideoToOss][任务({})开始下载并保存视频到OSS][remoteUrl={}]", task.getId(), remoteVideoUrl);
try {
// 1. 下载远程视频文件
byte[] videoBytes = downloadRemoteFile(remoteVideoUrl);
// 2. 获取OSS目录路径使用"generate"分类,符合数字人生成的语义)
Long userId = task.getUserId();
String baseDirectory = ossInitService.getOssDirectoryByCategory(userId, "generate");
// 3. 生成文件名格式task_{taskId}_{timestamp}.mp4
String fileName = String.format("task_%d_%d.mp4", task.getId(), System.currentTimeMillis());
// 4. 保存到OSS
String ossUrl = fileApi.createFile(videoBytes, fileName, baseDirectory, "video/mp4");
// 5. 移除预签名URL中的签名参数获取基础URL用于存储
String cleanOssUrl = HttpUtils.removeUrlQuery(ossUrl);
log.info("[saveVideoToOss][任务({})视频保存到OSS完成][directory={}, fileName={}, ossUrl={}]",
task.getId(), baseDirectory, fileName, cleanOssUrl);
return cleanOssUrl;
} catch (Exception e) {
log.error("[saveVideoToOss][任务({})保存视频到OSS失败][remoteUrl={}]", task.getId(), remoteVideoUrl, e);
// 如果保存失败返回原始URL降级处理
return remoteVideoUrl;
}
}
/**
* 下载远程文件
*/
private byte[] downloadRemoteFile(String remoteUrl) throws Exception {
log.info("[downloadRemoteFile][下载文件][url={}]", remoteUrl);
try (HttpResponse response = HttpRequest.get(remoteUrl)
.execute()) {
if (!response.isOk()) {
throw new Exception("下载文件失败: HTTP " + response.getStatus());
}
byte[] bytes = response.bodyBytes();
log.info("[downloadRemoteFile][文件下载完成][size={} bytes]", bytes.length);
return bytes;
}
}
}

View File

@@ -2,6 +2,7 @@ package cn.iocoder.yudao.module.tik.voice.service;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitRespVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncResultRespVO;
/**
* Latentsync 口型同步 Service
@@ -15,6 +16,14 @@ public interface LatentsyncService {
* @return 任务响应
*/
AppTikLatentsyncSubmitRespVO submitTask(AppTikLatentsyncSubmitReqVO reqVO);
/**
* 获取 Latentsync 任务结果
*
* @param requestId 任务 ID
* @return 任务结果
*/
AppTikLatentsyncResultRespVO getTaskResult(String requestId);
}

View File

@@ -4,9 +4,7 @@ import cn.hutool.core.util.StrUtil;
import cn.iocoder.yudao.module.tik.voice.client.LatentsyncClient;
import cn.iocoder.yudao.module.tik.voice.client.dto.LatentsyncSubmitRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.LatentsyncSubmitResponse;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitRespVO;
import jakarta.validation.Valid;
import cn.iocoder.yudao.module.tik.voice.vo.*;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.validation.annotation.Validated;
@@ -22,7 +20,7 @@ public class LatentsyncServiceImpl implements LatentsyncService {
private final LatentsyncClient latentsyncClient;
@Override
public AppTikLatentsyncSubmitRespVO submitTask(@Valid AppTikLatentsyncSubmitReqVO reqVO) {
public AppTikLatentsyncSubmitRespVO submitTask(AppTikLatentsyncSubmitReqVO reqVO) {
LatentsyncSubmitRequest request = LatentsyncSubmitRequest.builder()
.audioUrl(StrUtil.trim(reqVO.getAudioUrl()))
.videoUrl(StrUtil.trim(reqVO.getVideoUrl()))
@@ -37,6 +35,27 @@ public class LatentsyncServiceImpl implements LatentsyncService {
respVO.setQueuePosition(response.getQueuePosition());
return respVO;
}
@Override
public AppTikLatentsyncResultRespVO getTaskResult(String requestId) {
LatentsyncSubmitResponse response = latentsyncClient.getTaskResult(requestId);
AppTikLatentsyncResultRespVO respVO = new AppTikLatentsyncResultRespVO();
respVO.setRequestId(response.getRequestId());
respVO.setStatus(response.getStatus());
respVO.setSeed(response.getSeed());
// 转换视频信息
if (response.getVideo() != null) {
AppTikLatentsyncResultRespVO.VideoInfo videoInfo = new AppTikLatentsyncResultRespVO.VideoInfo();
videoInfo.setUrl(response.getVideo().getUrl());
videoInfo.setContentType(response.getVideo().getContentType());
videoInfo.setFileSize(response.getVideo().getFileSize());
respVO.setVideo(videoInfo);
}
return respVO;
}
}

View File

@@ -27,7 +27,6 @@ import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikUserVoiceDO;
import cn.iocoder.yudao.module.tik.voice.dal.mysql.TikUserVoiceMapper;
import cn.iocoder.yudao.module.tik.voice.util.ByteArrayMultipartFile;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceCreateReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoicePageReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceRespVO;
@@ -146,7 +145,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
CosyVoiceCloneRequest cloneRequest = new CosyVoiceCloneRequest();
cloneRequest.setTargetModel("cosyvoice-v2"); // 使用v2模型效果更好
cloneRequest.setTargetModel("cosyvoice-v3-flash"); // 使用v3-flash模型
cloneRequest.setPrefix("voice" + voice.getId()); // 音色前缀,格式要求
cloneRequest.setUrl(fileAccessUrl);
@@ -416,7 +415,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
transcriptionText,
reqVO.getInputText(),
false);
finalText = appendEmotion(finalText, reqVO.getEmotion());
// 移除appendEmotion调用情感通过instruction参数传递
String cacheKey = buildCacheKey(SYNTH_CACHE_PREFIX,
voiceId,
@@ -424,7 +423,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
finalText,
reqVO.getSpeechRate(),
reqVO.getVolume(),
reqVO.getEmotion(),
reqVO.getInstruction(),
reqVO.getAudioFormat(),
reqVO.getSampleRate());
@@ -441,7 +440,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
reqVO.getModel(),
reqVO.getSpeechRate(),
reqVO.getVolume(),
reqVO.getEmotion(),
reqVO.getInstruction(),
reqVO.getSampleRate(),
reqVO.getAudioFormat(),
false
@@ -563,8 +562,8 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
true);
// 使用请求参数或默认值
String emotion = StrUtil.blankToDefault(reqVO.getEmotion(), "neutral");
finalText = appendEmotion(finalText, emotion);
String instruction = reqVO.getInstruction();
// 注意instruction参数现在直接传递给CosyVoice不再添加到文本中
Float speechRate = reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f;
Float volume = reqVO.getVolume() != null ? reqVO.getVolume() : 0f;
String audioFormat = StrUtil.blankToDefault(reqVO.getAudioFormat(), "mp3");
@@ -576,7 +575,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
finalText,
speechRate,
volume,
emotion,
instruction,
audioFormat,
null);
PreviewCacheEntry previewCache = getPreviewCache(cacheKey);
@@ -599,7 +598,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
null, // 使用默认模型
speechRate,
volume,
emotion,
instruction,
null,
audioFormat,
true
@@ -630,7 +629,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
String model,
Float speechRate,
Float volume,
String emotion,
String instruction,
Integer sampleRate,
String audioFormat,
boolean preview) {
@@ -642,7 +641,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
.model(model)
.speechRate(speechRate)
.volume(volume)
.emotion(emotion)
.instruction(instruction)
.sampleRate(sampleRate)
.audioFormat(audioFormat)
.preview(preview)
@@ -738,7 +737,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
String text,
Float speechRate,
Float volume,
String emotion,
String instruction,
String audioFormat,
Integer sampleRate) {
// 构建标识符优先使用voiceId如果没有则使用fileUrl的稳定部分去除查询参数
@@ -757,7 +756,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
text,
speechRate != null ? speechRate : "1.0",
volume != null ? volume : "0",
StrUtil.blankToDefault(emotion, "neutral"),
instruction,
StrUtil.blankToDefault(audioFormat, cosyVoiceProperties.getAudioFormat()),
sampleRate != null ? sampleRate : cosyVoiceProperties.getSampleRate());
String hash = cn.hutool.crypto.SecureUtil.sha256(payload);

View File

@@ -26,22 +26,20 @@ public class AppTikDigitalHumanCreateReqVO {
@Schema(description = "AI供应商默认302ai", example = "302ai", allowableValues = {"302ai", "aliyun", "openai", "minimax"})
private String aiProvider;
@Schema(description = "频文件IDtik_user_file.id与audioUrl二选一", example = "123")
private Long audioFileId;
@Schema(description = "音频文件URL公网可访问与audioFileId二选一", example = "https://example.com/audio.wav")
@Size(max = 1024, message = "音频URL不能超过1024个字符")
private String audioUrl;
@Schema(description = "视频文件IDtik_user_file.id与videoUrl二选一", example = "456")
@Schema(description = "频文件IDtik_user_file.id", example = "456")
private Long videoFileId;
@Schema(description = "视频文件URL公网可访问与videoFileId二选一", example = "https://example.com/video.mp4")
@Schema(description = "视频文件URL公网可访问与videoFileId二选一", example = "https://example.com/video.mp4")
@Size(max = 1024, message = "视频URL不能超过1024个字符")
private String videoUrl;
@Schema(description = "配音配置IDtik_user_voice.id", example = "789")
private Long voiceConfigId;
@Schema(description = "音色IDCosyVoice voiceId", example = "cosyvoice-v3-flash-sys-xxx")
private String voiceId;
@Schema(description = "输入文本(用于语音合成,文案必填)", example = "您好,欢迎体验数字人")
@NotBlank(message = "文案不能为空")
@Size(max = 4000, message = "文本不能超过4000个字符")
private String inputText;
@Schema(description = "语速0.5-2.0默认1.0", example = "1.0")
@DecimalMin(value = "0.5", message = "语速不能小于0.5")
@@ -64,4 +62,7 @@ public class AppTikDigitalHumanCreateReqVO {
@Schema(description = "随机种子默认8888", example = "8888")
private Integer seed;
@Schema(description = "指令(用于控制音色风格)", example = "请用温柔专业的语调朗读")
private String instruction;
}

View File

@@ -37,7 +37,7 @@ public class AppTikDigitalHumanRespVO {
@Schema(description = "配音配置ID", example = "789")
private Long voiceConfigId;
@Schema(description = "voice_id", example = "cosyvoice-v2-xxx")
@Schema(description = "voice_id", example = "cosyvoice-v3-flash-xxx")
private String voiceId;
@Schema(description = "语速", example = "1.0")

View File

@@ -0,0 +1,36 @@
package cn.iocoder.yudao.module.tik.voice.vo;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
/**
* Latentsync 任务结果响应 VO
*/
@Data
public class AppTikLatentsyncResultRespVO {
@Schema(description = "Latentsync 任务 ID", example = "8eed0b9b-6103-4357-a57b-9f135a8c3276")
private String requestId;
@Schema(description = "官方状态,如 IN_QUEUE、PROCESSING、COMPLETED、FAILED", example = "COMPLETED")
private String status;
@Schema(description = "种子值")
private Integer seed;
@Schema(description = "视频信息")
private VideoInfo video;
@Schema(description = "视频信息")
@Data
public static class VideoInfo {
@Schema(description = "视频URL")
private String url;
@Schema(description = "内容类型", example = "video/mp4")
private String contentType;
@Schema(description = "文件大小(字节)")
private Integer fileSize;
}
}

View File

@@ -39,6 +39,9 @@ public class AppTikVoicePreviewReqVO {
@Schema(description = "音频格式可选默认mp3", example = "mp3")
private String audioFormat;
@Schema(description = "指令(用于控制音色风格)", example = "请用温柔专业的语调朗读")
private String instruction;
}

View File

@@ -21,20 +21,20 @@ public class AppTikVoiceTtsReqVO {
@Size(max = 4000, message = "识别文本不能超过 4000 个字符")
private String transcriptionText;
@Schema(description = "音色 IDCosyVoice voiceId", example = "cosyvoice-v2-myvoice-xxx")
@Schema(description = "音色 IDCosyVoice voiceId", example = "cosyvoice-v3-flash-myvoice-xxx")
private String voiceId;
@Schema(description = "音色源音频 OSS 地址(当没有 voiceId 时必传)")
private String fileUrl;
@Schema(description = "模型名称,默认 cosyvoice-v2", example = "cosyvoice-v3")
@Schema(description = "模型名称,默认 cosyvoice-v3-flash", example = "cosyvoice-v3-flash")
private String model;
@Schema(description = "语速,默认 1.0", example = "1.0")
private Float speechRate;
@Schema(description = "情感", example = "happy")
private String emotion;
@Schema(description = "指令(用于控制音色风格)", example = "请用温柔专业的语调朗读")
private String instruction;
@Schema(description = "音量调节范围 [-10,10]", example = "0")
private Float volume;

View File

@@ -216,7 +216,7 @@ yudao:
cosyvoice:
enabled: true
api-key: sk-10c746f8cb8640738f8d6b71af699003
default-model: cosyvoice-v2
default-model: cosyvoice-v3-flash
sample-rate: 24000
audio-format: mp3
preview-text: 您好,欢迎体验专属音色