From a3cc6c6db0f0527b77a83766e63c7c79b74c1adf Mon Sep 17 00:00:00 2001
From: sion123 <450702724@qq.com>
Date: Sat, 22 Nov 2025 00:25:29 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
frontend/app/web-gold/src/api/digitalHuman.js | 14 +-
frontend/app/web-gold/src/views/dh/Video.vue | 185 ++++++-----
.../file/service/TikUserFileServiceImpl.java | 2 +-
.../tik/voice/client/CosyVoiceClient.java | 8 +-
.../tik/voice/client/LatentsyncClient.java | 96 +++++-
.../client/dto/CosyVoiceCloneRequest.java | 2 +-
.../voice/client/dto/CosyVoiceTtsRequest.java | 6 +-
.../client/dto/LatentsyncSubmitResponse.java | 22 ++
.../tik/voice/config/CosyVoiceProperties.java | 4 +-
.../voice/config/LatentsyncProperties.java | 2 +-
.../dal/dataobject/TikDigitalHumanTaskDO.java | 30 +-
.../tik/voice/enums/CosyVoiceEmotionEnum.java | 38 +++
.../service/DigitalHumanTaskServiceImpl.java | 309 ++++++++++++------
.../tik/voice/service/LatentsyncService.java | 9 +
.../voice/service/LatentsyncServiceImpl.java | 27 +-
.../service/TikUserVoiceServiceImpl.java | 25 +-
.../vo/AppTikDigitalHumanCreateReqVO.java | 23 +-
.../voice/vo/AppTikDigitalHumanRespVO.java | 2 +-
.../vo/AppTikLatentsyncResultRespVO.java | 36 ++
.../tik/voice/vo/AppTikVoicePreviewReqVO.java | 3 +
.../tik/voice/vo/AppTikVoiceTtsReqVO.java | 8 +-
.../src/main/resources/application.yaml | 2 +-
22 files changed, 595 insertions(+), 258 deletions(-)
create mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/enums/CosyVoiceEmotionEnum.java
create mode 100644 yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikLatentsyncResultRespVO.java
diff --git a/frontend/app/web-gold/src/api/digitalHuman.js b/frontend/app/web-gold/src/api/digitalHuman.js
index ec9d5f8159..95805e2772 100644
--- a/frontend/app/web-gold/src/api/digitalHuman.js
+++ b/frontend/app/web-gold/src/api/digitalHuman.js
@@ -8,7 +8,7 @@ import request from './http'
*/
export function createDigitalHumanTask(data) {
return request({
- url: '/api/tik/digital-human/task/create',
+ url: '/webApi/api/tik/digital-human/task/create',
method: 'post',
data
})
@@ -19,7 +19,7 @@ export function createDigitalHumanTask(data) {
*/
export function getDigitalHumanTask(taskId) {
return request({
- url: '/api/tik/digital-human/task/get',
+ url: '/webApi/api/tik/digital-human/task/get',
method: 'get',
params: { taskId }
})
@@ -30,7 +30,7 @@ export function getDigitalHumanTask(taskId) {
*/
export function getDigitalHumanTaskPage(params) {
return request({
- url: '/api/tik/digital-human/task/page',
+ url: '/webApi/api/tik/digital-human/task/page',
method: 'get',
params
})
@@ -41,7 +41,7 @@ export function getDigitalHumanTaskPage(params) {
*/
export function getTaskStatistics() {
return request({
- url: '/api/tik/digital-human/task/statistics',
+ url: '/webApi/api/tik/digital-human/task/statistics',
method: 'get'
})
}
@@ -51,7 +51,7 @@ export function getTaskStatistics() {
*/
export function cancelTask(taskId) {
return request({
- url: `/api/tik/digital-human/task/${taskId}/cancel`,
+ url: `/webApi/api/tik/digital-human/task/${taskId}/cancel`,
method: 'post'
})
}
@@ -61,7 +61,7 @@ export function cancelTask(taskId) {
*/
export function retryTask(taskId) {
return request({
- url: `/api/tik/digital-human/task/${taskId}/retry`,
+ url: `/webApi/api/tik/digital-human/task/${taskId}/retry`,
method: 'post'
})
}
@@ -71,7 +71,7 @@ export function retryTask(taskId) {
*/
export function deleteTask(taskId) {
return request({
- url: `/api/tik/digital-human/task/${taskId}`,
+ url: `/webApi/api/tik/digital-human/task/${taskId}`,
method: 'delete'
})
}
diff --git a/frontend/app/web-gold/src/views/dh/Video.vue b/frontend/app/web-gold/src/views/dh/Video.vue
index de39837c04..b9824f5ac9 100644
--- a/frontend/app/web-gold/src/views/dh/Video.vue
+++ b/frontend/app/web-gold/src/views/dh/Video.vue
@@ -26,19 +26,23 @@ const isPlayingPreview = ref(false) // 是否正在播放试听音频
const isPlayingSynthesized = ref(false) // 是否正在播放已合成的音频
const pollingInterval = ref(null) // 轮询间隔ID
+// Base64音频缓存
+const audioBase64Cache = new Map()
+const AUDIO_CACHE_MAX_SIZE = 10 // 最多缓存10个音频
+
// TTS 配置
const ttsText = ref('')
const selectedTtsVoice = ref('')
const speechRate = ref(1.0)
-const emotion = ref('neutral')
+const instruction = ref('neutral') // 指令参数,用于控制音色风格
const voiceSource = ref('user')
-// 系统音色库
+// 系统音色库(使用CosyVoice v3-flash模型)
const SYSTEM_VOICES = [
- { id: 'sys-pro-01', name: '星悦·知性女声', gender: 'female', category: '职业', description: '温柔专业', voiceId: 'cosyvoice-v2-sys-pro-01' },
- { id: 'sys-boy-01', name: '澄澄·少男音', gender: 'male', category: '少男', description: '年轻清爽', voiceId: 'cosyvoice-v2-sys-boy-01' },
- { id: 'sys-girl-01', name: '沁雪·少女音', gender: 'female', category: '少女', description: '活泼甜美', voiceId: 'cosyvoice-v2-sys-girl-01' },
- { id: 'sys-man-01', name: '寰宇·男青年', gender: 'male', category: '男青年', description: '磁性沉稳', voiceId: 'cosyvoice-v2-sys-man-01' }
+ { id: 'sys-pro-01', name: '星悦·知性女声', gender: 'female', category: '职业', description: '温柔专业', voiceId: 'cosyvoice-v3-flash-sys-pro-01', defaultInstruction: '请用温柔专业的语调朗读' },
+ { id: 'sys-boy-01', name: '澄澄·少男音', gender: 'male', category: '少男', description: '年轻清爽', voiceId: 'cosyvoice-v3-flash-sys-boy-01', defaultInstruction: '请用年轻清爽的语调朗读' },
+ { id: 'sys-girl-01', name: '沁雪·少女音', gender: 'female', category: '少女', description: '活泼甜美', voiceId: 'cosyvoice-v3-flash-sys-girl-01', defaultInstruction: '请用活泼甜美的语调朗读' },
+ { id: 'sys-man-01', name: '寰宇·男青年', gender: 'male', category: '男青年', description: '磁性沉稳', voiceId: 'cosyvoice-v3-flash-sys-man-01', defaultInstruction: '请用磁性沉稳的语调朗读' }
]
// 用户音色列表
@@ -57,7 +61,15 @@ const userVoiceCards = computed(() =>
}))
)
-const displayedVoices = computed(() => userVoiceCards.value)
+const displayedVoices = computed(() => {
+ if (voiceSource.value === 'system') {
+ return SYSTEM_VOICES.map(voice => ({
+ ...voice,
+ source: 'system'
+ }))
+ }
+ return userVoiceCards.value
+})
const selectedVoiceMeta = computed(() =>
displayedVoices.value.find(voice => `${voice.source}-${voice.id}` === selectedTtsVoice.value)
@@ -66,7 +78,16 @@ const selectedVoiceMeta = computed(() =>
// UI 状态
const speechRateMarks = { 0.5: '0.5x', 1: '1x', 1.5: '1.5x', 2: '2x' }
const speechRateDisplay = computed(() => `${speechRate.value.toFixed(1)}x`)
-const canGenerate = computed(() => !!(synthesizedAudio.value?.fileId && uploadedVideo.value && !isGenerating.value))
+
+// 生成数字人的条件:选中了音色 + 上传了视频 + 没有正在生成
+// 注意:不需要先合成语音,可以直接使用音色配置
+const canGenerate = computed(() => {
+ const hasText = ttsText.value.trim() // 文案必填
+ const hasVoice = selectedVoiceMeta.value // 必须选中音色
+ const hasVideo = uploadedVideo.value // 必须上传视频
+ const notGenerating = !isGenerating.value // 不能正在生成
+ return !!(hasText && hasVoice && hasVideo && notGenerating)
+})
// 音色选择
const setVoiceSource = (source) => {
@@ -75,6 +96,8 @@ const setVoiceSource = (source) => {
selectedTtsVoice.value = ''
if (source === 'user' && userVoiceCards.value.length > 0) {
selectVoiceProfile(userVoiceCards.value[0])
+ } else if (source === 'system' && SYSTEM_VOICES.length > 0) {
+ selectVoiceProfile({ ...SYSTEM_VOICES[0], source: 'system' })
}
}
@@ -89,12 +112,8 @@ const playVoiceSample = async (voice) => {
if (previewLoadingVoiceId.value === voice.id || isPlayingPreview.value) {
return
}
- if (voice.source === 'user' || (voice.source === 'system' && voice.voiceId)) {
- return triggerVoicePreview(voice)
- }
- const url = voice.previewUrl || voice.fileUrl
- if (!url) return message.warning('暂无可试听的音频')
- playAudioPreview(url)
+ // 用户音色和系统音色都走实时试听流程
+ return triggerVoicePreview(voice)
}
const triggerVoicePreview = async (voice) => {
@@ -137,6 +156,7 @@ const triggerVoicePreview = async (voice) => {
const buildPreviewParams = (voice) => {
if (voice.source === 'user') {
// 使用voiceConfigId,让后端查询数据库获取文件URL和transcriptionText
+ // 用户音色不传instruction
const configId = voice.rawId || extractIdFromString(voice.id)
if (!configId) {
message.error('配音配置无效')
@@ -145,15 +165,15 @@ const buildPreviewParams = (voice) => {
return {
voiceConfigId: configId,
inputText: ttsText.value, // 传递用户输入的文本
- emotion: emotion.value || 'neutral',
speechRate: speechRate.value || 1.0,
audioFormat: 'mp3'
}
} else {
+ // 系统音色使用用户选择的instruction
return {
voiceId: voice.voiceId,
inputText: ttsText.value, // 传递用户输入的文本
- emotion: emotion.value || 'neutral',
+ instruction: instruction.value && instruction.value !== 'neutral' ? instruction.value : (voice.defaultInstruction || '请用自然流畅的语调朗读'),
speechRate: speechRate.value || 1.0,
audioFormat: 'mp3'
}
@@ -177,11 +197,10 @@ const handleSynthesizeVoice = async () => {
const params = {
inputText: ttsText.value,
speechRate: speechRate.value,
- emotion: emotion.value,
audioFormat: 'mp3'
}
- // 如果是用户配音,使用voiceConfigId让后端查询
+ // 如果是用户配音,使用voiceConfigId让后端查询,不传instruction
if (voice.source === 'user') {
const configId = voice.rawId || extractIdFromString(voice.id)
if (!configId) {
@@ -190,14 +209,14 @@ const handleSynthesizeVoice = async () => {
}
params.voiceConfigId = configId
} else {
- // 使用系统音色voiceId
+ // 使用系统音色voiceId和用户选择的instruction
const voiceId = voice.voiceId || voice.rawId
if (!voiceId) {
message.warning('音色配置无效')
return
}
params.voiceId = voiceId
- params.model = voice.model
+ params.instruction = instruction.value && instruction.value !== 'neutral' ? instruction.value : (voice.defaultInstruction || '请用自然流畅的语调朗读')
}
const res = await VoiceService.synthesize(params)
@@ -206,7 +225,7 @@ const handleSynthesizeVoice = async () => {
synthesizedAudio.value = res.data
message.success('语音合成成功')
} else {
- message.error(res.msg || '合成失败')
+ message.error(res.message || '合成失败')
}
} catch (error) {
console.error('synthesize error:', error)
@@ -289,22 +308,8 @@ const generateVideo = async () => {
currentTaskStep.value = 'prepare_files'
try {
- // 1. 首先上传音频和视频文件到后端
- message.loading('正在上传文件...', 0)
-
- // 上传音频(使用合成后的音频或原始音频)
- let audioFileId = null
- let audioUrl = null
-
- if (synthesizedAudio.value?.fileId) {
- // 如果有已合成的音频,使用其fileId
- audioFileId = synthesizedAudio.value.fileId
- } else {
- // 否则使用voiceConfigId让后端处理
- audioFileId = voice.rawId || extractIdFromString(voice.id)
- }
-
- // 上传视频文件
+ // 1. 上传视频文件(只上传视频,音频由后端实时合成)
+ message.loading('正在上传视频...', 0)
const videoFileId = await uploadVideoFile(uploadedVideoFile.value)
if (!videoFileId) {
throw new Error('视频上传失败')
@@ -312,13 +317,15 @@ const generateVideo = async () => {
message.destroy()
- // 2. 创建数字人任务
+ // 2. 创建数字人任务(简化:只使用voiceId,后端实时TTS)
const taskData = {
taskName: `数字人任务_${Date.now()}`,
- audioFileId: audioFileId,
videoFileId: videoFileId,
+ // 音频由后端实时合成,使用voiceId
+ voiceId: voice.voiceId || voice.rawId,
+ inputText: ttsText.value, // 文本内容(用于TTS合成)
speechRate: speechRate.value,
- emotion: emotion.value,
+ instruction: voice.source === 'user' ? undefined : (instruction.value && instruction.value !== 'neutral' ? instruction.value : (voice.defaultInstruction || '请用自然流畅的语调朗读')),
guidanceScale: 1,
seed: 8888
}
@@ -350,10 +357,10 @@ const generateVideo = async () => {
const uploadVideoFile = async (file) => {
try {
const res = await MaterialService.uploadFile(file, 'video')
- if (res.code === 0 && res.data?.id) {
- return res.data.id
+ if (res.code === 0) {
+ return res.data // res.data就是文件ID
} else {
- throw new Error(res.msg || '上传失败')
+ throw new Error(res.message || '上传失败')
}
} catch (error) {
console.error('uploadVideoFile error:', error)
@@ -528,17 +535,42 @@ const playAudioPreview = (url, options = {}) => {
const playAudioFromBase64 = (audioBase64, format = 'mp3', onEnded = null) => {
try {
- previewObjectUrl && URL.revokeObjectURL(previewObjectUrl)
- const byteCharacters = window.atob(audioBase64)
- const byteNumbers = new Array(byteCharacters.length)
- for (let i = 0; i < byteCharacters.length; i++) {
- byteNumbers[i] = byteCharacters.charCodeAt(i)
+ // 检查缓存
+ const cacheKey = `${audioBase64.substring(0, 32)}_${format}` // 使用base64前32位作为缓存键
+ let objectUrl = audioBase64Cache.get(cacheKey)
+
+ if (!objectUrl) {
+ // 解码base64并创建blob
+ const byteCharacters = window.atob(audioBase64)
+ const byteNumbers = new Array(byteCharacters.length)
+ for (let i = 0; i < byteCharacters.length; i++) {
+ byteNumbers[i] = byteCharacters.charCodeAt(i)
+ }
+ const mime = format === 'mp3' ? 'audio/mpeg' : `audio/${format}`
+ const blob = new Blob([new Uint8Array(byteNumbers)], { type: mime })
+ objectUrl = URL.createObjectURL(blob)
+
+ // 管理缓存大小
+ if (audioBase64Cache.size >= AUDIO_CACHE_MAX_SIZE) {
+ // 清理最早的缓存
+ const firstKey = audioBase64Cache.keys().next().value
+ const oldUrl = audioBase64Cache.get(firstKey)
+ URL.revokeObjectURL(oldUrl)
+ audioBase64Cache.delete(firstKey)
+ }
+
+ // 存储到缓存
+ audioBase64Cache.set(cacheKey, objectUrl)
}
- const mime = format === 'mp3' ? 'audio/mpeg' : `audio/${format}`
- const blob = new Blob([new Uint8Array(byteNumbers)], { type: mime })
- previewObjectUrl = URL.createObjectURL(blob)
+
+ // 清理旧的previewObjectUrl
+ if (previewObjectUrl && previewObjectUrl !== objectUrl) {
+ URL.revokeObjectURL(previewObjectUrl)
+ }
+ previewObjectUrl = objectUrl
+
playAudioPreview(previewObjectUrl, {
- revokeOnEnd: true,
+ revokeOnEnd: false, // 缓存模式下不立即释放
onEnded: () => {
isPlayingPreview.value = false
onEnded && onEnded()
@@ -555,12 +587,20 @@ const playAudioFromBase64 = (audioBase64, format = 'mp3', onEnded = null) => {
// 生命周期
onMounted(async () => {
await voiceStore.load()
- userVoiceCards.value.length > 0 && selectVoiceProfile(userVoiceCards.value[0])
+ // 默认选择第一个音色
+ if (voiceSource.value === 'user' && userVoiceCards.value.length > 0) {
+ selectVoiceProfile(userVoiceCards.value[0])
+ } else if (voiceSource.value === 'system' && SYSTEM_VOICES.length > 0) {
+ selectVoiceProfile({ ...SYSTEM_VOICES[0], source: 'system' })
+ }
})
onUnmounted(() => {
previewAudio?.pause?.()
previewAudio = null
+ // 清理所有缓存的ObjectURL
+ audioBase64Cache.forEach(url => URL.revokeObjectURL(url))
+ audioBase64Cache.clear()
previewObjectUrl && URL.revokeObjectURL(previewObjectUrl)
// 重置播放状态
isPlayingPreview.value = false
@@ -575,12 +615,17 @@ onUnmounted(() => {
// 监听器
watch(voiceSource, () => {
selectedTtsVoice.value = ''
- userVoiceCards.value.length > 0 && selectVoiceProfile(userVoiceCards.value[0])
+ if (voiceSource.value === 'user' && userVoiceCards.value.length > 0) {
+ selectVoiceProfile(userVoiceCards.value[0])
+ } else if (voiceSource.value === 'system' && SYSTEM_VOICES.length > 0) {
+ selectVoiceProfile({ ...SYSTEM_VOICES[0], source: 'system' })
+ }
})
watch(() => voiceStore.profiles, () => {
- voiceSource.value === 'user' && userVoiceCards.value.length > 0 &&
- !selectedTtsVoice.value && selectVoiceProfile(userVoiceCards.value[0])
+ if (voiceSource.value === 'user' && userVoiceCards.value.length > 0 && !selectedTtsVoice.value) {
+ selectVoiceProfile(userVoiceCards.value[0])
+ }
})
watch([ttsText, selectedTtsVoice], () => {
@@ -613,7 +658,7 @@ let previewObjectUrl = ''
-
- 还没有配音,可先在"配音管理"中上传
+
+ {{ voiceSource === 'user' ? '还没有配音,可先在"配音管理"中上传' : '暂无可用的系统音色' }}
@@ -678,25 +723,17 @@ let previewObjectUrl = ''
-
-
情感
+
+
指令
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/file/service/TikUserFileServiceImpl.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/file/service/TikUserFileServiceImpl.java
index 408ce6c03c..92157d43cc 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/file/service/TikUserFileServiceImpl.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/file/service/TikUserFileServiceImpl.java
@@ -238,7 +238,7 @@ public class TikUserFileServiceImpl implements TikUserFileService {
quotaService.increaseUsedStorage(userId, file.getSize());
log.info("[saveFileRecord][用户({})保存文件记录成功,文件编号({}),infra文件编号({})]", userId, userFile.getId(), infraFileId);
- // 返回 infra_file.id,因为创建配音等操作需要使用 infra_file.id
+ // 返回 infra_file.id,保持与现有配音功能的兼容性
return infraFileId;
}
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java
index d2467f9c1a..133b4a9321 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java
@@ -66,12 +66,12 @@ public class CosyVoiceClient {
SpeechSynthesizer synthesizer = null;
try {
- log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}, speechRate={}, emotion={}]",
+ log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}, speechRate={}, instruction={}]",
request.getVoiceId(),
request.getText().length(),
StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()),
request.getSpeechRate(),
- request.getEmotion());
+ request.getInstruction());
// 使用 DashScope SDK 构建参数(严格按文档)
// 注意:speechRate 和 volume 需要转换为 int 类型
@@ -83,6 +83,10 @@ public class CosyVoiceClient {
.volume(request.getVolume() != null ? request.getVolume().intValue() : 0)
.build();
+ if (StrUtil.isNotBlank(request.getInstruction())) {
+ param.setInstruction(request.getInstruction());
+ }
+
// 初始化合成器(同步调用传 null)
synthesizer = new SpeechSynthesizer(param, null);
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/LatentsyncClient.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/LatentsyncClient.java
index 3d85c24bb4..51acfe9ac6 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/LatentsyncClient.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/LatentsyncClient.java
@@ -9,6 +9,7 @@ import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
+import okhttp3.HttpUrl;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
@@ -42,9 +43,7 @@ public class LatentsyncClient {
private volatile OkHttpClient httpClient;
public LatentsyncSubmitResponse submitTask(LatentsyncSubmitRequest request) {
- if (!properties.isEnabled()) {
- throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "未配置 Latentsync API Key");
- }
+ validateEnabled();
validateRequest(request);
Map
payload = buildPayload(request);
@@ -57,28 +56,37 @@ public class LatentsyncClient {
.post(RequestBody.create(body.getBytes(StandardCharsets.UTF_8), JSON))
.build();
- try (Response response = getHttpClient().newCall(httpRequest).execute()) {
- String responseBody = response.body() != null ? response.body().string() : "";
- if (!response.isSuccessful()) {
- log.error("[Latentsync][submit failed][status={}, body={}]", response.code(), responseBody);
- throw buildException(responseBody);
- }
- LatentsyncSubmitResponse submitResponse =
- objectMapper.readValue(responseBody, LatentsyncSubmitResponse.class);
+ try {
+ LatentsyncSubmitResponse submitResponse = executeRequest(httpRequest, "submit");
+ // 验证requestId
if (StrUtil.isBlank(submitResponse.getRequestId())) {
- log.error("[Latentsync][submit failed][response={}]", responseBody);
throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "Latentsync 返回 requestId 为空");
}
return submitResponse;
+ } catch (ServiceException ex) {
+ throw ex;
+ } catch (Exception ex) {
+ log.error("[Latentsync][submit exception]", ex);
+ throw exception(LATENTSYNC_SUBMIT_FAILED);
}
- } catch (ServiceException ex) {
- throw ex;
} catch (Exception ex) {
- log.error("[Latentsync][submit exception]", ex);
+ log.error("[Latentsync][build request exception]", ex);
throw exception(LATENTSYNC_SUBMIT_FAILED);
}
}
+ private void validateEnabled() {
+ if (!properties.isEnabled()) {
+ throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "未配置 Latentsync API Key");
+ }
+ }
+
+ private void validateRequestId(String requestId) {
+ if (StrUtil.isBlank(requestId)) {
+ throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "requestId 不能为空");
+ }
+ }
+
private void validateRequest(LatentsyncSubmitRequest request) {
if (request == null) {
throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "请求体不能为空");
@@ -107,6 +115,64 @@ public class LatentsyncClient {
return payload;
}
+ /**
+ * 获取任务结果
+ */
+ public LatentsyncSubmitResponse getTaskResult(String requestId) {
+ validateEnabled();
+ validateRequestId(requestId);
+
+ try {
+ // 构建GET请求URL,使用HttpUrl确保参数正确编码
+ HttpUrl url = HttpUrl.parse(properties.getSubmitUrl())
+ .newBuilder()
+ .addQueryParameter("request_id", requestId)
+ .build();
+
+ Request httpRequest = new Request.Builder()
+ .url(url)
+ .addHeader("Authorization", "Bearer " + properties.getApiKey())
+ .get()
+ .build();
+
+ try {
+ return executeRequest(httpRequest, "get result", requestId);
+ } catch (ServiceException ex) {
+ throw ex;
+ } catch (Exception ex) {
+ log.error("[Latentsync][get result exception]", ex);
+ throw exception(LATENTSYNC_SUBMIT_FAILED);
+ }
+ } catch (Exception ex) {
+ log.error("[Latentsync][build request exception]", ex);
+ throw exception(LATENTSYNC_SUBMIT_FAILED);
+ }
+ }
+
+ /**
+ * 执行HTTP请求的通用方法
+ */
+ private LatentsyncSubmitResponse executeRequest(Request httpRequest, String operation) {
+ return executeRequest(httpRequest, operation, null);
+ }
+
+ private LatentsyncSubmitResponse executeRequest(Request httpRequest, String operation, String requestId) {
+ try (Response response = getHttpClient().newCall(httpRequest).execute()) {
+ String responseBody = response.body() != null ? response.body().string() : "";
+ if (!response.isSuccessful()) {
+ log.error("[Latentsync][{} failed][status={}, body={}]", operation, response.code(), responseBody);
+ throw buildException(responseBody);
+ }
+ log.info("[Latentsync][{} success][requestId={}, responseBody={}]",
+ operation, requestId, responseBody);
+
+ return objectMapper.readValue(responseBody, LatentsyncSubmitResponse.class);
+ } catch (Exception ex) {
+ log.error("[Latentsync][{} exception]", operation, ex);
+ throw exception(LATENTSYNC_SUBMIT_FAILED);
+ }
+ }
+
private OkHttpClient getHttpClient() {
if (httpClient == null) {
synchronized (this) {
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneRequest.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneRequest.java
index 9e9156e0a9..30716f18fc 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneRequest.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneRequest.java
@@ -9,7 +9,7 @@ import lombok.Data;
public class CosyVoiceCloneRequest {
/**
- * 复刻模型(cosyvoice-v1 或 cosyvoice-v2)
+ * 复刻模型(cosyvoice-v3-flash 等)
*/
private String targetModel;
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java
index b09591e714..fec199cbed 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java
@@ -31,7 +31,7 @@ public class CosyVoiceTtsRequest {
private String referenceText;
/**
- * 模型(默认 cosyvoice-v2)
+ * 模型(默认 cosyvoice-v3-flash)
*/
private String model;
@@ -46,9 +46,9 @@ public class CosyVoiceTtsRequest {
private Float volume;
/**
- * 情感,可选
+ * 指令(用于控制音色风格),可选
*/
- private String emotion;
+ private String instruction;
/**
* 采样率
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/LatentsyncSubmitResponse.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/LatentsyncSubmitResponse.java
index c749aef99e..ae474ee94f 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/LatentsyncSubmitResponse.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/LatentsyncSubmitResponse.java
@@ -1,5 +1,6 @@
package cn.iocoder.yudao.module.tik.voice.client.dto;
+import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;
import java.util.Map;
@@ -28,12 +29,33 @@ public class LatentsyncSubmitResponse {
/**
* 任务 ID
*/
+ @JsonProperty("request_id")
private String requestId;
/**
* 当前状态
*/
private String status;
+
+ /**
+ * 视频信息
+ */
+ private VideoInfo video;
+
+ /**
+ * 种子值
+ */
+ private Integer seed;
+
+ /**
+ * 视频信息
+ */
+ @Data
+ public static class VideoInfo {
+ private String url;
+ private String contentType;
+ private Integer fileSize;
+ }
}
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProperties.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProperties.java
index 19cc522a35..6ee363dc26 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProperties.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProperties.java
@@ -23,7 +23,7 @@ public class CosyVoiceProperties {
/**
* 默认模型
*/
- private String defaultModel = "cosyvoice-v2";
+ private String defaultModel = "cosyvoice-v3-flash";
/**
* 默认 voiceId(可选)
@@ -38,7 +38,7 @@ public class CosyVoiceProperties {
/**
* 默认音频格式
*/
- private String audioFormat = "wav";
+ private String audioFormat = "mp3";
/**
* 试听默认示例文本
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/LatentsyncProperties.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/LatentsyncProperties.java
index b9e8d3b099..e5068084af 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/LatentsyncProperties.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/LatentsyncProperties.java
@@ -18,7 +18,7 @@ public class LatentsyncProperties {
/**
* 302AI API Key(可通过配置覆盖)
*/
- private String apiKey = "ab900d8c94094a90aed3e88cdba785c1";
+ private String apiKey = ""; // SECURITY(review): never commit a real API key — the previous default here leaked a live secret; inject it via external configuration (e.g. LATENTSYNC_API_KEY env var / application.yaml property) and rotate the exposed key
/**
* 默认海外网关
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikDigitalHumanTaskDO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikDigitalHumanTaskDO.java
index 78d3bd825b..1055b75c81 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikDigitalHumanTaskDO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikDigitalHumanTaskDO.java
@@ -43,32 +43,30 @@ public class TikDigitalHumanTaskDO extends TenantBaseDO {
private String taskName;
// ========== 文件信息 ==========
- /**
- * 音频文件ID(tik_user_file.id)
- */
- private Long audioFileId;
/**
* 视频文件ID(tik_user_file.id)
*/
private Long videoFileId;
- /**
- * 音频文件URL(公网可访问,用于Latentsync调用)
- */
- private String audioUrl;
/**
* 视频文件URL(公网可访问,用于Latentsync调用)
*/
private String videoUrl;
- // ========== 生成参数 ==========
+ // ========== TTS参数 ==========
/**
- * 配音配置ID(tik_user_voice.id)
- */
- private Long voiceConfigId;
- /**
- * CosyVoice生成的voice_id
+ * 音色ID(CosyVoice voiceId)
*/
private String voiceId;
+ /**
+ * 输入文本(用于语音合成)
+ */
+ private String inputText;
+ /**
+ * 音频文件URL(公网可访问,用于Latentsync调用)
+ */
+ private String audioUrl;
+
+ // ========== 生成参数 ==========
/**
* 语速(0.5-2.0)
*/
@@ -81,6 +79,10 @@ public class TikDigitalHumanTaskDO extends TenantBaseDO {
* 情感(neutral/happy/sad等)
*/
private String emotion;
+ /**
+ * 指令(用于控制音色风格)
+ */
+ private String instruction;
/**
* Latentsync guidance_scale(1-2)
*/
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/enums/CosyVoiceEmotionEnum.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/enums/CosyVoiceEmotionEnum.java
new file mode 100644
index 0000000000..afecb99344
--- /dev/null
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/enums/CosyVoiceEmotionEnum.java
@@ -0,0 +1,38 @@
+package cn.iocoder.yudao.module.tik.voice.enums;
+
+import cn.hutool.core.util.StrUtil;
+import lombok.AllArgsConstructor;
+import lombok.Getter;
+
+/**
+ * CosyVoice情感枚举
+ * 根据阿里云DashScope官方文档定义
+ * 参考:https://help.aliyun.com/zh/dashscope/developer-reference/tts-api
+ */
+@Getter
+@AllArgsConstructor
+public enum CosyVoiceEmotionEnum {
+
+ NEUTRAL("neutral", "中性"),
+ HAPPY("happy", "高兴"),
+ SAD("sad", "悲伤"),
+ ANGRY("angry", "愤怒"),
+ SURPRISED("surprised", "惊讶"),
+ DISGUSTED("disgusted", "厌恶"),
+ SCARED("scared", "害怕");
+
+ private final String code;
+ private final String description;
+
+ public static CosyVoiceEmotionEnum getByCode(String code) {
+ if (StrUtil.isBlank(code)) {
+ return NEUTRAL;
+ }
+ for (CosyVoiceEmotionEnum emotion : values()) {
+ if (emotion.getCode().equalsIgnoreCase(code)) {
+ return emotion;
+ }
+ }
+ return NEUTRAL;
+ }
+}
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/DigitalHumanTaskServiceImpl.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/DigitalHumanTaskServiceImpl.java
index b72c7ad987..648f68795f 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/DigitalHumanTaskServiceImpl.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/DigitalHumanTaskServiceImpl.java
@@ -1,10 +1,11 @@
package cn.iocoder.yudao.module.tik.voice.service;
-import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
-import cn.iocoder.yudao.framework.common.pojo.CommonResult;
+import cn.hutool.http.HttpRequest;
+import cn.hutool.http.HttpResponse;
import cn.iocoder.yudao.framework.common.pojo.PageResult;
import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils;
+import cn.iocoder.yudao.framework.common.util.http.HttpUtils;
import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
import cn.iocoder.yudao.framework.security.core.util.SecurityFrameworkUtils;
import cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil;
@@ -13,21 +14,15 @@ import cn.iocoder.yudao.module.infra.dal.dataobject.file.FileDO;
import cn.iocoder.yudao.module.infra.dal.mysql.file.FileMapper;
import cn.iocoder.yudao.module.tik.file.dal.dataobject.TikUserFileDO;
import cn.iocoder.yudao.module.tik.file.dal.mysql.TikUserFileMapper;
+import cn.iocoder.yudao.module.tik.file.service.TikOssInitService;
import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikDigitalHumanTaskDO;
import cn.iocoder.yudao.module.tik.voice.dal.mysql.TikDigitalHumanTaskMapper;
-import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikUserVoiceDO;
-import cn.iocoder.yudao.module.tik.voice.dal.mysql.TikUserVoiceMapper;
import cn.iocoder.yudao.module.tik.voice.enums.DigitalHumanTaskStatusEnum;
import cn.iocoder.yudao.module.tik.voice.enums.DigitalHumanTaskStepEnum;
-import cn.iocoder.yudao.module.tik.voice.service.TikUserVoiceService;
import cn.iocoder.yudao.module.tik.voice.vo.*;
-import cn.iocoder.yudao.module.tik.voice.client.LatentsyncClient;
+import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncResultRespVO;
import cn.iocoder.yudao.module.tik.voice.service.LatentsyncService;
-import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
-import cn.iocoder.yudao.framework.common.exception.ServiceException;
import cn.iocoder.yudao.module.tik.enums.ErrorCodeConstants;
-import cn.iocoder.yudao.framework.common.util.date.DateUtils;
-import cn.iocoder.yudao.framework.common.util.string.StrUtils;
import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@@ -39,11 +34,6 @@ import org.springframework.transaction.support.TransactionSynchronizationManager
import org.springframework.validation.annotation.Validated;
import java.time.LocalDateTime;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.TimeUnit;
/**
* 数字人任务 Service 实现
@@ -57,12 +47,12 @@ import java.util.concurrent.TimeUnit;
public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
private final TikDigitalHumanTaskMapper taskMapper;
- private final TikUserVoiceMapper voiceMapper;
private final TikUserFileMapper userFileMapper;
private final FileMapper fileMapper;
private final FileApi fileApi;
private final TikUserVoiceService userVoiceService;
private final LatentsyncService latentsyncService;
+ private final TikOssInitService ossInitService;
/**
* 预签名URL过期时间(24小时)
@@ -97,16 +87,7 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
@Override
public AppTikDigitalHumanRespVO getTask(Long taskId) {
- TikDigitalHumanTaskDO task = taskMapper.selectById(taskId);
- if (task == null) {
- throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_NOT_EXISTS);
- }
-
- Long userId = SecurityFrameworkUtils.getLoginUserId();
- if (!task.getUserId().equals(userId)) {
- throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_FORBIDDEN);
- }
-
+ TikDigitalHumanTaskDO task = getCurrentUserTask(taskId);
return convertToRespVO(task);
}
@@ -157,15 +138,7 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
@Override
@Transactional(rollbackFor = Exception.class)
public void cancelTask(Long taskId) {
- TikDigitalHumanTaskDO task = taskMapper.selectById(taskId);
- if (task == null) {
- throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_NOT_EXISTS);
- }
-
- Long userId = SecurityFrameworkUtils.getLoginUserId();
- if (!task.getUserId().equals(userId)) {
- throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_FORBIDDEN);
- }
+ TikDigitalHumanTaskDO task = getCurrentUserTask(taskId);
if (!"PROCESSING".equals(task.getStatus())) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.DIGITAL_HUMAN_TASK_CANNOT_CANCEL);
@@ -179,21 +152,13 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
updateObj.setCurrentStep("canceled");
taskMapper.updateById(updateObj);
- log.info("[cancelTask][用户({})取消任务({})成功]", userId, taskId);
+ log.info("[cancelTask][用户({})取消任务({})成功]", task.getUserId(), taskId);
}
@Override
@Transactional(rollbackFor = Exception.class)
public void retryTask(Long taskId) {
- TikDigitalHumanTaskDO task = taskMapper.selectById(taskId);
- if (task == null) {
- throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_NOT_EXISTS);
- }
-
- Long userId = SecurityFrameworkUtils.getLoginUserId();
- if (!task.getUserId().equals(userId)) {
- throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_FORBIDDEN);
- }
+ TikDigitalHumanTaskDO task = getCurrentUserTask(taskId);
if (!"FAILED".equals(task.getStatus()) && !"CANCELED".equals(task.getStatus())) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.DIGITAL_HUMAN_TASK_CANNOT_RETRY);
@@ -212,26 +177,18 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
// 重新开始异步处理
processTaskAsync(taskId);
- log.info("[retryTask][用户({})重试任务({})成功]", userId, taskId);
+ log.info("[retryTask][用户({})重试任务({})成功]", task.getUserId(), taskId);
}
@Override
@Transactional(rollbackFor = Exception.class)
public void deleteTask(Long taskId) {
- TikDigitalHumanTaskDO task = taskMapper.selectById(taskId);
- if (task == null) {
- throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_NOT_EXISTS);
- }
-
- Long userId = SecurityFrameworkUtils.getLoginUserId();
- if (!task.getUserId().equals(userId)) {
- throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_FORBIDDEN);
- }
+ TikDigitalHumanTaskDO task = getCurrentUserTask(taskId);
// 删除任务
taskMapper.deleteById(taskId);
- log.info("[deleteTask][用户({})删除任务({})成功]", userId, taskId);
+ log.info("[deleteTask][用户({})删除任务({})成功]", task.getUserId(), taskId);
}
@Override
@@ -242,36 +199,46 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
// ========== 私有方法 ==========
+ /**
+ * 获取当前用户拥有的任务
+ */
+ private TikDigitalHumanTaskDO getCurrentUserTask(Long taskId) {
+ TikDigitalHumanTaskDO task = taskMapper.selectById(taskId);
+ if (task == null) {
+ throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_NOT_EXISTS);
+ }
+
+ Long userId = SecurityFrameworkUtils.getLoginUserId();
+ if (!task.getUserId().equals(userId)) {
+ throw ServiceExceptionUtil.exception(ErrorCodeConstants.GENERAL_FORBIDDEN);
+ }
+ return task;
+ }
+
/**
* 验证任务输入参数
*/
private void validateTaskInput(AppTikDigitalHumanCreateReqVO reqVO, Long userId) {
- // 验证文件信息:必须提供音频和视频文件之一
- boolean hasAudio = reqVO.getAudioFileId() != null || StrUtil.isNotBlank(reqVO.getAudioUrl());
- boolean hasVideo = reqVO.getVideoFileId() != null || StrUtil.isNotBlank(reqVO.getVideoUrl());
-
- if (!hasAudio) {
- throw ServiceExceptionUtil.exception(ErrorCodeConstants.DIGITAL_HUMAN_TASK_AUDIO_REQUIRED);
+ // 验证文本内容(必填)
+ if (StrUtil.isBlank(reqVO.getInputText())) {
+ throw new IllegalArgumentException("文案不能为空");
}
+
+ // 验证音色ID(必填)
+ if (StrUtil.isBlank(reqVO.getVoiceId())) {
+ throw new IllegalArgumentException("音色ID不能为空");
+ }
+
+ // 验证视频文件(必填)
+ boolean hasVideo = reqVO.getVideoFileId() != null || StrUtil.isNotBlank(reqVO.getVideoUrl());
if (!hasVideo) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.DIGITAL_HUMAN_TASK_VIDEO_REQUIRED);
}
- // 如果提供了fileId,验证文件是否存在且属于用户
- if (reqVO.getAudioFileId() != null) {
- validateUserFile(reqVO.getAudioFileId(), userId, "音频");
- }
+ // 如果提供了videoFileId,验证文件是否存在且属于用户
if (reqVO.getVideoFileId() != null) {
validateUserFile(reqVO.getVideoFileId(), userId, "视频");
}
-
- // 验证配音配置
- if (reqVO.getVoiceConfigId() != null) {
- TikUserVoiceDO voice = voiceMapper.selectById(reqVO.getVoiceConfigId());
- if (voice == null || !voice.getUserId().equals(userId)) {
- throw ServiceExceptionUtil.exception(ErrorCodeConstants.VOICE_NOT_EXISTS);
- }
- }
}
/**
@@ -279,7 +246,7 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
*/
private void validateUserFile(Long fileId, Long userId, String fileType) {
TikUserFileDO userFile = userFileMapper.selectOne(new LambdaQueryWrapperX()
- .eq(TikUserFileDO::getId, fileId)
+ .eq(TikUserFileDO::getFileId, fileId) // 查询fileId字段(指向infra_file.id)
.eq(TikUserFileDO::getUserId, userId));
if (userFile == null) {
throw ServiceExceptionUtil.exception(ErrorCodeConstants.FILE_NOT_EXISTS, fileType + "文件不存在");
@@ -294,14 +261,14 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
.userId(userId)
.taskName(reqVO.getTaskName())
.aiProvider(StrUtil.blankToDefault(reqVO.getAiProvider(), "302ai"))
- .audioFileId(reqVO.getAudioFileId())
.videoFileId(reqVO.getVideoFileId())
- .audioUrl(reqVO.getAudioUrl())
.videoUrl(reqVO.getVideoUrl())
- .voiceConfigId(reqVO.getVoiceConfigId())
+ .voiceId(reqVO.getVoiceId())
+ .inputText(reqVO.getInputText())
.speechRate(reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f)
.volume(reqVO.getVolume() != null ? reqVO.getVolume() : 0f)
.emotion(StrUtil.blankToDefault(reqVO.getEmotion(), "neutral"))
+ .instruction(reqVO.getInstruction())
.guidanceScale(reqVO.getGuidanceScale() != null ? reqVO.getGuidanceScale() : 1)
.seed(reqVO.getSeed() != null ? reqVO.getSeed() : 8888)
.status("PENDING")
@@ -388,14 +355,7 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
private void prepareFiles(TikDigitalHumanTaskDO task) throws Exception {
log.info("[prepareFiles][任务({})开始准备文件]", task.getId());
- // 如果提供了fileId,生成预签名URL
- if (task.getAudioFileId() != null) {
- FileDO audioFile = fileMapper.selectById(task.getAudioFileId());
- if (audioFile != null) {
- task.setAudioUrl(fileApi.presignGetUrl(audioFile.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS));
- }
- }
-
+ // 处理视频文件(音频由实时TTS生成,无需准备)
if (task.getVideoFileId() != null) {
FileDO videoFile = fileMapper.selectById(task.getVideoFileId());
if (videoFile != null) {
@@ -403,10 +363,7 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
}
}
- // 验证文件URL
- if (StrUtil.isBlank(task.getAudioUrl())) {
- throw new Exception("音频文件URL生成失败");
- }
+ // 验证视频文件URL(音频是实时生成,无需验证)
if (StrUtil.isBlank(task.getVideoUrl())) {
throw new Exception("视频文件URL生成失败");
}
@@ -414,7 +371,6 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
// 更新任务记录
TikDigitalHumanTaskDO updateObj = new TikDigitalHumanTaskDO();
updateObj.setId(task.getId());
- updateObj.setAudioUrl(task.getAudioUrl());
updateObj.setVideoUrl(task.getVideoUrl());
taskMapper.updateById(updateObj);
@@ -422,18 +378,49 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
}
/**
- * 语音合成
+ * 语音合成(使用CosyVoice v3 Flash)
*/
private String synthesizeVoice(TikDigitalHumanTaskDO task) throws Exception {
- log.info("[synthesizeVoice][任务({})开始语音合成]", task.getId());
+ // 参数验证
+ if (StrUtil.isBlank(task.getVoiceId())) {
+ throw new Exception("音色ID不能为空");
+ }
+ if (StrUtil.isBlank(task.getInputText())) {
+ throw new Exception("输入文本不能为空");
+ }
- // TODO: 调用现有的语音合成服务
- // 这里需要根据实际的语音合成API进行集成
+ log.info("[synthesizeVoice][任务({})开始语音合成][voiceId={}, textLength={}]",
+ task.getId(), task.getVoiceId(), task.getInputText().length());
- // 临时返回音频URL(实际应该调用语音合成服务)
- String audioUrl = task.getAudioUrl();
+ // 构建TTS请求参数
+ AppTikVoiceTtsReqVO ttsReqVO = new AppTikVoiceTtsReqVO();
+ ttsReqVO.setInputText(task.getInputText());
+ ttsReqVO.setVoiceId(task.getVoiceId());
+ ttsReqVO.setSpeechRate(task.getSpeechRate() != null ? task.getSpeechRate() : 1.0f);
+ ttsReqVO.setVolume(task.getVolume() != null ? task.getVolume() : 0f);
+ ttsReqVO.setInstruction(task.getInstruction());
+ ttsReqVO.setAudioFormat("mp3");
- log.info("[synthesizeVoice][任务({})语音合成完成]", task.getId());
+ // 调用语音合成服务
+ AppTikVoiceTtsRespVO ttsRespVO = userVoiceService.synthesizeVoice(ttsReqVO);
+
+ if (ttsRespVO == null) {
+ throw new Exception("语音合成失败,返回结果为空");
+ }
+
+ // 支持Base64和AudioUrl两种返回方式
+ String audioUrl = null;
+ if (StrUtil.isNotBlank(ttsRespVO.getAudioUrl())) {
+ // 优先使用AudioUrl
+ audioUrl = ttsRespVO.getAudioUrl();
+ } else if (StrUtil.isNotBlank(ttsRespVO.getAudioBase64())) {
+ // 如果是Base64,需要保存为临时文件并获取URL
+ audioUrl = saveTempAudioFile(ttsRespVO.getAudioBase64(), ttsRespVO.getFormat());
+ } else {
+ throw new Exception("语音合成失败,未返回音频数据");
+ }
+
+ log.info("[synthesizeVoice][任务({})语音合成完成][audioUrl={}]", task.getId(), audioUrl);
return audioUrl;
}
@@ -482,12 +469,54 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
reqVO.setGuidanceScale(task.getGuidanceScale());
reqVO.setSeed(task.getSeed());
- // 调用Latentsync服务
+ // 调用Latentsync服务提交任务
AppTikLatentsyncSubmitRespVO response = latentsyncService.submitTask(reqVO);
+ String requestId = response.getRequestId();
- // 等待处理完成(这里需要根据实际的Latentsync API调整)
- // 临时返回处理后的视频URL
- return task.getVideoUrl();
+ log.info("[syncWithLatentsync][任务({})提交成功,requestId={}]", task.getId(), requestId);
+
+ // 轮询等待任务完成
+ int maxAttempts = 60; // 最多轮询60次
+ int attempt = 0;
+        while (attempt < maxAttempts) {
+            attempt++;
+            AppTikLatentsyncResultRespVO result = null;
+            try {
+                result = latentsyncService.getTaskResult(requestId);
+            } catch (Exception e) {
+                log.error("[syncWithLatentsync][任务({})轮询异常: {}]", task.getId(), e.getMessage(), e);
+                // 如果是最后一次尝试,抛出异常
+                if (attempt >= maxAttempts) {
+                    throw new Exception("等待Latentsync结果超时: " + e.getMessage(), e);
+                }
+            }
+            if (result != null) {
+                String status = result.getStatus();
+                log.info("[syncWithLatentsync][任务({})轮询结果: 第{}次, status={}]", task.getId(), attempt, status);
+                if ("COMPLETED".equals(status)) {
+                    // 任务完成,获取视频URL(先判空,避免 video 为 null 时 NPE)
+                    String videoUrl = result.getVideo() != null ? result.getVideo().getUrl() : null;
+                    if (StrUtil.isNotBlank(videoUrl)) {
+                        log.info("[syncWithLatentsync][任务({})口型同步完成,videoUrl={}]", task.getId(), videoUrl);
+                        return videoUrl;
+                    }
+                    // 终态但无视频URL:直接失败,不再重试
+                    throw new Exception("Latentsync 返回视频URL为空");
+                }
+                if ("FAILED".equals(status)) {
+                    throw new Exception("Latentsync 任务处理失败");
+                }
+            }
+            // 等待5秒后再次轮询
+            try {
+                Thread.sleep(5000);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                throw new Exception("等待Latentsync结果时被中断", e);
+            }
+        }
+
+ throw new Exception("等待Latentsync结果超时");
}
/**
@@ -498,8 +527,8 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
// TODO: 这里可以添加视频后处理逻辑,比如添加字幕、特效等
- // 临时返回同步后的视频URL
- String resultVideoUrl = syncedVideoUrl;
+ // 保存同步后的视频到OSS
+ String resultVideoUrl = saveVideoToOss(task, syncedVideoUrl);
log.info("[generateVideo][任务({})视频生成完成]", task.getId());
return resultVideoUrl;
@@ -539,6 +568,25 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
log.info("[updateTaskStatus][任务({})状态更新: {}]", taskId, updateObj);
}
+ /**
+ * 保存Base64音频数据为临时文件并返回访问URL
+ */
+ private String saveTempAudioFile(String audioBase64, String format) throws Exception {
+ try {
+ // 解码Base64为字节数组
+ byte[] audioBytes = java.util.Base64.getDecoder().decode(audioBase64);
+ // 生成临时文件名
+ String fileName = "temp_audio_" + System.currentTimeMillis() + "." + (StrUtil.isNotBlank(format) ? format : "mp3");
+ // 保存到临时目录
+ String directory = "temp/audio";
+ // 保存文件并返回访问URL
+ return fileApi.createFile(audioBytes, fileName, directory, "audio/" + format);
+ } catch (Exception e) {
+ log.error("[saveTempAudioFile][保存音频文件失败]", e);
+ throw new Exception("保存音频文件失败:" + e.getMessage());
+ }
+ }
+
/**
* 更新任务进度
*/
@@ -546,4 +594,57 @@ public class DigitalHumanTaskServiceImpl implements DigitalHumanTaskService {
updateTaskStatus(taskId, "PROCESSING", step.getStep(), step.getProgress(), message, null);
}
+ /**
+ * 保存视频到OSS
+ */
+ private String saveVideoToOss(TikDigitalHumanTaskDO task, String remoteVideoUrl) throws Exception {
+ log.info("[saveVideoToOss][任务({})开始下载并保存视频到OSS][remoteUrl={}]", task.getId(), remoteVideoUrl);
+
+ try {
+ // 1. 下载远程视频文件
+ byte[] videoBytes = downloadRemoteFile(remoteVideoUrl);
+
+ // 2. 获取OSS目录路径(使用"generate"分类,符合数字人生成的语义)
+ Long userId = task.getUserId();
+ String baseDirectory = ossInitService.getOssDirectoryByCategory(userId, "generate");
+
+ // 3. 生成文件名(格式:task_{taskId}_{timestamp}.mp4)
+ String fileName = String.format("task_%d_%d.mp4", task.getId(), System.currentTimeMillis());
+
+ // 4. 保存到OSS
+ String ossUrl = fileApi.createFile(videoBytes, fileName, baseDirectory, "video/mp4");
+
+ // 5. 移除预签名URL中的签名参数,获取基础URL(用于存储)
+ String cleanOssUrl = HttpUtils.removeUrlQuery(ossUrl);
+
+ log.info("[saveVideoToOss][任务({})视频保存到OSS完成][directory={}, fileName={}, ossUrl={}]",
+ task.getId(), baseDirectory, fileName, cleanOssUrl);
+ return cleanOssUrl;
+
+ } catch (Exception e) {
+ log.error("[saveVideoToOss][任务({})保存视频到OSS失败][remoteUrl={}]", task.getId(), remoteVideoUrl, e);
+ // 如果保存失败,返回原始URL(降级处理)
+ return remoteVideoUrl;
+ }
+ }
+
+ /**
+ * 下载远程文件
+ */
+ private byte[] downloadRemoteFile(String remoteUrl) throws Exception {
+ log.info("[downloadRemoteFile][下载文件][url={}]", remoteUrl);
+
+        try (HttpResponse response = HttpRequest.get(remoteUrl)
+                .timeout(120000).execute()) { // 设置超时,避免远端停滞导致任务线程永久阻塞
+
+ if (!response.isOk()) {
+ throw new Exception("下载文件失败: HTTP " + response.getStatus());
+ }
+
+ byte[] bytes = response.bodyBytes();
+ log.info("[downloadRemoteFile][文件下载完成][size={} bytes]", bytes.length);
+ return bytes;
+ }
+ }
+
}
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncService.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncService.java
index 4c14ff0801..dd6d1f402f 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncService.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncService.java
@@ -2,6 +2,7 @@ package cn.iocoder.yudao.module.tik.voice.service;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitRespVO;
+import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncResultRespVO;
/**
* Latentsync 口型同步 Service
@@ -15,6 +16,14 @@ public interface LatentsyncService {
* @return 任务响应
*/
AppTikLatentsyncSubmitRespVO submitTask(AppTikLatentsyncSubmitReqVO reqVO);
+
+ /**
+ * 获取 Latentsync 任务结果
+ *
+ * @param requestId 任务 ID
+ * @return 任务结果
+ */
+ AppTikLatentsyncResultRespVO getTaskResult(String requestId);
}
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncServiceImpl.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncServiceImpl.java
index 152956906a..2a3de45b3d 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncServiceImpl.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncServiceImpl.java
@@ -4,9 +4,7 @@ import cn.hutool.core.util.StrUtil;
import cn.iocoder.yudao.module.tik.voice.client.LatentsyncClient;
import cn.iocoder.yudao.module.tik.voice.client.dto.LatentsyncSubmitRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.LatentsyncSubmitResponse;
-import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitReqVO;
-import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitRespVO;
-import jakarta.validation.Valid;
+import cn.iocoder.yudao.module.tik.voice.vo.*;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.validation.annotation.Validated;
@@ -22,7 +20,7 @@ public class LatentsyncServiceImpl implements LatentsyncService {
private final LatentsyncClient latentsyncClient;
@Override
- public AppTikLatentsyncSubmitRespVO submitTask(@Valid AppTikLatentsyncSubmitReqVO reqVO) {
+ public AppTikLatentsyncSubmitRespVO submitTask(AppTikLatentsyncSubmitReqVO reqVO) {
LatentsyncSubmitRequest request = LatentsyncSubmitRequest.builder()
.audioUrl(StrUtil.trim(reqVO.getAudioUrl()))
.videoUrl(StrUtil.trim(reqVO.getVideoUrl()))
@@ -37,6 +35,27 @@ public class LatentsyncServiceImpl implements LatentsyncService {
respVO.setQueuePosition(response.getQueuePosition());
return respVO;
}
+
+ @Override
+ public AppTikLatentsyncResultRespVO getTaskResult(String requestId) {
+ LatentsyncSubmitResponse response = latentsyncClient.getTaskResult(requestId);
+
+ AppTikLatentsyncResultRespVO respVO = new AppTikLatentsyncResultRespVO();
+ respVO.setRequestId(response.getRequestId());
+ respVO.setStatus(response.getStatus());
+ respVO.setSeed(response.getSeed());
+
+ // 转换视频信息
+ if (response.getVideo() != null) {
+ AppTikLatentsyncResultRespVO.VideoInfo videoInfo = new AppTikLatentsyncResultRespVO.VideoInfo();
+ videoInfo.setUrl(response.getVideo().getUrl());
+ videoInfo.setContentType(response.getVideo().getContentType());
+ videoInfo.setFileSize(response.getVideo().getFileSize());
+ respVO.setVideo(videoInfo);
+ }
+
+ return respVO;
+ }
}
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
index 43f4c9d561..36f0065c5f 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
@@ -27,7 +27,6 @@ import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikUserVoiceDO;
import cn.iocoder.yudao.module.tik.voice.dal.mysql.TikUserVoiceMapper;
-import cn.iocoder.yudao.module.tik.voice.util.ByteArrayMultipartFile;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceCreateReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoicePageReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceRespVO;
@@ -146,7 +145,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
CosyVoiceCloneRequest cloneRequest = new CosyVoiceCloneRequest();
- cloneRequest.setTargetModel("cosyvoice-v2"); // 使用v2模型,效果更好
+ cloneRequest.setTargetModel("cosyvoice-v3-flash"); // 使用v3-flash模型
cloneRequest.setPrefix("voice" + voice.getId()); // 音色前缀,格式要求
cloneRequest.setUrl(fileAccessUrl);
@@ -416,7 +415,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
transcriptionText,
reqVO.getInputText(),
false);
- finalText = appendEmotion(finalText, reqVO.getEmotion());
+ // 移除appendEmotion调用,情感通过instruction参数传递
String cacheKey = buildCacheKey(SYNTH_CACHE_PREFIX,
voiceId,
@@ -424,7 +423,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
finalText,
reqVO.getSpeechRate(),
reqVO.getVolume(),
- reqVO.getEmotion(),
+ reqVO.getInstruction(),
reqVO.getAudioFormat(),
reqVO.getSampleRate());
@@ -441,7 +440,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
reqVO.getModel(),
reqVO.getSpeechRate(),
reqVO.getVolume(),
- reqVO.getEmotion(),
+ reqVO.getInstruction(),
reqVO.getSampleRate(),
reqVO.getAudioFormat(),
false
@@ -563,8 +562,8 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
true);
// 使用请求参数或默认值
- String emotion = StrUtil.blankToDefault(reqVO.getEmotion(), "neutral");
- finalText = appendEmotion(finalText, emotion);
+ String instruction = reqVO.getInstruction();
+ // 注意:instruction参数现在直接传递给CosyVoice,不再添加到文本中
Float speechRate = reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f;
Float volume = reqVO.getVolume() != null ? reqVO.getVolume() : 0f;
String audioFormat = StrUtil.blankToDefault(reqVO.getAudioFormat(), "mp3");
@@ -576,7 +575,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
finalText,
speechRate,
volume,
- emotion,
+ instruction,
audioFormat,
null);
PreviewCacheEntry previewCache = getPreviewCache(cacheKey);
@@ -599,7 +598,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
null, // 使用默认模型
speechRate,
volume,
- emotion,
+ instruction,
null,
audioFormat,
true
@@ -630,7 +629,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
String model,
Float speechRate,
Float volume,
- String emotion,
+ String instruction,
Integer sampleRate,
String audioFormat,
boolean preview) {
@@ -642,7 +641,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
.model(model)
.speechRate(speechRate)
.volume(volume)
- .emotion(emotion)
+ .instruction(instruction)
.sampleRate(sampleRate)
.audioFormat(audioFormat)
.preview(preview)
@@ -738,7 +737,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
String text,
Float speechRate,
Float volume,
- String emotion,
+ String instruction,
String audioFormat,
Integer sampleRate) {
// 构建标识符:优先使用voiceId,如果没有则使用fileUrl的稳定部分(去除查询参数)
@@ -757,7 +756,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
text,
speechRate != null ? speechRate : "1.0",
volume != null ? volume : "0",
- StrUtil.blankToDefault(emotion, "neutral"),
+ instruction,
StrUtil.blankToDefault(audioFormat, cosyVoiceProperties.getAudioFormat()),
sampleRate != null ? sampleRate : cosyVoiceProperties.getSampleRate());
String hash = cn.hutool.crypto.SecureUtil.sha256(payload);
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanCreateReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanCreateReqVO.java
index ad26191c81..b990622123 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanCreateReqVO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanCreateReqVO.java
@@ -26,22 +26,20 @@ public class AppTikDigitalHumanCreateReqVO {
@Schema(description = "AI供应商(默认302ai)", example = "302ai", allowableValues = {"302ai", "aliyun", "openai", "minimax"})
private String aiProvider;
- @Schema(description = "音频文件ID(tik_user_file.id),与audioUrl二选一", example = "123")
- private Long audioFileId;
-
- @Schema(description = "音频文件URL(公网可访问),与audioFileId二选一", example = "https://example.com/audio.wav")
- @Size(max = 1024, message = "音频URL不能超过1024个字符")
- private String audioUrl;
-
- @Schema(description = "视频文件ID(tik_user_file.id),与videoUrl二选一", example = "456")
+ @Schema(description = "视频文件ID(tik_user_file.id)", example = "456")
private Long videoFileId;
- @Schema(description = "视频文件URL(公网可访问),与videoFileId二选一", example = "https://example.com/video.mp4")
+ @Schema(description = "视频文件URL(公网可访问,与videoFileId二选一)", example = "https://example.com/video.mp4")
@Size(max = 1024, message = "视频URL不能超过1024个字符")
private String videoUrl;
- @Schema(description = "配音配置ID(tik_user_voice.id)", example = "789")
- private Long voiceConfigId;
+ @Schema(description = "音色ID(CosyVoice voiceId)", example = "cosyvoice-v3-flash-sys-xxx")
+ private String voiceId;
+
+ @Schema(description = "输入文本(用于语音合成,文案必填)", example = "您好,欢迎体验数字人")
+ @NotBlank(message = "文案不能为空")
+ @Size(max = 4000, message = "文本不能超过4000个字符")
+ private String inputText;
@Schema(description = "语速(0.5-2.0,默认1.0)", example = "1.0")
@DecimalMin(value = "0.5", message = "语速不能小于0.5")
@@ -64,4 +62,7 @@ public class AppTikDigitalHumanCreateReqVO {
@Schema(description = "随机种子(默认8888)", example = "8888")
private Integer seed;
+ @Schema(description = "指令(用于控制音色风格)", example = "请用温柔专业的语调朗读")
+ private String instruction;
+
}
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanRespVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanRespVO.java
index 2739f1a696..d53ed86ae7 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanRespVO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikDigitalHumanRespVO.java
@@ -37,7 +37,7 @@ public class AppTikDigitalHumanRespVO {
@Schema(description = "配音配置ID", example = "789")
private Long voiceConfigId;
- @Schema(description = "voice_id", example = "cosyvoice-v2-xxx")
+ @Schema(description = "voice_id", example = "cosyvoice-v3-flash-xxx")
private String voiceId;
@Schema(description = "语速", example = "1.0")
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikLatentsyncResultRespVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikLatentsyncResultRespVO.java
new file mode 100644
index 0000000000..4ac98eab3a
--- /dev/null
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikLatentsyncResultRespVO.java
@@ -0,0 +1,36 @@
+package cn.iocoder.yudao.module.tik.voice.vo;
+
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+/**
+ * Latentsync 任务结果响应 VO
+ */
+@Data
+public class AppTikLatentsyncResultRespVO {
+
+ @Schema(description = "Latentsync 任务 ID", example = "8eed0b9b-6103-4357-a57b-9f135a8c3276")
+ private String requestId;
+
+ @Schema(description = "官方状态,如 IN_QUEUE、PROCESSING、COMPLETED、FAILED", example = "COMPLETED")
+ private String status;
+
+ @Schema(description = "种子值")
+ private Integer seed;
+
+ @Schema(description = "视频信息")
+ private VideoInfo video;
+
+ @Schema(description = "视频信息")
+ @Data
+ public static class VideoInfo {
+ @Schema(description = "视频URL")
+ private String url;
+
+ @Schema(description = "内容类型", example = "video/mp4")
+ private String contentType;
+
+ @Schema(description = "文件大小(字节)")
+ private Integer fileSize;
+ }
+}
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java
index a103e5c1a1..cea266070e 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java
@@ -39,6 +39,9 @@ public class AppTikVoicePreviewReqVO {
@Schema(description = "音频格式(可选,默认mp3)", example = "mp3")
private String audioFormat;
+
+ @Schema(description = "指令(用于控制音色风格)", example = "请用温柔专业的语调朗读")
+ private String instruction;
}
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java
index 73688a3dcf..e4e9f76d79 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java
@@ -21,20 +21,20 @@ public class AppTikVoiceTtsReqVO {
@Size(max = 4000, message = "识别文本不能超过 4000 个字符")
private String transcriptionText;
- @Schema(description = "音色 ID(CosyVoice voiceId)", example = "cosyvoice-v2-myvoice-xxx")
+ @Schema(description = "音色 ID(CosyVoice voiceId)", example = "cosyvoice-v3-flash-myvoice-xxx")
private String voiceId;
@Schema(description = "音色源音频 OSS 地址(当没有 voiceId 时必传)")
private String fileUrl;
- @Schema(description = "模型名称,默认 cosyvoice-v2", example = "cosyvoice-v3")
+ @Schema(description = "模型名称,默认 cosyvoice-v3-flash", example = "cosyvoice-v3-flash")
private String model;
@Schema(description = "语速,默认 1.0", example = "1.0")
private Float speechRate;
- @Schema(description = "情感", example = "happy")
- private String emotion;
+ @Schema(description = "指令(用于控制音色风格)", example = "请用温柔专业的语调朗读")
+ private String instruction;
@Schema(description = "音量调节范围 [-10,10]", example = "0")
private Float volume;
diff --git a/yudao-server/src/main/resources/application.yaml b/yudao-server/src/main/resources/application.yaml
index 1ab84c21f3..d1589ae2f6 100644
--- a/yudao-server/src/main/resources/application.yaml
+++ b/yudao-server/src/main/resources/application.yaml
@@ -216,7 +216,7 @@ yudao:
cosyvoice:
enabled: true
api-key: sk-10c746f8cb8640738f8d6b71af699003
- default-model: cosyvoice-v2
+ default-model: cosyvoice-v3-flash
sample-rate: 24000
audio-format: mp3
preview-text: 您好,欢迎体验专属音色