diff --git a/frontend/app/web-gold/src/views/kling/IdentifyFace.vue b/frontend/app/web-gold/src/views/kling/IdentifyFace.vue
index d502d347e8..ec4c06ae34 100644
--- a/frontend/app/web-gold/src/views/kling/IdentifyFace.vue
+++ b/frontend/app/web-gold/src/views/kling/IdentifyFace.vue
@@ -137,55 +137,30 @@
             </div>
           </div>
 
-          <!-- 配音生成与校验（仅在识别后显示） -->
-          <div v-if="identifyState.identified" class="section audio-generation-section">
-            <h3>配音生成与校验</h3>
-
-            <!-- 生成配音按钮 -->
-            <div class="generate-audio-row">
-              <a-button
-                type="default"
-                size="large"
-                :disabled="!canGenerateAudio"
-                :loading="audioState.generating"
-                block
-                @click="generateAudio"
-              >
-                {{ audioState.generating ? '生成中...' : '生成配音（用于校验时长）' }}
-              </a-button>
-            </div>
-
-            <!-- 音频预览 -->
-            <div v-if="audioState.generated" class="audio-preview">
-              <div class="audio-info">
-                <h4>生成的配音</h4>
-                <div class="duration-info">
-                  <span class="label">音频时长：</span>
-                  <span class="value">{{ audioDurationSec }} 秒</span>
-                </div>
-                <div class="duration-info">
-                  <span class="label">人脸区间：</span>
-                  <span class="value">{{ faceDurationSec }} 秒</span>
-                </div>
-                <div class="duration-info" :class="{ 'validation-passed': validationPassed, 'validation-failed': !validationPassed }">
-                  <span class="label">校验结果：</span>
-                  <span class="value">
-                    {{ validationPassed ? '✅ 通过' : '❌ 不通过（音频时长不能超过人脸时长）' }}
-                  </span>
-                </div>
+          <!-- 配音生成（仅在 Pipeline 到达 ready 状态后显示） -->
+          <div v-if="isPipelineReady" class="section audio-section">
+            <!-- 已生成音频 -->
+            <div v-if="audioState.generated" class="audio-generated">
+              <div class="audio-header">
+                <span class="audio-title">配音</span>
+                <span class="audio-duration">{{ audioDurationSec }}秒</span>
               </div>
 
               <!-- 音频播放器 -->
-              <div v-if="audioUrl" class="audio-player">
-                <audio :src="audioUrl" controls class="audio-element" />
+              <div v-if="audioUrl" class="audio-player-wrapper">
+                <audio :src="audioUrl" controls class="audio-player" />
               </div>
 
-              <!-- 重新生成按钮 -->
-              <div class="regenerate-row">
-                <a-button type="link" size="small" @click="generateAudio" :loading="audioState.generating">
-                  重新生成
-                </a-button>
+              <!-- 校验失败提示 -->
+              <div v-if="!validationPassed" class="validation-warning">
+                <span class="warning-icon">⚠️</span>
+                <span class="warning-text">音频时长({{ audioDurationSec }}秒)超过视频人脸区间({{ faceDurationSec }}秒)，请缩短文案或调整语速</span>
               </div>
+
+              <!-- 重新生成 -->
+              <a-button type="link" size="small" :loading="audioState.generating" @click="generateAudio">
+                重新生成
+              </a-button>
             </div>
           </div>
 
@@ -205,12 +180,26 @@
 
           <!-- 按钮组 -->
           <div class="action-buttons">
+            <!-- 准备阶段：先运行到 ready -->
             <a-button
+              v-if="!isPipelineReady"
               type="primary"
               size="large"
               :disabled="!canGenerate"
               :loading="isPipelineBusy"
               block
+              @click="generateAudio"
+            >
+              {{ isPipelineBusy ? '处理中...' : '生成配音并验证' }}
+            </a-button>
+
+            <!-- Ready 后：生成数字人视频 -->
+            <a-button
+              v-else
+              type="primary"
+              size="large"
+              :loading="isPipelineBusy"
+              block
               @click="generateDigitalHuman"
             >
               {{ isPipelineBusy ? '处理中...' : '生成数字人视频' }}
@@ -250,19 +239,16 @@ const dragOver = ref(false)
 // Controller 内部直接创建和管理两个子 Hook
 const controller = useIdentifyFaceController()
 
-
 // 解构 controller 以简化模板调用
 const {
   // 语音生成相关
   ttsText,
   speechRate,
   audioState,
-  canGenerateAudio,
   generateAudio,
 
   // 数字人生成相关
   videoState,
-  identifyState,
   getVideoPreviewUrl,
 
   // 计算属性
@@ -276,7 +262,7 @@ const {
   audioUrl,
   validationPassed,
 
-  // Pipeline 状态
+  // Pipeline 状态（单一状态源）
   pipelineState,
   isPipelineBusy,
   isPipelineReady,
@@ -370,20 +356,15 @@ onMounted(async () => {
   font-weight: 600;
 }
 
-.card-content h4,
-.audio-info h4 {
+.card-content h4 {
   color: var(--text-primary);
   font-size: 14px;
   margin-bottom: 12px;
 }
 
-.card-content p,
-.duration-label span:first-child {
+.card-content p {
   color: var(--text-secondary);
   font-size: 13px;
-}
-
-.card-content p {
   margin: 0;
 }
 
@@ -401,24 +382,6 @@ onMounted(async () => {
   }
 }
 
-.text-hint {
-  display: flex;
-  align-items: center;
-  gap: 8px;
-  margin-top: 12px;
-  padding: 12px 16px;
-  background: rgba(var(--color-primary), 0.1);
-  border: 1px solid rgba(var(--color-primary), 0.3);
-  border-radius: 8px;
-  font-size: 13px;
-  color: var(--text-secondary);
-}
-
-.hint-icon {
-  font-size: 16px;
-}
-
-/* ========== 控制面板 ========== */
 .control-group {
   margin-bottom: 16px;
 }
@@ -635,175 +598,76 @@ onMounted(async () => {
   border-radius: 8px;
 }
 
-/* ========== 验证结果 ========== */
-.validation-result {
-  padding: 16px;
-  background: var(--bg-primary);
-  border-radius: 8px;
-  border: 1px solid var(--border-light);
+/* ========== 音频区域 ========== */
+.audio-section {
+  margin-bottom: 24px;
 }
 
-.validation-result.validation-passed {
-  border-color: var(--color-success);
-  background: rgba(var(--color-success), 0.05);
+.audio-generated {
+  display: flex;
+  flex-direction: column;
+  gap: 12px;
 }
 
-.validation-result.validation-failed {
-  border-color: var(--color-error);
-  background: rgba(var(--color-error), 0.05);
-}
-
-.validation-status {
+.audio-header {
   display: flex;
   align-items: center;
-  gap: 8px;
-  margin-bottom: 12px;
-  padding: 12px;
-  background: var(--bg-secondary);
-  border-radius: 6px;
+  justify-content: space-between;
 }
 
-.status-icon {
-  font-size: 18px;
-}
-
-.status-text {
-  color: var(--text-primary);
+.audio-title {
   font-size: 14px;
   font-weight: 600;
-}
-
-/* ========== 时长对比进度条 ========== */
-.duration-comparison {
-  margin-bottom: 16px;
-  padding: 12px;
-  background: var(--bg-secondary);
-  border-radius: 6px;
-}
-
-.duration-bar {
-  margin-bottom: 12px;
-
-  &:last-child {
-    margin-bottom: 0;
-  }
-}
-
-.duration-label {
-  display: flex;
-  justify-content: space-between;
-  align-items: center;
-  margin-bottom: 8px;
-  font-size: 13px;
-}
-
-.duration-value {
   color: var(--text-primary);
-  font-weight: 600;
-  font-size: 13px;
-  padding: 4px 8px;
-  background: var(--bg-primary);
-  border-radius: 4px;
 }
 
-.progress-bar {
-  height: 8px;
-  background: var(--bg-primary);
-  border-radius: 4px;
-  overflow: hidden;
-}
-
-.progress-fill {
-  height: 100%;
-  border-radius: 4px;
-  transition: width 0.3s;
-}
-
-.audio-bar .progress-fill {
-  background: var(--color-primary);
-}
-
-.video-bar .progress-fill.success {
-  background: var(--color-success);
-}
-
-.video-bar .progress-fill.error {
-  background: var(--color-error);
-}
-
-/* ========== 错误提示 ========== */
-.validation-error {
-  padding: 12px;
-  background: var(--bg-secondary);
-  border: 1px solid var(--border-light);
-  border-radius: 6px;
-}
-
-.error-message {
-  color: var(--color-error);
-  font-size: 13px;
-  margin: 0 0 12px 0;
-}
-
-.quick-actions {
-  display: flex;
-  gap: 8px;
-}
-
-/* ========== 音频生成 ========== */
-.audio-generation-section {
-  margin-bottom: 24px;
-  padding: 16px;
-  background: var(--bg-secondary);
-  border-radius: 8px;
-  border: 1px solid var(--border-light);
-}
-
-.generate-audio-row {
-  margin-bottom: 16px;
-}
-
-.audio-preview {
-  padding: 16px;
-  background: var(--bg-primary);
-  border-radius: 8px;
-}
-
-.duration-info {
-  display: flex;
-  justify-content: space-between;
-  margin-bottom: 8px;
-  font-size: 13px;
-}
-
-.duration-info .label {
+.audio-duration {
+  font-size: 12px;
   color: var(--text-secondary);
 }
 
-.duration-info .value {
-  color: var(--text-primary);
-  font-weight: 600;
-}
-
-.duration-info.validation-passed .value {
-  color: var(--color-success);
-}
-
-.duration-info.validation-failed .value {
-  color: var(--color-error);
-}
-
-.audio-player {
-  margin: 16px 0;
-}
-
-.audio-element {
+.audio-player-wrapper {
   width: 100%;
 }
 
-.regenerate-row {
+.audio-player {
+  width: 100%;
+  height: 36px;
+}
+
+.validation-warning {
+  display: flex;
+  align-items: flex-start;
+  gap: 8px;
+  padding: 10px 12px;
+  background: rgba(var(--color-warning), 0.1);
+  border: 1px solid rgba(var(--color-warning), 0.3);
+  border-radius: 6px;
+  font-size: 13px;
+}
+
+.warning-icon {
+  flex-shrink: 0;
+  font-size: 14px;
+}
+
+.warning-text {
+  color: var(--text-secondary);
+  line-height: 1.4;
+}
+
+.audio-prompt {
   text-align: center;
-  margin-top: 12px;
+  padding: 20px;
+  background: var(--bg-secondary);
+  border-radius: 8px;
+  border: 1px dashed var(--border-light);
+}
+
+.audio-prompt p {
+  margin: 0 0 16px 0;
+  font-size: 14px;
+  color: var(--text-secondary);
 }
 
 /* ========== 操作按钮 ========== */
@@ -823,18 +687,6 @@ onMounted(async () => {
   border-radius: 8px;
 }
 
-.generate-hint {
-  display: flex;
-  align-items: center;
-  gap: 8px;
-  padding: 12px 16px;
-  background: rgba(var(--color-warning), 0.1);
-  border: 1px solid rgba(var(--color-warning), 0.3);
-  border-radius: 6px;
-  font-size: 13px;
-  color: var(--color-warning);
-}
-
 /* ========== 响应式 ========== */
 @media (max-width: 1024px) {
   .kling-content {
diff --git a/frontend/app/web-gold/src/views/kling/hooks/useDigitalHumanGeneration.ts b/frontend/app/web-gold/src/views/kling/hooks/useDigitalHumanGeneration.ts
index e875313cef..dd87b331ed 100644
--- a/frontend/app/web-gold/src/views/kling/hooks/useDigitalHumanGeneration.ts
+++ b/frontend/app/web-gold/src/views/kling/hooks/useDigitalHumanGeneration.ts
@@ -1,19 +1,21 @@
 /**
  * @fileoverview useDigitalHumanGeneration Hook - 数字人生成逻辑
+ *
+ * 重构后：不管理识别状态，只提供数据和操作方法
+ * 状态由 Pipeline 统一管理
  */
 
 import { ref, computed } from 'vue'
 import { message } from 'ant-design-vue'
 import type {
-  UseDigitalHumanGeneration,
   VideoState,
-  IdentifyState,
+  IdentifyResult,
   Video,
 } from '../types/identify-face'
 import { identifyUploadedVideo } from '@/api/kling'
 import { useUpload } from '@/composables/useUpload'
 
-export function useDigitalHumanGeneration(): UseDigitalHumanGeneration {
+export function useDigitalHumanGeneration() {
   // ========== 状态 ==========
   const videoState = ref<VideoState>({
     uploadedVideo: '',
@@ -25,9 +27,8 @@ export function useDigitalHumanGeneration(): UseDigitalHumanGeneration {
     selectorVisible: false,
   })
 
-  const identifyState = ref<IdentifyState>({
-    identifying: false,
-    identified: false,
+  // 识别结果数据（不含状态标志）
+  const identifyResult = ref<IdentifyResult>({
     sessionId: '',
     faceId: '',
     faceStartTime: 0,
@@ -39,7 +40,15 @@ export function useDigitalHumanGeneration(): UseDigitalHumanGeneration {
 
   // ========== 计算属性 ==========
   const faceDuration = computed(function() {
-    return identifyState.value.faceEndTime - identifyState.value.faceStartTime
+    return identifyResult.value.faceEndTime - identifyResult.value.faceStartTime
+  })
+
+  const hasVideo = computed(function() {
+    return !!videoState.value.uploadedVideo || !!videoState.value.selectedVideo
+  })
+
+  const isIdentified = computed(function() {
+    return !!identifyResult.value.sessionId
   })
 
   // ========== 方法 ==========
@@ -55,7 +64,7 @@ export function useDigitalHumanGeneration(): UseDigitalHumanGeneration {
     videoState.value.selectedVideo = null
     videoState.value.previewVideoUrl = ''
     videoState.value.videoSource = 'upload'
-    resetIdentifyState()
+    resetIdentifyResult()
   }
 
   async function handleVideoSelect(video: Video): Promise<void> {
@@ -64,67 +73,65 @@ export function useDigitalHumanGeneration(): UseDigitalHumanGeneration {
     videoState.value.videoFile = null
     videoState.value.videoSource = 'select'
     videoState.value.selectorVisible = false
-    resetIdentifyState()
-    identifyState.value.videoFileId = video.fileId
+    resetIdentifyResult()
+    identifyResult.value.videoFileId = video.fileId
   }
 
-  async function performFaceRecognition(): Promise<void> {
+  /**
+   * 执行人脸识别
+   * 返回识别结果供 Pipeline 使用
+   */
+  async function performFaceRecognition(): Promise<IdentifyResult> {
     const hasUploadFile = videoState.value.videoFile
     const hasSelectedVideo = videoState.value.selectedVideo
 
-    if (!hasUploadFile && !hasSelectedVideo) return
+    if (!hasUploadFile && !hasSelectedVideo) {
+      throw new Error('请先选择视频')
+    }
 
-    identifyState.value.identifying = true
-
-    try {
-      if (hasSelectedVideo) {
-        const res = await identifyUploadedVideo(hasSelectedVideo) as { success: boolean; data: { sessionId: string; faceId: string | null; startTime: number; endTime: number } }
-        identifyState.value.videoFileId = hasSelectedVideo.fileId
-
-        identifyState.value.sessionId = res.data.sessionId
-        identifyState.value.faceId = res.data.faceId || ''
-        identifyState.value.faceStartTime = res.data.startTime || 0
-        identifyState.value.faceEndTime = res.data.endTime || 0
-      } else {
-        const file = hasUploadFile!
-        let coverBase64 = null
-        try {
-          const { extractVideoCover } = await import('@/utils/video-cover')
-          const cover = await extractVideoCover(file, { maxWidth: 800, quality: 0.8 })
-          coverBase64 = cover.base64
-        } catch {
-          // 封面提取失败不影响主流程
-        }
-
-        const fileId = await upload(file, {
-          fileCategory: 'video',
-          groupId: null,
-          coverBase64,
-          onStart: function() {},
-          onProgress: function() {},
-          onSuccess: function() {},
-          onError: function(err: Error) {
-            message.error(err.message || '上传失败')
-          }
-        })
-
-        identifyState.value.videoFileId = fileId
-        identifyState.value.sessionId = ''
-        identifyState.value.faceId = ''
-        identifyState.value.faceStartTime = 0
-        identifyState.value.faceEndTime = 0
+    if (hasSelectedVideo) {
+      const res = await identifyUploadedVideo(hasSelectedVideo) as {
+        success: boolean;
+        data: { sessionId: string; faceId: string | null; startTime: number; endTime: number }
+      }
+      identifyResult.value.videoFileId = hasSelectedVideo.fileId
+      identifyResult.value.sessionId = res.data.sessionId
+      identifyResult.value.faceId = res.data.faceId || ''
+      identifyResult.value.faceStartTime = res.data.startTime || 0
+      identifyResult.value.faceEndTime = res.data.endTime || 0
+    } else {
+      const file = hasUploadFile!
+      let coverBase64 = null
+      try {
+        const { extractVideoCover } = await import('@/utils/video-cover')
+        const cover = await extractVideoCover(file, { maxWidth: 800, quality: 0.8 })
+        coverBase64 = cover.base64
+      } catch {
+        // 封面提取失败不影响主流程
       }
 
-      identifyState.value.identified = true
+      const fileId = await upload(file, {
+        fileCategory: 'video',
+        groupId: null,
+        coverBase64,
+        onStart: function() {},
+        onProgress: function() {},
+        onSuccess: function() {},
+        onError: function(err: Error) {
+          message.error(err.message || '上传失败')
+        }
+      })
 
-      // 识别完成，不显示提示信息
-    } catch (error: unknown) {
-      const err = error as Error
-      message.error(err.message || '识别失败')
-      throw error
-    } finally {
-      identifyState.value.identifying = false
+      identifyResult.value.videoFileId = fileId
+      // 上传后需要再调用识别接口获取人脸信息
+      // 暂时清空，等待后续识别
+      identifyResult.value.sessionId = ''
+      identifyResult.value.faceId = ''
+      identifyResult.value.faceStartTime = 0
+      identifyResult.value.faceEndTime = 0
     }
+
+    return { ...identifyResult.value }
   }
 
   function resetVideoState(): void {
@@ -135,7 +142,7 @@ export function useDigitalHumanGeneration(): UseDigitalHumanGeneration {
     videoState.value.videoSource = null
     videoState.value.previewVideoUrl = ''
     videoState.value.selectorVisible = false
-    resetIdentifyState()
+    resetIdentifyResult()
   }
 
   function getVideoPreviewUrl(video: Video): string {
@@ -149,22 +156,27 @@ export function useDigitalHumanGeneration(): UseDigitalHumanGeneration {
     return 'data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjAwIiBoZWlnaHQ9IjExMCIgdmlld0JveD0iMCAwIDIwMCAxMTAiIGZpbGw9Im5vbmUiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+CjxyZWN0IHdpZHRoPSIyMDAiIGhlaWdodD0iMTEwIiBmaWxsPSIjMzc0MTUxIi8+CjxwYXRoIGQ9Ik04NSA0NUwxMTUgNjVMMTA1IDg1TDc1IDc1TDg1IDQ1WiIgZmlsbD0iIzU3MjY1MSIvPgo8L3N2Zz4K'
   }
 
-  function resetIdentifyState(): void {
-    identifyState.value.identified = false
-    identifyState.value.sessionId = ''
-    identifyState.value.faceId = ''
-    identifyState.value.videoFileId = null
+  // Clear every identification field, including the face interval, so that
+  // faceDuration does not keep the previous video's value after a reset.
+  function resetIdentifyResult(): void {
+    identifyResult.value.sessionId = ''
+    identifyResult.value.faceId = ''
+    identifyResult.value.faceStartTime = 0
+    identifyResult.value.faceEndTime = 0
+    identifyResult.value.videoFileId = null
   }
 
   return {
     videoState,
-    identifyState,
+    identifyResult,
+    hasVideo,
+    isIdentified,
     faceDuration,
     handleFileUpload,
     handleVideoSelect,
     performFaceRecognition,
     resetVideoState,
-    resetIdentifyState,
+    resetIdentifyResult,
     getVideoPreviewUrl,
   }
 }
diff --git a/frontend/app/web-gold/src/views/kling/hooks/useIdentifyFaceController.ts b/frontend/app/web-gold/src/views/kling/hooks/useIdentifyFaceController.ts
index 64120f1aea..245b0c1d97 100644
--- a/frontend/app/web-gold/src/views/kling/hooks/useIdentifyFaceController.ts
+++ b/frontend/app/web-gold/src/views/kling/hooks/useIdentifyFaceController.ts
@@ -1,7 +1,11 @@
 /**
- * @fileoverview useIdentifyFaceController Hook - 主控制器
+ * @fileoverview useIdentifyFaceController Hook - 主控制器（重构版）
  *
- * 职责：协调语音、视频、Pipeline 各个子模块，提供统一的外部接口
+ * 设计理念：
+ * - 所有操作统一通过 Pipeline 状态机
+ * - 移除独立的 identifyState，使用 pipeline 状态
+ * - 点击"生成配音" → 运行到 ready 状态
+ * - 点击"生成数字人视频" → 从 ready 继续 → completed
  *
  * 模块依赖关系:
  *   ┌─────────────────────────────────────────────────┐
@@ -9,7 +13,7 @@
  *   │   ┌──────────────┐  ┌──────────────┐  ┌───────────┐│
  *   │   │   Voice      │  │  Digital     │  │ Pipeline  ││
  *   │   │   Generation  │  │  Human       │  │           ││
- *   │   │              │  │  Generation  │  │           ││
+ *   │   │              │  │  Generation  │  │  状态机    ││
  *   │   └──────────────┘  └──────────────┘  └───────────┘│
  *   └─────────────────────────────────────────────────┘
  */
@@ -17,7 +21,6 @@
 import { computed } from 'vue'
 import { message } from 'ant-design-vue'
 import type {
-  UseIdentifyFaceController,
   VoiceMeta,
 } from '../types/identify-face'
 import { useVoiceGeneration } from './useVoiceGeneration'
@@ -31,36 +34,35 @@ const MAX_TEXT_LENGTH = 4000
 /**
  * 主控制器 Hook
  */
-export function useIdentifyFaceController(): UseIdentifyFaceController {
+export function useIdentifyFaceController() {
   // 子 Hooks
   const voice = useVoiceGeneration()
   const digitalHuman = useDigitalHumanGeneration()
 
-  // Pipeline 流程配置（使用新的极简状态机）
+  // Pipeline 流程配置
   const pipeline = useSimplePipeline({
     uploadVideo: async (_file: File) => {
       // 上传已经在 handleFileUpload 中处理
-      // 这里直接返回 fileId
-      return digitalHuman.identifyState.value.videoFileId || ''
+      return digitalHuman.identifyResult.value.videoFileId || ''
     },
     recognizeFromLibrary: async (video: any) => {
       await digitalHuman.handleVideoSelect(video)
-      await digitalHuman.performFaceRecognition()
+      const result = await digitalHuman.performFaceRecognition()
       return {
-        sessionId: digitalHuman.identifyState.value.sessionId,
-        faceId: digitalHuman.identifyState.value.faceId,
-        startTime: digitalHuman.identifyState.value.faceStartTime,
-        endTime: digitalHuman.identifyState.value.faceEndTime,
+        sessionId: result.sessionId,
+        faceId: result.faceId,
+        startTime: result.faceStartTime,
+        endTime: result.faceEndTime,
         duration: digitalHuman.faceDuration.value,
       }
     },
     recognizeUploaded: async (_fileId: string | number) => {
-      await digitalHuman.performFaceRecognition()
+      const result = await digitalHuman.performFaceRecognition()
       return {
-        sessionId: digitalHuman.identifyState.value.sessionId,
-        faceId: digitalHuman.identifyState.value.faceId,
-        startTime: digitalHuman.identifyState.value.faceStartTime,
-        endTime: digitalHuman.identifyState.value.faceEndTime,
+        sessionId: result.sessionId,
+        faceId: result.faceId,
+        startTime: result.faceStartTime,
+        endTime: result.faceEndTime,
         duration: digitalHuman.faceDuration.value,
       }
     },
@@ -93,30 +95,24 @@ export function useIdentifyFaceController(): UseIdentifyFaceController {
     const hasVideo = digitalHuman.videoState.value.uploadedVideo || digitalHuman.videoState.value.selectedVideo
     const hasBasicConfig = hasText && hasVoice && hasVideo
 
-    // 未识别时只需要基础配置
-    if (!digitalHuman.identifyState.value.identified) return !!hasBasicConfig
+    // 未识别或未到 ready 状态需要基础配置
+    if (!pipeline.isReady.value) return !!hasBasicConfig
 
-    // 已识别后需要音频生成并通过校验
-    return !!(
-      hasBasicConfig &&
-      voice.audioState.value.generated &&
-      validationPassed.value
-    )
+    // 已到 ready 状态可以生成
+    return true
   })
 
   /** 最大文本长度（根据人脸时长动态计算） */
   const maxTextLength = computed(() => {
-    const isIdentified = digitalHuman.identifyState.value.identified
     const faceDuration = digitalHuman.faceDuration.value
-    if (!isIdentified || faceDuration <= 0) return MAX_TEXT_LENGTH
+    if (faceDuration <= 0) return MAX_TEXT_LENGTH
     return Math.min(MAX_TEXT_LENGTH, Math.floor(voice.suggestedMaxChars.value * 1.2))
   })
 
   /** 文本框占位符提示 */
   const textareaPlaceholder = computed(() => {
-    const isIdentified = digitalHuman.identifyState.value.identified
     const faceDuration = digitalHuman.faceDuration.value
-    if (isIdentified && faceDuration > 0) {
+    if (faceDuration > 0) {
       return `请输入文案，建议不超过${voice.suggestedMaxChars.value}字以确保与视频匹配`
     }
     return '请输入你想让角色说话的内容'
@@ -131,12 +127,6 @@ export function useIdentifyFaceController(): UseIdentifyFaceController {
   /** 音频时长显示（秒） */
   const audioDurationSec = computed(() => (voice.audioState.value.durationMs / 1000).toFixed(1))
 
-  /** 是否显示生成提示 */
-  const showGenerateHint = computed(() =>
-    digitalHuman.identifyState.value.identified &&
-    (!voice.audioState.value.generated || !validationPassed.value)
-  )
-
   /** 音频播放 URL */
   const audioUrl = computed(() => {
     const audio = voice.audioState.value.generated
@@ -145,12 +135,12 @@ export function useIdentifyFaceController(): UseIdentifyFaceController {
   })
 
   /**
-   * 校验是否通过（计算属性）
-   * 规则：音频时长 <= 人脸时长（Kling 要求音频不能超过人脸区间）
+   * 校验是否通过
+   * 规则：音频时长 <= 人脸时长
    */
   const validationPassed = computed(() => {
-    const faceDuration = Number(faceDurationSec.value)
-    const audioDuration = Number(audioDurationSec.value)
+    const faceDuration = digitalHuman.faceDuration.value
+    const audioDuration = voice.audioState.value.durationMs
     return audioDuration <= faceDuration
   })
 
@@ -162,12 +152,48 @@ export function useIdentifyFaceController(): UseIdentifyFaceController {
   function resetAllStates(): void {
     voice.resetAudioState()
     digitalHuman.resetVideoState()
-    digitalHuman.resetIdentifyState()
     pipeline.reset()
   }
 
   /**
-   * 生成数字人视频 - 使用新的 Pipeline API
+   * 生成配音 - 运行 Pipeline 到 ready 状态
+   */
+  async function generateAudio(): Promise<void> {
+    const hasVideo = digitalHuman.videoState.value.uploadedVideo || digitalHuman.videoState.value.selectedVideo
+    const hasText = voice.ttsText.value.trim()
+    const hasVoice = voice.selectedVoiceMeta.value
+
+    if (!hasText) {
+      message.warning('请输入文案内容')
+      return
+    }
+
+    if (!hasVoice) {
+      message.warning('请选择音色')
+      return
+    }
+
+    if (!hasVideo) {
+      message.warning('请先选择视频')
+      return
+    }
+
+    try {
+      // 运行流程到 ready 状态（包含识别、生成、校验）
+      await pipeline.run({
+        videoFile: digitalHuman.videoState.value.videoFile,
+        selectedVideo: digitalHuman.videoState.value.selectedVideo,
+        text: voice.ttsText.value,
+        voice: voice.selectedVoiceMeta.value,
+        speechRate: voice.speechRate.value,
+      })
+    } catch {
+      // 错误已在 Pipeline 中处理
+    }
+  }
+
+  /**
+   * 生成数字人视频 - 从 ready 状态继续到 completed
    */
   async function generateDigitalHuman(): Promise<void> {
     if (!canGenerate.value) {
@@ -189,16 +215,18 @@ export function useIdentifyFaceController(): UseIdentifyFaceController {
     }
 
     try {
-      // 运行流程到 ready 状态
-      await pipeline.run({
-        videoFile: digitalHuman.videoState.value.videoFile,
-        selectedVideo: digitalHuman.videoState.value.selectedVideo,
-        text,
-        voice: voiceMeta,
-        speechRate: voice.speechRate.value,
-      })
+      // 如果还没到 ready 状态，先运行到 ready
+      if (!pipeline.isReady.value) {
+        await pipeline.run({
+          videoFile: digitalHuman.videoState.value.videoFile,
+          selectedVideo: digitalHuman.videoState.value.selectedVideo,
+          text,
+          voice: voiceMeta,
+          speechRate: voice.speechRate.value,
+        })
+      }
 
-      // 如果到达 ready 状态，自动创建任务
+      // 如果到达 ready 状态，创建任务
       if (pipeline.isReady.value) {
         await pipeline.createTask()
         // 任务提交成功后，重置所有状态
@@ -242,7 +270,7 @@ export function useIdentifyFaceController(): UseIdentifyFaceController {
   function handleSelectUpload(): void {
     digitalHuman.videoState.value.videoSource = 'upload'
     digitalHuman.videoState.value.selectedVideo = null
-    digitalHuman.resetIdentifyState()
+    digitalHuman.resetIdentifyResult()
     pipeline.reset()
   }
 
@@ -285,19 +313,6 @@ export function useIdentifyFaceController(): UseIdentifyFaceController {
 
   // ==================== 返回接口 ====================
 
-  /**
-   * 包装的音频生成方法（延迟识别）
-   * 在生成音频前先执行人脸识别
-   */
-  async function generateAudio(): Promise<void> {
-    // 如果有视频但未识别，先执行识别
-    const hasVideo = digitalHuman.videoState.value.uploadedVideo || digitalHuman.videoState.value.selectedVideo
-    if (hasVideo && !digitalHuman.identifyState.value.identified) {
-      await digitalHuman.performFaceRecognition()
-    }
-    await voice.generateAudio()
-  }
-
   return {
     // 语音生成模块
     ttsText: voice.ttsText,
@@ -311,13 +326,13 @@ export function useIdentifyFaceController(): UseIdentifyFaceController {
 
     // 数字人生成模块
     videoState: digitalHuman.videoState,
-    identifyState: digitalHuman.identifyState,
+    identifyResult: digitalHuman.identifyResult,
+    isIdentified: digitalHuman.isIdentified,
     faceDuration: digitalHuman.faceDuration,
-    performFaceRecognition: digitalHuman.performFaceRecognition,
     handleFileUpload: digitalHuman.handleFileUpload,
     getVideoPreviewUrl: digitalHuman.getVideoPreviewUrl,
     resetVideoState: digitalHuman.resetVideoState,
-    resetIdentifyState: digitalHuman.resetIdentifyState,
+    resetIdentifyResult: digitalHuman.resetIdentifyResult,
 
     // 业务方法
     generateDigitalHuman,
@@ -345,11 +360,10 @@ export function useIdentifyFaceController(): UseIdentifyFaceController {
     speechRateDisplay,
     faceDurationSec,
     audioDurationSec,
-    showGenerateHint,
     audioUrl,
     validationPassed,
 
-    // Pipeline 状态
+    // Pipeline 状态（单一状态源）
     pipelineState: pipeline.state,
     pipelineStateLabel: pipeline.stateLabel,
     pipelineStateDescription: pipeline.stateDescription,
@@ -360,8 +374,6 @@ export function useIdentifyFaceController(): UseIdentifyFaceController {
     pipelineProgress: pipeline.progress,
     pipelineCurrentStepIndex: pipeline.currentStepIndex,
     pipelineError: pipeline.error,
-    runPipeline: pipeline.run,
-    createPipelineTask: pipeline.createTask,
     retryPipeline: pipeline.retry,
     resetPipeline: pipeline.reset,
   }
diff --git a/frontend/app/web-gold/src/views/kling/hooks/useVoiceGeneration.ts b/frontend/app/web-gold/src/views/kling/hooks/useVoiceGeneration.ts
index 53663a8d60..51cdf3b67e 100644
--- a/frontend/app/web-gold/src/views/kling/hooks/useVoiceGeneration.ts
+++ b/frontend/app/web-gold/src/views/kling/hooks/useVoiceGeneration.ts
@@ -55,8 +55,8 @@ export function useVoiceGeneration(): UseVoiceGeneration {
     try {
       const params = {
         inputText: ttsText.value,
-        voiceConfigId: voice.rawId || extractIdFromString(voice.id),
-        speechRate: speechRate.value || DEFAULT_SPEECH_RATE,
+        voiceConfigId: voice.rawId ?? extractIdFromString(voice.id),
+        speechRate: speechRate.value,
         audioFormat: 'mp3' as const,
         providerType: DEFAULT_VOICE_PROVIDER,
       }
@@ -85,48 +85,60 @@ export function useVoiceGeneration(): UseVoiceGeneration {
     }
   }
 
+  /**
+   * 解析音频时长（浏览器环境）
+   * 使用 HTML5 Audio API，添加安全边距避免精度误差
+   */
   async function parseAudioDuration(base64Data: string): Promise<number> {
     const base64 = base64Data.includes(',') ? base64Data.split(',')[1] : base64Data
     const binaryString = window.atob(base64)
     const bytes = new Uint8Array(binaryString.length)
-    for (let i = 0; i < binaryString.length; i++) {
+    for (let i = 0; i < bytes.length; i++) {
       bytes[i] = binaryString.charCodeAt(i)
     }
 
-    return new Promise(function(resolve, reject) {
+    return new Promise<number>(function(resolve, reject) {
       const blob = new Blob([bytes], { type: 'audio/mp3' })
       const audio = new Audio()
       const objectUrl = URL.createObjectURL(blob)
 
-      // 超时机制：5秒后拒绝
       const timeoutId = setTimeout(function() {
-        URL.revokeObjectURL(objectUrl)
+        cleanup()
         reject(new Error('音频时长解析超时'))
-      }, 5000)
+      }, 10000)
 
-      function onLoadedMetadata() {
+      function cleanup() {
         clearTimeout(timeoutId)
         URL.revokeObjectURL(objectUrl)
+        audio.removeEventListener('loadedmetadata', onLoadedMetadata)
+        audio.removeEventListener('error', onError)
+        audio.removeEventListener('canplay', onLoadedMetadata)
+      }
 
+      function onLoadedMetadata() {
         const duration = audio.duration
-        if (!isFinite(duration) || duration <= 0 || isNaN(duration)) {
-          reject(new Error(`音频时长无效: ${duration}，请检查音频格式是否正确`))
+        if (!isFinite(duration) || duration <= 0) {
+          cleanup()
+          reject(new Error(`音频时长无效: ${duration}`))
           return
         }
 
-        const durationMs = Math.round(duration * 1000)
-        console.log('[parseAudioDuration] 音频时长解析成功:', durationMs, 'ms')
+        // Subtract a 200ms safety margin so sound_end_time never exceeds the real audio length; clamp at 0 so clips shorter than the margin do not yield a negative duration
+        const rawDurationMs = Math.floor(duration * 1000)
+        const durationMs = Math.max(0, rawDurationMs - 200)
+        console.log('[parseAudioDuration] 解析成功:', durationMs, 'ms (原始:', rawDurationMs, 'ms)')
+        cleanup()
         resolve(durationMs)
       }
 
       function onError() {
-        clearTimeout(timeoutId)
-        URL.revokeObjectURL(objectUrl)
+        cleanup()
         reject(new Error('音频解析失败，请检查音频格式'))
       }
 
       audio.addEventListener('loadedmetadata', onLoadedMetadata)
       audio.addEventListener('error', onError)
+      audio.addEventListener('canplay', onLoadedMetadata, { once: true })
       audio.src = objectUrl
       audio.load()
     })
diff --git a/frontend/app/web-gold/src/views/kling/types/identify-face.ts b/frontend/app/web-gold/src/views/kling/types/identify-face.ts
index 78c1f7cd46..c4fbcb8078 100644
--- a/frontend/app/web-gold/src/views/kling/types/identify-face.ts
+++ b/frontend/app/web-gold/src/views/kling/types/identify-face.ts
@@ -44,6 +44,17 @@ export interface IdentifyState {
   videoFileId: string | number | null
 }
 
+/**
+ * 人脸识别结果接口（不包含状态标志）
+ */
+export interface IdentifyResult {
+  sessionId: string
+  faceId: string
+  faceStartTime: number
+  faceEndTime: number
+  videoFileId: string | number | null
+}
+
 /**
  * 音频状态接口
  */
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncPollingService.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncPollingService.java
index 2a57d8f86a..028637bb35 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncPollingService.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncPollingService.java
@@ -365,86 +365,14 @@ public class LatentsyncPollingService {
     }
 
     /**
-     * 保存视频到OSS - 直接保存到 infra_file 避免重复
-     * 返回保存结果，包含URL、文件大小和文件ID
+     * 保存远程视频URL
+     * 简化版：直接保存Kling返回的URL，不再下载上传到OSS
      */
     private OssSaveResult saveVideoToOss(TikDigitalHumanTaskDO task, String remoteVideoUrl) throws Exception {
-        log.info("[saveVideoToOss][任务({})开始下载并保存视频到OSS][remoteUrl={}]", task.getId(), remoteVideoUrl);
+        log.info("[saveVideoToOss][任务({})直接保存Kling URL][url={}]", task.getId(), remoteVideoUrl);
 
-        try {
-            // 1. 下载远程视频文件
-            byte[] videoBytes = downloadRemoteFile(remoteVideoUrl);
-
-            // 2. 内存检查：超过50MB记录警告
-            int sizeMB = videoBytes.length / 1024 / 1024;
-            if (sizeMB > 50) {
-                log.warn("[saveVideoToOss][任务({})视频文件较大][size={}MB]", task.getId(), sizeMB);
-            }
-
-            // 3. 获取OSS目录和文件名
-            Long userId = task.getUserId();
-            String baseDirectory = ossInitService.getOssDirectoryByCategory(userId, "generate");
-            String fileName = String.format("数字人视频_%d_%d.mp4", task.getId(), System.currentTimeMillis());
-
-            // 4. 获取FileClient并上传到OSS
-            FileClient client = fileConfigService.getMasterFileClient();
-            if (client == null) {
-                throw new Exception("获取FileClient失败");
-            }
-
-            // 5. 生成上传路径（包含日期前缀和时间戳后缀）
-            String filePath = generateUploadPath(fileName, baseDirectory);
-
-            // 6. 上传到OSS
-            String presignedUrl = client.upload(videoBytes, filePath, "video/mp4");
-
-            // 7. 移除预签名参数，获取基础URL
-            String cleanUrl = HttpUtils.removeUrlQuery(presignedUrl);
-
-            // 8. 保存到 infra_file 表
-            FileDO infraFile = new FileDO()
-                    .setConfigId(client.getId())
-                    .setName(fileName)
-                    .setPath(filePath)
-                    .setUrl(cleanUrl)
-                    .setType("video/mp4")
-                    .setSize(videoBytes.length);
-            fileMapper.insert(infraFile);
-            Long infraFileId = infraFile.getId();
-
-            log.info("[saveVideoToOss][任务({})视频保存完成][infraFileId={}, size={}MB]",
-                    task.getId(), infraFileId, sizeMB);
-            return new OssSaveResult(cleanUrl, videoBytes.length, filePath, infraFileId);
-
-        } catch (Exception e) {
-            log.error("[saveVideoToOss][任务({})保存视频失败][remoteUrl={}]", task.getId(), remoteVideoUrl, e);
-            return new OssSaveResult(remoteVideoUrl, 0, null, null);
-        }
-    }
-
-    /**
-     * 生成上传路径（与 FileService 保持一致）
-     */
-    private String generateUploadPath(String name, String directory) {
-        String prefix = cn.hutool.core.date.LocalDateTimeUtil.format(
-                cn.hutool.core.date.LocalDateTimeUtil.now(),
-                cn.hutool.core.date.DatePattern.PURE_DATE_PATTERN);
-        String suffix = String.valueOf(System.currentTimeMillis());
-
-        String ext = cn.hutool.core.io.FileUtil.extName(name);
-        if (StrUtil.isNotEmpty(ext)) {
-            name = cn.hutool.core.io.FileUtil.mainName(name) + "_" + suffix + "." + ext;
-        } else {
-            name = name + "_" + suffix;
-        }
-
-        if (StrUtil.isNotEmpty(prefix)) {
-            name = prefix + "/" + name;
-        }
-        if (StrUtil.isNotEmpty(directory)) {
-            name = directory + "/" + name;
-        }
-        return name;
+        // 直接返回Kling URL，不上传到OSS
+        return new OssSaveResult(remoteVideoUrl, 0, null, null);
     }
 
     /**
@@ -480,57 +408,39 @@ public class LatentsyncPollingService {
         }
     }
 
-    /**
-     * 下载远程文件 - 内存优化
-     */
-    private byte[] downloadRemoteFile(String remoteUrl) throws Exception {
-        log.info("[downloadRemoteFile][下载文件][url={}]", remoteUrl);
-
-        try (HttpResponse response = HttpRequest.get(remoteUrl)
-                .execute()) {
-
-            if (!response.isOk()) {
-                throw new Exception("下载文件失败: HTTP " + response.getStatus());
-            }
-
-            // 流式读取：分块处理避免大文件OOM
-            byte[] bytes = response.bodyBytes();
-            int sizeMB = bytes.length / 1024 / 1024;
-            log.info("[downloadRemoteFile][文件下载完成][size={} bytes, {}MB]", bytes.length, sizeMB);
-            return bytes;
-        }
-    }
-
     /**
      * 保存结果视频到用户文件表
+     * 如果 OSS 保存失败（infraFileId 为 null），直接保存外部 URL
      */
     private void saveResultVideoToUserFiles(TikDigitalHumanTaskDO task, OssSaveResult saveResult) {
         try {
             Long userId = task.getUserId();
-            Long infraFileId = saveResult.getInfraFileId();
 
-            // 验证必要参数
-            if (userId == null || infraFileId == null) {
-                log.warn("[saveResultVideoToUserFiles][任务({})参数不完整，无法保存][userId={}, infraFileId={}]",
-                        task.getId(), userId, infraFileId);
+            if (userId == null) {
+                log.warn("[saveResultVideoToUserFiles][任务({})userId为空，无法保存]", task.getId());
                 return;
             }
 
-            // 创建用户文件记录
+            // 创建用户文件记录（支持外部 URL，fileId 可为空）
             TikUserFileDO userFile = new TikUserFileDO();
             userFile.setUserId(userId);
-            userFile.setFileId(infraFileId);
-            userFile.setFileName(String.format("数字人视频_%d_%d.mp4", task.getId(), System.currentTimeMillis()));
+            userFile.setFileId(saveResult.getInfraFileId()); // OSS保存失败时为null，表示外部URL
+            userFile.setFileName(String.format("数字人视频_%d.mp4", task.getId()));
             userFile.setFileType("video/mp4");
             userFile.setFileCategory("generate");
             userFile.setFileUrl(saveResult.getUrl());
             userFile.setFilePath(saveResult.getFilePath());
-            userFile.setFileSize((long) saveResult.getFileSize());
+            userFile.setFileSize(saveResult.getInfraFileId() != null ? (long) saveResult.getFileSize() : null);
 
             userFileMapper.insert(userFile);
 
-            log.info("[saveResultVideoToUserFiles][任务({})文件记录已保存][userFileId={}, infraFileId={}]",
-                    task.getId(), userFile.getId(), infraFileId);
+            if (saveResult.getInfraFileId() != null) {
+                log.info("[saveResultVideoToUserFiles][任务({})已保存到OSS][userFileId={}, infraFileId={}]",
+                        task.getId(), userFile.getId(), saveResult.getInfraFileId());
+            } else {
+                log.info("[saveResultVideoToUserFiles][任务({})已保存外部URL][userFileId={}, url={}]",
+                        task.getId(), userFile.getId(), saveResult.getUrl());
+            }
         } catch (Exception e) {
             log.error("[saveResultVideoToUserFiles][任务({})保存失败]", task.getId(), e);
         }