@@ -13,10 +13,16 @@
< h3 > 文案 < / h3 >
< a-textarea
v -model :value = "ttsText"
placeholder= "请输入你想让角色说话的内容 "
: placeholder= "textareaPlaceholder "
:rows = "4"
:maxlength = "maxTextLength"
:show-count = "true"
class = "tts-textarea"
/ >
< div v-if = "identified && faceDuration > 0" class="text-hint" >
< span class = "hint-icon" > 💡 < / span >
< span > 视频中人脸出现时长约 { { ( faceDuration / 1000 ) . toFixed ( 1 ) } } 秒 , 建议文案不超过 { { suggestedMaxChars } } 字 < / span >
< / div >
< / div >
<!-- 音色选择 -- >
@@ -139,6 +145,74 @@
< / div >
< / div >
<!-- 配音生成与校验 ( 仅在识别后显示 ) -- >
< div v-if = "identified" class="section audio-generation-section" >
< h3 > 配音生成与校验 < / h3 >
<!-- 生成配音按钮 -- >
< div class = "generate-audio-row" >
< a-button
type = "default"
size = "large"
:disabled = "!canGenerateAudio"
:loading = "generatingAudio"
block
@click ="handleGenerateAudio"
>
{ { generatingAudio ? '生成中...' : '生成配音(用于校验时长)' } }
< / a-button >
< / div >
<!-- 音频预览 ( 生成后显示 ) -- >
< div v-if = "generatedAudio" class="audio-preview" >
< div class = "audio-info" >
< h4 > 生成的配音 < / h4 >
< div class = "duration-info" >
< span class = "label" > 音频时长 : < / span >
< span class = "value" > { { ( audioDurationMs / 1000 ) . toFixed ( 1 ) } } 秒 < / span >
< / div >
< div class = "duration-info" >
< span class = "label" > 人脸区间 : < / span >
< span class = "value" > { { ( faceDuration / 1000 ) . toFixed ( 1 ) } } 秒 < / span >
< / div >
< div class = "duration-info" : class = "{ 'validation-passed': audioValidationPassed, 'validation-failed': !audioValidationPassed }" >
< span class = "label" > 校验结果 : < / span >
< span class = "value" >
{ { audioValidationPassed ? '✅ 通过' : '❌ 不通过( 需至少2秒重合) ' } }
< / span >
< / div >
< / div >
<!-- 音频播放器 -- >
< div class = "audio-player" >
< audio
v-if = "generatedAudio.audioBase64"
:src = "`data:audio/mp3;base64,${generatedAudio.audioBase64}`"
controls
class = "audio-element"
/ >
< audio
v-else-if = "generatedAudio.audioUrl"
:src = "generatedAudio.audioUrl"
controls
class = "audio-element"
/ >
< / div >
<!-- 重新生成按钮 -- >
< div class = "regenerate-row" >
< a-button
type = "link"
size = "small"
@click ="handleGenerateAudio"
:loading = "generatingAudio"
>
重新生成
< / a-button >
< / div >
< / div >
< / div >
<!-- 按钮组 -- >
< div class = "action-buttons" >
< a-button
@@ -162,6 +236,12 @@
>
{ { isGenerating ? '生成中...' : '生成数字人视频' } }
< / a-button >
<!-- 添加提示信息 -- >
< div v-if = "canGenerate && !audioValidationPassed" class="generate-hint" >
< span class = "hint-icon" > ⚠ ️ < / span >
< span > 请先生成配音并通过时长校验 < / span >
< / div >
< / div >
< / div >
@@ -191,6 +271,7 @@ import { ref, computed, onMounted } from 'vue'
import { message } from 'ant-design-vue'
import { SoundOutlined , LoadingOutlined } from '@ant-design/icons-vue'
import { createKlingTaskAndIdentify , createLipSyncTask , getLipSyncTask } from '@/api/kling'
import { getDigitalHumanTask } from '@/api/digitalHuman'
import { MaterialService } from '@/api/material'
import { VoiceService } from '@/api/voice'
import { useVoiceCopyStore } from '@/stores/voiceCopy'
@@ -213,8 +294,44 @@ const currentTaskError = ref('') // 任务错误信息
// Stored results of the face-identification step
const identifySessionId = ref ( '' ) // face-identification session ID
const identifyFaceId = ref ( '' ) // ID of the selected face
const identifyFaceStartTime = ref ( 0 ) // start of the interval in which the face can be lip-synced (ms)
const identifyFaceEndTime = ref ( 0 ) // end of the interval in which the face can be lip-synced (ms)
const identifyVideoFileId = ref ( null ) // video file ID
// State of the pre-generated dubbing audio
const generatedAudio = ref ( null ) // generated audio data
const audioDurationMs = ref ( 0 ) // audio duration (milliseconds)
const audioValidationPassed = ref ( false ) // whether the duration check has passed
const generatingAudio = ref ( false ) // whether dubbing generation is in progress
// Length of the detected face interval in milliseconds (end time minus start time).
const faceDuration = computed(() => {
  const intervalEndMs = identifyFaceEndTime.value
  const intervalStartMs = identifyFaceStartTime.value
  return intervalEndMs - intervalStartMs
})
// Suggested maximum number of script characters, derived from the face interval
// length (Chinese speech is estimated at roughly 3.5 characters per second).
// Never negative: an inverted or unusable interval yields 0.
const suggestedMaxChars = computed(() => {
  const durationSec = faceDuration.value / 1000
  // A higher speech rate fits more characters into the same time span;
  // fall back to the normal rate (1.0) when no rate is set.
  const adjustedRate = speechRate.value || 1.0
  const estimate = Math.floor(durationSec * 3.5 * adjustedRate)
  // Guard against a negative or non-finite duration (e.g. end time missing
  // while a start time is present) leaking into user-facing hints.
  return Number.isFinite(estimate) ? Math.max(0, estimate) : 0
})
// Hard limit for the script length: slightly above the suggested count to
// leave some headroom, and never above the default limit.
const maxTextLength = computed(() => {
  const DEFAULT_LIMIT = 4000
  // Without an identified face interval the default limit applies.
  const noUsableInterval = !identified.value || faceDuration.value <= 0
  return noUsableInterval
    ? DEFAULT_LIMIT
    : Math.min(DEFAULT_LIMIT, Math.floor(suggestedMaxChars.value * 1.2))
})
// Placeholder text of the script textarea: includes the suggested character
// count once a face interval with positive length has been identified.
const textareaPlaceholder = computed(() => {
  const hasFaceInterval = identified.value && faceDuration.value > 0
  return hasFaceInterval
    ? ` 请输入文案,建议不超过 ${suggestedMaxChars.value} 字以确保与视频匹配 `
    : '请输入你想让角色说话的内容'
})
// Cache for voice-preview audio
const previewAudioCache = new Map()
// NOTE(review): presumably the maximum number of cached previews — how the
// limit is enforced is not visible in this part of the file.
const MAX_PREVIEW_CACHE_SIZE = 50
@@ -259,8 +376,17 @@ const canGenerate = computed(() => {
const hasText = ttsText . value . trim ( )
const hasVoice = selectedVoiceMeta . value
const hasVideo = uploadedVideo . value
const isIdentified = identified . value // 必须先识别
const notGenerating = ! isGenerating . value
return ! ! ( hasText && hasVoice && hasVideo && notGenerating )
const audioValidated = audioValidationPassed . value // 必须通过音频时长校验
return ! ! ( hasText && hasVoice && hasVideo && isIdentified && notGenerating && audioValidated )
} )
// Whether dubbing audio can be generated: only script text and a selected
// voice are required (no duration check), and no generation may be running.
const canGenerateAudio = computed(() => {
  const scriptText = ttsText.value.trim()
  const voiceMeta = selectedVoiceMeta.value
  if (!scriptText || !voiceMeta) {
    return false
  }
  return !generatingAudio.value
})
// UI 控制
@@ -434,12 +560,17 @@ const handleIdentify = async () => {
// 保存识别结果
identifySessionId . value = res . data . sessionId
identifyVideoFileId . value = res . data . fileId
identifyVideoFil eId . value = res . data . faceId
identifyFac eId . value = res . data . faceId
// 保存人脸时间信息,用于音频插入时间
identifyFaceStartTime . value = res . data . startTime || 0
identifyFaceEndTime . value = res . data . endTime || 0
identified . value = true
message . success ( '识别完成!' )
console . log ( '识别结果:' , res . data )
// 识别成功后, 延迟1.5秒自动生成数字人视频
await handleGenerate ( )
// 显示识别成功提示,包含人脸区间信息
const durationSec = ( identifyFaceEndTime . value - identifyFaceStartTime . value ) / 1000
message . success ( ` 识别完成!人脸出现时长约 ${ durationSec . toFixed ( 1 ) } 秒,建议文案不超过 ${ suggestedMaxChars . value } 字 ` )
console . log ( '识别结果:' , res . data )
// 不再自动触发生成,让用户先调整文案
} catch ( error ) {
message . error ( error . message || '识别失败' )
} finally {
@@ -447,6 +578,147 @@ const handleIdentify = async () => {
}
}
/**
 * Pre-generates the dubbing audio for the current script so that its real
 * duration can be checked against the detected face interval.
 *
 * Shows a warning and does nothing when the script is empty or no voice is
 * selected. `generatedAudio` and `audioDurationMs` are only committed after
 * the response carries audio data whose duration could be parsed; a response
 * without usable audio clears them together with `audioValidationPassed`, so
 * a stale "passed" result can never be paired with missing audio.
 *
 * @returns {Promise<void>}
 */
const handleGenerateAudio = async () => {
  if (!ttsText.value.trim()) {
    message.warning('请输入文案内容')
    return
  }
  const voice = selectedVoiceMeta.value
  if (!voice) {
    message.warning('请选择音色')
    return
  }

  // Drops everything known about a previously generated audio clip.
  const clearGeneratedAudio = () => {
    generatedAudio.value = null
    audioDurationMs.value = 0
    audioValidationPassed.value = false
  }

  generatingAudio.value = true
  try {
    const params = {
      inputText: ttsText.value,
      voiceConfigId: voice.rawId || extractIdFromString(voice.id),
      speechRate: speechRate.value || 1.0,
      audioFormat: 'mp3'
    }
    const res = await VoiceService.synthesize(params)
    if (res.code !== 0) {
      throw new Error(res.msg || '配音生成失败')
    }

    // The duration is taken strictly from the decoded audio, so a response
    // without audio data is unusable.
    const payload = res.data
    if (!payload || !payload.audioBase64) {
      clearGeneratedAudio()
      throw new Error('未收到音频数据,无法进行时长解析')
    }

    let parsedDurationMs
    try {
      parsedDurationMs = await parseAudioDuration(payload.audioBase64)
    } catch (error) {
      // Parsing failed: abort and require the user to generate again.
      console.error('❌ 音频解析失败:', error)
      message.error('音频解析失败,请重新生成配音')
      clearGeneratedAudio()
      return
    }

    // Commit the new audio only now that it is known to be usable.
    generatedAudio.value = payload
    audioDurationMs.value = parsedDurationMs
    // Run the duration check automatically.
    validateAudioDuration()
    message.success('配音生成成功!')
  } catch (error) {
    console.error('generateAudio error:', error)
    message.error(error.message || '配音生成失败')
  } finally {
    generatingAudio.value = false
  }
}
/**
 * Decodes Base64 audio data and determines its real playback duration by
 * loading it into an audio element.
 *
 * @param {string} base64Data - Base64 audio data (may include a `data:audio/...;base64,` prefix)
 * @returns {Promise<number>} Audio duration in milliseconds. The promise
 *   rejects with an Error when the data cannot be decoded, the audio fails to
 *   load, or the reported duration is not a finite positive number.
 */
const parseAudioDuration = (base64Data) => {
  return new Promise((resolve, reject) => {
    let objectUrl = ''
    // Releases the temporary object URL exactly once.
    const releaseObjectUrl = () => {
      if (objectUrl) {
        URL.revokeObjectURL(objectUrl)
        objectUrl = ''
      }
    }

    try {
      // Keep only the raw Base64 part (strip a `data:...;base64,` prefix).
      const base64 = base64Data.includes(',') ? base64Data.split(',')[1] : base64Data

      // Base64 -> binary bytes
      const binaryString = window.atob(base64)
      const bytes = new Uint8Array(binaryString.length)
      for (let i = 0; i < binaryString.length; i++) {
        bytes[i] = binaryString.charCodeAt(i)
      }

      const blob = new Blob([bytes], { type: 'audio/mp3' })
      const audio = new Audio()
      objectUrl = URL.createObjectURL(blob)

      audio.addEventListener('loadedmetadata', () => {
        releaseObjectUrl()
        const durationMs = Math.round(audio.duration * 1000) // seconds -> milliseconds
        // A media element can report NaN or Infinity as its duration; such a
        // value must not be passed on as a valid length.
        if (!Number.isFinite(durationMs) || durationMs <= 0) {
          console.warn('⚠️ 音频时长无效:', audio.duration)
          reject(new Error(`音频时长无效: ${audio.duration}`))
          return
        }
        console.log('✅ 音频解析完成:', {
          duration: audio.duration + '秒',
          durationMs: durationMs + '毫秒'
        })
        resolve(durationMs)
      })

      audio.addEventListener('error', () => {
        releaseObjectUrl()
        // No fallback duration exists here; the caller has to handle the rejection.
        console.warn('⚠️ 音频解析失败')
        reject(new Error('音频加载失败,无法解析时长'))
      })

      // Assign the source and start loading.
      audio.src = objectUrl
      audio.load()
    } catch (error) {
      releaseObjectUrl()
      console.error('❌ 音频解析异常:', error)
      reject(error)
    }
  })
}
// Checks whether the generated audio fits the identified face interval.
// Requirement: audio and face interval must overlap for at least 2 seconds.
// Stores the outcome in audioValidationPassed, shows a toast and returns it.
const validateAudioDuration = () => {
  const noUsableInterval = !identified.value || faceDuration.value <= 0
  if (noUsableInterval) {
    audioValidationPassed.value = false
    return false
  }

  const intervalStart = identifyFaceStartTime.value
  const intervalEnd = identifyFaceEndTime.value
  const audioMs = audioDurationMs.value

  // Simplification: the audio is assumed to be inserted at the interval start,
  // so the overlap runs from there to whichever ends first.
  const audioEnd = intervalStart + audioMs
  const overlapMs = Math.max(0, Math.min(intervalEnd, audioEnd) - intervalStart)

  const passed = overlapMs >= 2000
  audioValidationPassed.value = passed

  if (passed) {
    message.success('时长校验通过!')
    return passed
  }

  const toSeconds = (ms) => (ms / 1000).toFixed(1)
  message.warning(
    ` 音频时长( ${toSeconds(audioMs)} 秒)与人脸区间( ${toSeconds(intervalEnd - intervalStart)} 秒)不匹配,重合部分仅 ${toSeconds(overlapMs)} 秒, 至少需要2秒 `
  )
  return passed
}
// 生成数字人视频
const handleGenerate = async ( ) => {
if ( ! canGenerate . value ) {
@@ -454,6 +726,13 @@ const handleGenerate = async () => {
return
}
// 检查文案内容
const text = ttsText . value . trim ( )
if ( ! text ) {
message . warning ( '请输入文案内容' )
return
}
const voice = selectedVoiceMeta . value
if ( ! voice ) {
message . warning ( '请选择音色' )
@@ -474,9 +753,12 @@ const handleGenerate = async () => {
volume : 0 ,
guidanceScale : 1 ,
seed : 8888 ,
klingS essionI d : identifySessionId . value ,
klingFaceI d : identifyFaceId . value ,
aiProvider : 'kling'
kling_s ession_i d : identifySessionId . value ,
kling_face _i d : identifyFaceId . value ,
// 人脸可对口型时间区间,用于音频插入时间
kling _face _start _time : identifyFaceStartTime . value ,
kling _face _end _time : identifyFaceEndTime . value ,
ai _provider : 'kling'
}
const configId = voice . rawId || extractIdFromString ( voice . id )
@@ -486,6 +768,25 @@ const handleGenerate = async () => {
}
taskData . voiceConfigId = configId
// ✅ 新增: 传递预生成的音频给后端, 复用而不重复TTS
if ( generatedAudio . value && audioDurationMs . value > 0 ) {
taskData . pre _generated _audio = {
audioBase64 : generatedAudio . value . audioBase64 ,
format : generatedAudio . value . format || 'mp3'
}
// ✅ 新增:传递 sound_end_time 给可灵API( 音频结束时间)
// 可灵API要求: 音频从0开始, 所以结束时间 = 0 + 音频时长
taskData . sound _end _time = audioDurationMs . value
console . log ( '传递预生成音频给后端:' , {
soundEndTime : taskData . sound _end _time ,
hasAudioData : ! ! generatedAudio . value . audioBase64
} )
} else {
console . warn ( '⚠️ 未找到预生成音频, 将在后端重新TTS' )
}
message . loading ( '正在创建任务...' , 0 )
const res = await createLipSyncTask ( taskData )
message . destroy ( )
@@ -710,6 +1011,23 @@ let previewObjectUrl = ''
color : # fff ;
}
/* Hint shown below the script textarea (suggested length for the face interval) */
.text-hint {
  display: flex;
  align-items: center;
  gap: 6px;
  margin-top: 8px;
  padding: 8px 12px;
  background: rgba(59, 130, 246, 0.1);
  border: 1px solid rgba(59, 130, 246, 0.2);
  border-radius: 6px;
  font-size: 13px;
  color: #94a3b8;
}

.hint-icon {
  font-size: 14px;
}
. voice - source - toggle {
display : inline - flex ;
border : 1 px solid rgba ( 59 , 130 , 246 , 0.2 ) ;
@@ -1088,4 +1406,83 @@ let previewObjectUrl = ''
grid - template - columns : 1 fr ;
}
}
/* Styles for the dubbing generation and validation section */
.audio-generation-section {
  margin-bottom: 24px;
  padding: 16px;
  background: rgba(255, 255, 255, 0.03);
  border-radius: 12px;
  border: 1px solid rgba(59, 130, 246, 0.15);
}

.generate-audio-row {
  margin-bottom: 16px;
}

.audio-preview {
  padding: 16px;
  background: rgba(0, 0, 0, 0.2);
  border-radius: 8px;
}

.audio-info h4 {
  color: #fff;
  margin-bottom: 12px;
  font-size: 14px;
}

.duration-info {
  display: flex;
  justify-content: space-between;
  margin-bottom: 8px;
  font-size: 13px;
}

/* .label and .value are child spans of .duration-info */
.duration-info .label {
  color: var(--color-text-secondary);
}

.duration-info .value {
  color: #fff;
  font-weight: 600;
}

/* The validation class sits on the .duration-info element itself */
.duration-info.validation-passed .value {
  color: #52c41a;
}

.duration-info.validation-failed .value {
  color: #ff4d4f;
}

.audio-player {
  margin: 16px 0;
}

.audio-element {
  width: 100%;
}

.regenerate-row {
  text-align: center;
  margin-top: 12px;
}

/* Warning shown under the generate button until the duration check passes */
.generate-hint {
  display: flex;
  align-items: center;
  gap: 8px;
  margin-top: 8px;
  padding: 8px 12px;
  background: rgba(255, 193, 7, 0.1);
  border: 1px solid rgba(255, 193, 7, 0.3);
  border-radius: 6px;
  font-size: 13px;
  color: #faad14;
}

.hint-icon {
  font-size: 14px;
}
< / style >