feat: 功能优化
This commit is contained in:
@@ -13,10 +13,16 @@
|
||||
<h3>文案</h3>
|
||||
<a-textarea
|
||||
v-model:value="ttsText"
|
||||
placeholder="请输入你想让角色说话的内容"
|
||||
:placeholder="textareaPlaceholder"
|
||||
:rows="4"
|
||||
:maxlength="maxTextLength"
|
||||
:show-count="true"
|
||||
class="tts-textarea"
|
||||
/>
|
||||
<div v-if="identified && faceDuration > 0" class="text-hint">
|
||||
<span class="hint-icon">💡</span>
|
||||
<span>视频中人脸出现时长约 {{ (faceDuration / 1000).toFixed(1) }} 秒,建议文案不超过 {{ suggestedMaxChars }} 字</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 音色选择 -->
|
||||
@@ -139,6 +145,74 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 配音生成与校验(仅在识别后显示) -->
|
||||
<div v-if="identified" class="section audio-generation-section">
|
||||
<h3>配音生成与校验</h3>
|
||||
|
||||
<!-- 生成配音按钮 -->
|
||||
<div class="generate-audio-row">
|
||||
<a-button
|
||||
type="default"
|
||||
size="large"
|
||||
:disabled="!canGenerateAudio"
|
||||
:loading="generatingAudio"
|
||||
block
|
||||
@click="handleGenerateAudio"
|
||||
>
|
||||
{{ generatingAudio ? '生成中...' : '生成配音(用于校验时长)' }}
|
||||
</a-button>
|
||||
</div>
|
||||
|
||||
<!-- 音频预览(生成后显示) -->
|
||||
<div v-if="generatedAudio" class="audio-preview">
|
||||
<div class="audio-info">
|
||||
<h4>生成的配音</h4>
|
||||
<div class="duration-info">
|
||||
<span class="label">音频时长:</span>
|
||||
<span class="value">{{ (audioDurationMs / 1000).toFixed(1) }} 秒</span>
|
||||
</div>
|
||||
<div class="duration-info">
|
||||
<span class="label">人脸区间:</span>
|
||||
<span class="value">{{ (faceDuration / 1000).toFixed(1) }} 秒</span>
|
||||
</div>
|
||||
<div class="duration-info" :class="{ 'validation-passed': audioValidationPassed, 'validation-failed': !audioValidationPassed }">
|
||||
<span class="label">校验结果:</span>
|
||||
<span class="value">
|
||||
{{ audioValidationPassed ? '✅ 通过' : '❌ 不通过(需至少2秒重合)' }}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 音频播放器 -->
|
||||
<div class="audio-player">
|
||||
<audio
|
||||
v-if="generatedAudio.audioBase64"
|
||||
:src="`data:audio/mp3;base64,${generatedAudio.audioBase64}`"
|
||||
controls
|
||||
class="audio-element"
|
||||
/>
|
||||
<audio
|
||||
v-else-if="generatedAudio.audioUrl"
|
||||
:src="generatedAudio.audioUrl"
|
||||
controls
|
||||
class="audio-element"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<!-- 重新生成按钮 -->
|
||||
<div class="regenerate-row">
|
||||
<a-button
|
||||
type="link"
|
||||
size="small"
|
||||
@click="handleGenerateAudio"
|
||||
:loading="generatingAudio"
|
||||
>
|
||||
重新生成
|
||||
</a-button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 按钮组 -->
|
||||
<div class="action-buttons">
|
||||
<a-button
|
||||
@@ -162,6 +236,12 @@
|
||||
>
|
||||
{{ isGenerating ? '生成中...' : '生成数字人视频' }}
|
||||
</a-button>
|
||||
|
||||
<!-- 添加提示信息 -->
|
||||
<div v-if="canGenerate && !audioValidationPassed" class="generate-hint">
|
||||
<span class="hint-icon">⚠️</span>
|
||||
<span>请先生成配音并通过时长校验</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -191,6 +271,7 @@ import { ref, computed, onMounted } from 'vue'
|
||||
import { message } from 'ant-design-vue'
|
||||
import { SoundOutlined, LoadingOutlined } from '@ant-design/icons-vue'
|
||||
import { createKlingTaskAndIdentify ,createLipSyncTask , getLipSyncTask} from '@/api/kling'
|
||||
import { getDigitalHumanTask } from '@/api/digitalHuman'
|
||||
import { MaterialService } from '@/api/material'
|
||||
import { VoiceService } from '@/api/voice'
|
||||
import { useVoiceCopyStore } from '@/stores/voiceCopy'
|
||||
@@ -213,8 +294,44 @@ const currentTaskError = ref('') // 任务错误信息
|
||||
// 识别结果存储
|
||||
const identifySessionId = ref('') // 人脸识别会话ID
|
||||
const identifyFaceId = ref('') // 选中的人脸ID
|
||||
const identifyFaceStartTime = ref(0) // 人脸可对口型区间起点时间(ms)
|
||||
const identifyFaceEndTime = ref(0) // 人脸可对口型区间终点时间(ms)
|
||||
const identifyVideoFileId = ref(null) // 视频文件ID
|
||||
|
||||
// 配音预生成状态
|
||||
const generatedAudio = ref(null) // 生成的音频数据
|
||||
const audioDurationMs = ref(0) // 音频时长(毫秒)
|
||||
const audioValidationPassed = ref(false) // 时长校验是否通过
|
||||
const generatingAudio = ref(false) // 是否正在生成配音
|
||||
|
||||
// 人脸区间时长(毫秒)
|
||||
const faceDuration = computed(() => identifyFaceEndTime.value - identifyFaceStartTime.value)
|
||||
|
||||
// Suggested maximum script length derived from the face interval, assuming
// roughly 3.5 Chinese characters are spoken per second at 1.0x speech rate.
// Always yields a non-negative integer.
const suggestedMaxChars = computed(() => {
  const CHARS_PER_SECOND = 3.5
  const durationSec = faceDuration.value / 1000
  // A higher speech rate fits more characters into the same time span;
  // a missing or zero rate falls back to normal speed.
  const adjustedRate = speechRate.value || 1.0
  const estimate = Math.floor(durationSec * CHARS_PER_SECOND * adjustedRate)
  // The interval can be zero or negative (e.g. the end time is missing and
  // defaults to 0); never surface a negative or NaN character count.
  return Number.isFinite(estimate) && estimate > 0 ? estimate : 0
})
|
||||
|
||||
// Hard character limit bound to the script textarea: 1.2x the suggested
// length to leave some headroom, never above the default limit of 4000.
const maxTextLength = computed(() => {
  const DEFAULT_MAX_LENGTH = 4000
  if (!identified.value || faceDuration.value <= 0) {
    // No usable recognition result yet: apply the default limit.
    return DEFAULT_MAX_LENGTH
  }
  const cap = Math.min(DEFAULT_MAX_LENGTH, Math.floor(suggestedMaxChars.value * 1.2))
  // A very short face interval gives a cap of 0 (and bad input can give a
  // negative or NaN cap); such a maxlength would make the textarea reject
  // all typing, so fall back to the default limit instead.
  return Number.isFinite(cap) && cap >= 1 ? cap : DEFAULT_MAX_LENGTH
})
|
||||
|
||||
// Placeholder for the script textarea: once a face interval is known it
// mentions the suggested character count, otherwise it shows the generic prompt.
const textareaPlaceholder = computed(() => {
  const hasFaceInterval = identified.value && faceDuration.value > 0
  return hasFaceInterval
    ? `请输入文案,建议不超过${suggestedMaxChars.value}字以确保与视频匹配`
    : '请输入你想让角色说话的内容'
})
|
||||
|
||||
// 音频试听缓存
|
||||
const previewAudioCache = new Map()
|
||||
const MAX_PREVIEW_CACHE_SIZE = 50
|
||||
@@ -259,8 +376,17 @@ const canGenerate = computed(() => {
|
||||
const hasText = ttsText.value.trim()
|
||||
const hasVoice = selectedVoiceMeta.value
|
||||
const hasVideo = uploadedVideo.value
|
||||
const isIdentified = identified.value // 必须先识别
|
||||
const notGenerating = !isGenerating.value
|
||||
return !!(hasText && hasVoice && hasVideo && notGenerating)
|
||||
const audioValidated = audioValidationPassed.value // 必须通过音频时长校验
|
||||
return !!(hasText && hasVoice && hasVideo && isIdentified && notGenerating && audioValidated)
|
||||
})
|
||||
|
||||
// Whether the voice-over can be pre-generated. Only non-blank text, a selected
// voice and no synthesis already in flight are required; passing the duration
// validation is not a precondition here.
const canGenerateAudio = computed(() => {
  const trimmedText = ttsText.value.trim()
  if (!trimmedText) {
    return false
  }
  if (!selectedVoiceMeta.value) {
    return false
  }
  return !generatingAudio.value
})
|
||||
|
||||
// UI 控制
|
||||
@@ -434,12 +560,17 @@ const handleIdentify = async () => {
|
||||
// 保存识别结果
|
||||
identifySessionId.value = res.data.sessionId
|
||||
identifyVideoFileId.value = res.data.fileId
|
||||
identifyVideoFileId.value = res.data.faceId
|
||||
identifyFaceId.value = res.data.faceId
|
||||
// 保存人脸时间信息,用于音频插入时间
|
||||
identifyFaceStartTime.value = res.data.startTime || 0
|
||||
identifyFaceEndTime.value = res.data.endTime || 0
|
||||
identified.value = true
|
||||
message.success('识别完成!')
|
||||
console.log( '识别结果:', res.data)
|
||||
// 识别成功后,延迟1.5秒自动生成数字人视频
|
||||
await handleGenerate()
|
||||
|
||||
// 显示识别成功提示,包含人脸区间信息
|
||||
const durationSec = (identifyFaceEndTime.value - identifyFaceStartTime.value) / 1000
|
||||
message.success(`识别完成!人脸出现时长约 ${durationSec.toFixed(1)} 秒,建议文案不超过 ${suggestedMaxChars.value} 字`)
|
||||
console.log('识别结果:', res.data)
|
||||
// 不再自动触发生成,让用户先调整文案
|
||||
} catch (error) {
|
||||
message.error(error.message || '识别失败')
|
||||
} finally {
|
||||
@@ -447,6 +578,147 @@ const handleIdentify = async () => {
|
||||
}
|
||||
}
|
||||
|
||||
// Pre-generate the voice-over so that its real duration can be validated
// against the recognised face interval.
//
// State contract:
// - generatedAudio / audioDurationMs are committed only after the duration has
//   been parsed successfully, so they always describe the same audio.
// - If the service answers with an unusable payload (no audioBase64, or audio
//   whose duration cannot be parsed) all audio state is reset, so a stale
//   "validation passed" flag can never survive next to different audio.
// - If the request itself fails (exception or non-zero code) the previously
//   generated audio, if any, is left untouched.
const handleGenerateAudio = async () => {
  // Ignore re-entrant calls while a synthesis request is still in flight.
  if (generatingAudio.value) {
    return
  }

  if (!ttsText.value.trim()) {
    message.warning('请输入文案内容')
    return
  }

  const voice = selectedVoiceMeta.value
  if (!voice) {
    message.warning('请选择音色')
    return
  }

  // Reset every piece of pre-generated audio state together.
  const discardGeneratedAudio = () => {
    generatedAudio.value = null
    audioDurationMs.value = 0
    audioValidationPassed.value = false
  }

  generatingAudio.value = true
  try {
    const params = {
      inputText: ttsText.value,
      voiceConfigId: voice.rawId || extractIdFromString(voice.id),
      speechRate: speechRate.value || 1.0,
      audioFormat: 'mp3'
    }

    const res = await VoiceService.synthesize(params)
    if (res.code !== 0) {
      throw new Error(res.msg || '配音生成失败')
    }

    // The duration is always measured on the client from the returned audio
    // bytes, so a payload without audioBase64 cannot be validated.
    const audioData = res.data
    if (!audioData || !audioData.audioBase64) {
      discardGeneratedAudio()
      throw new Error('未收到音频数据,无法进行时长解析')
    }

    try {
      const durationMs = await parseAudioDuration(audioData.audioBase64)
      generatedAudio.value = audioData
      audioDurationMs.value = durationMs
      // Re-derives audioValidationPassed for the new audio.
      validateAudioDuration()
      message.success('配音生成成功!')
    } catch (error) {
      // Parsing failed: drop everything and ask the user to regenerate.
      console.error('❌ 音频解析失败:', error)
      message.error('音频解析失败,请重新生成配音')
      discardGeneratedAudio()
    }
  } catch (error) {
    console.error('generateAudio error:', error)
    message.error(error.message || '配音生成失败')
  } finally {
    generatingAudio.value = false
  }
}
|
||||
|
||||
/**
 * Decode Base64 audio data and measure its real playback duration by loading
 * it into an Audio element.
 *
 * The promise rejects (it never falls back to another duration source) when
 * the Base64 data cannot be decoded, when the audio element reports a load
 * error, or when the reported duration is not a finite positive number.
 *
 * @param {string} base64Data - Base64 audio data; may carry a `data:audio/...;base64,` prefix
 * @returns {Promise<number>} Audio duration in milliseconds, rounded to an integer
 */
const parseAudioDuration = (base64Data) => {
  return new Promise((resolve, reject) => {
    let objectUrl = ''
    // Release the temporary object URL exactly once, on every exit path.
    const releaseObjectUrl = () => {
      if (objectUrl) {
        URL.revokeObjectURL(objectUrl)
        objectUrl = ''
      }
    }

    try {
      // Keep only the raw Base64 payload (strip a data:...;base64, prefix).
      const base64 = base64Data.includes(',') ? base64Data.split(',')[1] : base64Data

      // Base64 -> binary bytes
      const binaryString = window.atob(base64)
      const bytes = new Uint8Array(binaryString.length)
      for (let i = 0; i < binaryString.length; i++) {
        bytes[i] = binaryString.charCodeAt(i)
      }

      const blob = new Blob([bytes], { type: 'audio/mp3' })
      const audio = new Audio()
      objectUrl = URL.createObjectURL(blob)

      audio.addEventListener('loadedmetadata', () => {
        releaseObjectUrl()
        const durationMs = Math.round(audio.duration * 1000) // seconds -> milliseconds
        // The media duration may be NaN or Infinity when the length cannot be
        // determined; such a value must not be treated as a measured duration.
        if (!Number.isFinite(durationMs) || durationMs <= 0) {
          reject(new Error(`音频时长无效: ${audio.duration}`))
          return
        }
        console.log('✅ 音频解析完成:', {
          duration: audio.duration + '秒',
          durationMs: durationMs + '毫秒'
        })
        resolve(durationMs)
      }, { once: true })

      audio.addEventListener('error', () => {
        releaseObjectUrl()
        console.warn('⚠️ 音频解析失败')
        // Reject with a real Error (not the DOM event) so callers get a message.
        const mediaError = audio.error
        const detail = mediaError ? ` (code ${mediaError.code})` : ''
        reject(new Error(`音频加载失败,无法解析时长${detail}`))
      }, { once: true })

      // Assign the source and start loading metadata.
      audio.src = objectUrl
      audio.load()
    } catch (error) {
      releaseObjectUrl()
      console.error('❌ 音频解析异常:', error)
      reject(error)
    }
  })
}
|
||||
|
||||
// Check that the generated audio fits the recognised face interval.
// Rule: the audio and the face interval must overlap for at least 2 seconds.
// Stores the outcome in audioValidationPassed, shows a toast for pass/fail,
// and returns the outcome. Without a usable face interval the check fails
// silently (no toast).
const validateAudioDuration = () => {
  const MIN_OVERLAP_MS = 2000

  const intervalUnusable = !identified.value || faceDuration.value <= 0
  if (intervalUnusable) {
    audioValidationPassed.value = false
    return false
  }

  const intervalStart = identifyFaceStartTime.value
  const intervalEnd = identifyFaceEndTime.value
  const audioMs = audioDurationMs.value

  // Simplification: the audio is assumed to be inserted at the start of the
  // face interval, so the overlap begins at intervalStart.
  const audioEnd = Math.min(intervalEnd, intervalStart + audioMs)
  const overlapMs = Math.max(0, audioEnd - intervalStart)

  const passed = overlapMs >= MIN_OVERLAP_MS
  audioValidationPassed.value = passed

  if (passed) {
    message.success('时长校验通过!')
    return passed
  }

  const audioSec = (audioMs / 1000).toFixed(1)
  const intervalSec = ((intervalEnd - intervalStart) / 1000).toFixed(1)
  const overlapSec = (overlapMs / 1000).toFixed(1)
  message.warning(
    `音频时长(${audioSec}秒)与人脸区间(${intervalSec}秒)不匹配,重合部分仅${overlapSec}秒,至少需要2秒`
  )
  return passed
}
|
||||
|
||||
// 生成数字人视频
|
||||
const handleGenerate = async () => {
|
||||
if (!canGenerate.value) {
|
||||
@@ -454,6 +726,13 @@ const handleGenerate = async () => {
|
||||
return
|
||||
}
|
||||
|
||||
// 检查文案内容
|
||||
const text = ttsText.value.trim()
|
||||
if (!text) {
|
||||
message.warning('请输入文案内容')
|
||||
return
|
||||
}
|
||||
|
||||
const voice = selectedVoiceMeta.value
|
||||
if (!voice) {
|
||||
message.warning('请选择音色')
|
||||
@@ -474,9 +753,12 @@ const handleGenerate = async () => {
|
||||
volume: 0,
|
||||
guidanceScale: 1,
|
||||
seed: 8888,
|
||||
klingSessionId: identifySessionId.value,
|
||||
klingFaceId: identifyFaceId.value,
|
||||
aiProvider: 'kling'
|
||||
kling_session_id: identifySessionId.value,
|
||||
kling_face_id: identifyFaceId.value,
|
||||
// 人脸可对口型时间区间,用于音频插入时间
|
||||
kling_face_start_time: identifyFaceStartTime.value,
|
||||
kling_face_end_time: identifyFaceEndTime.value,
|
||||
ai_provider: 'kling'
|
||||
}
|
||||
|
||||
const configId = voice.rawId || extractIdFromString(voice.id)
|
||||
@@ -486,6 +768,25 @@ const handleGenerate = async () => {
|
||||
}
|
||||
taskData.voiceConfigId = configId
|
||||
|
||||
// ✅ 新增:传递预生成的音频给后端,复用而不重复TTS
|
||||
if (generatedAudio.value && audioDurationMs.value > 0) {
|
||||
taskData.pre_generated_audio = {
|
||||
audioBase64: generatedAudio.value.audioBase64,
|
||||
format: generatedAudio.value.format || 'mp3'
|
||||
}
|
||||
|
||||
// ✅ 新增:传递 sound_end_time 给可灵API(音频结束时间)
|
||||
// 可灵API要求:音频从0开始,所以结束时间 = 0 + 音频时长
|
||||
taskData.sound_end_time = audioDurationMs.value
|
||||
|
||||
console.log('传递预生成音频给后端:', {
|
||||
soundEndTime: taskData.sound_end_time,
|
||||
hasAudioData: !!generatedAudio.value.audioBase64
|
||||
})
|
||||
} else {
|
||||
console.warn('⚠️ 未找到预生成音频,将在后端重新TTS')
|
||||
}
|
||||
|
||||
message.loading('正在创建任务...', 0)
|
||||
const res = await createLipSyncTask(taskData)
|
||||
message.destroy()
|
||||
@@ -710,6 +1011,23 @@ let previewObjectUrl = ''
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
.text-hint {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
margin-top: 8px;
|
||||
padding: 8px 12px;
|
||||
background: rgba(59, 130, 246, 0.1);
|
||||
border: 1px solid rgba(59, 130, 246, 0.2);
|
||||
border-radius: 6px;
|
||||
font-size: 13px;
|
||||
color: #94a3b8;
|
||||
}
|
||||
|
||||
.hint-icon {
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
.voice-source-toggle {
|
||||
display: inline-flex;
|
||||
border: 1px solid rgba(59, 130, 246, 0.2);
|
||||
@@ -1088,4 +1406,83 @@ let previewObjectUrl = ''
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
}
|
||||
|
||||
/* 配音生成与校验样式 */
|
||||
.audio-generation-section {
|
||||
margin-bottom: 24px;
|
||||
padding: 16px;
|
||||
background: rgba(255, 255, 255, 0.03);
|
||||
border-radius: 12px;
|
||||
border: 1px solid rgba(59, 130, 246, 0.15);
|
||||
}
|
||||
|
||||
.generate-audio-row {
|
||||
margin-bottom: 16px;
|
||||
}
|
||||
|
||||
.audio-preview {
|
||||
padding: 16px;
|
||||
background: rgba(0, 0, 0, 0.2);
|
||||
border-radius: 8px;
|
||||
}
|
||||
|
||||
.audio-info h4 {
|
||||
color: #fff;
|
||||
margin-bottom: 12px;
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
.duration-info {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
margin-bottom: 8px;
|
||||
font-size: 13px;
|
||||
}
|
||||
|
||||
.duration-info .label {
|
||||
color: var(--color-text-secondary);
|
||||
}
|
||||
|
||||
.duration-info .value {
|
||||
color: #fff;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.duration-info.validation-passed .value {
|
||||
color: #52c41a;
|
||||
}
|
||||
|
||||
.duration-info.validation-failed .value {
|
||||
color: #ff4d4f;
|
||||
}
|
||||
|
||||
.audio-player {
|
||||
margin: 16px 0;
|
||||
}
|
||||
|
||||
.audio-element {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.regenerate-row {
|
||||
text-align: center;
|
||||
margin-top: 12px;
|
||||
}
|
||||
|
||||
/* Warning banner shown under the generate button while the voice-over has not
   passed the duration validation yet. */
.generate-hint {
  display: flex;
  align-items: center;
  gap: 8px;
  margin-top: 8px;
  padding: 8px 12px;
  background: rgba(255, 193, 7, 0.1);
  border: 1px solid rgba(255, 193, 7, 0.3);
  border-radius: 6px;
  font-size: 13px;
  color: #faad14;
}

/* The .hint-icon rule (font-size: 14px) is declared once earlier in this
   stylesheet, next to .text-hint; the identical duplicate that used to follow
   here has been removed. */
|
||||
</style>
|
||||
|
||||
Reference in New Issue
Block a user