diff --git a/docs/cosyvoice-copy.md b/docs/cosyvoice-copy.md new file mode 100644 index 0000000000..898d996d39 --- /dev/null +++ b/docs/cosyvoice-copy.md @@ -0,0 +1,129 @@ +# 阿里云CosyVoice声音复刻API说明文档 +## 一、接口概述 +CosyVoice声音复刻API依托大模型技术,仅需10~20秒清晰音频,即可快速生成高度拟真的定制音色(`voice_id`),支持`cosyvoice-v1`和`cosyvoice-v2`模型(v2效果更优)。复刻服务免费,使用复刻音色进行语音合成时按字符计费。 + +核心功能:音色的创建、查询、更新、删除,生成的`voice_id`可直接用于CosyVoice语音合成接口。 + +## 二、前提条件 +1. 开通CosyVoice服务,获取API Key(推荐配置到环境变量,避免硬编码)。 +2. 安装最新版DashScope SDK(Java/Python),其他语言需调用RESTful API。 +3. 准备公网可访问的音频URL(推荐上传至阿里云OSS,支持WAV/MP3/M4A格式)。 + +## 三、核心接口详情(支持Java/Python SDK + RESTful API) +### 1. 创建音色(生成voice_id) +#### 功能描述 +上传10~20秒音频,生成专属`voice_id`,用于后续语音合成。 +#### 请求参数 +| 参数名 | 类型 | 是否必填 | 说明 | +|--------------|--------|----------|----------------------------------------------------------------------| +| target_model | string | 是 | 复刻模型,支持`cosyvoice-v1`/`cosyvoice-v2` | +| prefix | string | 是 | 音色自定义前缀,仅允许数字和小写字母,长度<10字符 | +| url | string | 是 | 音频文件公网URL,需满足格式要求(采样率≥16kHz、文件≤10MB、含≥5秒连续语音) | +#### 响应参数 +| 参数名 | 类型 | 说明 | +|----------|--------|--------------------------| +| voice_id | string | 定制音色ID,用于语音合成 | +| request_id| string | 任务唯一标识,用于排查问题 | +#### 示例代码(Python SDK) +```python +import os +import dashscope +from dashscope.audio.tts_v2 import VoiceEnrollmentService + +dashscope.api_key = os.getenv('DASHSCOPE_API_KEY') +service = VoiceEnrollmentService() +# 调用创建接口 +voice_id = service.create_voice(target_model="cosyvoice-v2", prefix="test", url="音频公网URL") +print(f"生成的voice_id: {voice_id}") +``` + +### 2. 
查询所有音色 +#### 功能描述 +查询账号下已创建的所有音色,支持按前缀筛选和分页。 +#### 请求参数 +| 参数名 | 类型 | 是否必填 | 说明 | +|------------|--------|----------|-------------------------------| +| prefix | string | 否 | 音色前缀,为空则返回所有音色 | +| page_index | int | 否 | 页码索引,默认0 | +| page_size | int | 否 | 每页条数,默认10 | +#### 响应参数 +| 参数名 | 类型 | 说明 | +|--------------|--------|----------------------------------------------------------------------| +| voice_list | array | 音色列表,含每个音色的`voice_id`、创建时间(gmt_create)、状态(status) | +| status | string | 音色状态:DEPLOYING(审核中)/OK(可用)/UNDEPLOYED(审核失败) | +| request_id | string | 任务唯一标识 | + +### 3. 查询指定音色 +#### 功能描述 +查询单个`voice_id`的详细信息(状态、原始音频URL等)。 +#### 请求参数 +| 参数名 | 类型 | 是否必填 | 说明 | +|----------|--------|----------|--------------------| +| voice_id | string | 是 | 需查询的音色ID | +#### 响应参数 +| 参数名 | 类型 | 说明 | +|----------------|--------|----------------------------------------------------------------------| +| voice_id | string | 音色ID | +| resource_link | string | 复刻所用音频的公网URL | +| target_model | string | 复刻时使用的模型 | +| status | string | 音色状态(DEPLOYING/OK/UNDEPLOYED) | +| gmt_create | string | 音色创建时间 | + +### 4. 更新音色 +#### 功能描述 +使用新的音频URL更新已有`voice_id`的音色。 +#### 请求参数 +| 参数名 | 类型 | 是否必填 | 说明 | +|----------|--------|----------|----------------------------------------------------------------------| +| voice_id | string | 是 | 需更新的音色ID | +| url | string | 是 | 新的音频公网URL(需满足格式要求) | +#### 响应参数 +| 参数名 | 类型 | 说明 | +|------------|--------|--------------------| +| request_id | string | 任务唯一标识 | + +### 5. 删除音色 +#### 功能描述 +删除无需使用的`voice_id`,释放配额(账号最多保留1000个音色)。 +#### 请求参数 +| 参数名 | 类型 | 是否必填 | 说明 | +|----------|--------|----------|--------------------| +| voice_id | string | 是 | 需删除的音色ID | +#### 响应参数 +| 参数名 | 类型 | 说明 | +|------------|--------|--------------------| +| request_id | string | 任务唯一标识 | + +## 四、音频文件要求 +1. 格式:支持WAV(16bit)、MP3、M4A。 +2. 采样率:≥16000Hz。 +3. 时长:推荐10~20秒(最长建议不超过60秒),含至少一段≥5秒的连续语音。 +4. 大小:≤10MB。 +5. 质量:语音清晰、无杂音,朗读连贯。 + +## 五、使用流程(复刻→合成) +1. 调用「创建音色」接口,传入音频URL,获取`voice_id`。 +2. 
调用CosyVoice语音合成接口,将`voice_id`作为`voice`参数传入,即可使用定制音色合成语音。 +3. (可选)在调用语音合成之前,可先通过「查询指定音色」接口确认`status`为`OK`后再使用。 + +## 六、关键限制 +1. 配额限制:每个主账号最多保留1000个复刻音色,删除后释放配额。 +2. 并发限制:复刻接口总并发≤10 RPS(v1+v2合计),语音合成接口并发≤3 RPS。 +3. 模型匹配:v1版本`voice_id`仅用于v1合成,v2版本`voice_id`仅用于v2合成,不可混用。 +4. 有效期:超过1年未使用的音色将自动下线。 + +## 七、常见错误码及解决方案 +| 错误码 | 说明 | 解决方案 | +|-------------------------|---------------------------------------|------------------------------------------------------------------| +| Throttling.AllocationQuota | 音色数量达限额 | 删除无用音色或提交工单申请扩容 | +| Audio.AudioShortError | 音频有效时长过短 | 重新录制10~20秒连续语音 | +| InvalidApiKey | API Key无效 | 检查API Key是否正确,无多余空格或缺失字符 | +| Model.AccessDenied | 模型访问权限不足 | 使用“默认业务空间”下的API Key调用 | +| BadRequest.UnsupportedFileFormat | 音频格式不支持 | 转换为WAV/MP3/M4A格式,确认文件实际编码与后缀一致 | +| Audio.FileSizeExceed | 音频文件超过10MB | 压缩文件大小或截取有效片段 | + +## 八、注意事项 +1. 版权要求:需对复刻音频的所有权及合法使用权负责,遵守服务协议。 +2. 音频URL:确保公网可访问,推荐使用阿里云OSS生成临时访问链接(避免长期公开泄露)。 +3. 升级建议:v1音色可使用原始音频重新复刻为v2版本,获得更优效果。 +4. 合成调节:使用`voice_id`合成语音时,可通过`volume`(音量)、`speechRate`(语速)等参数调节输出效果。 diff --git a/docs/cosyvoice-generate.md b/docs/cosyvoice-generate.md new file mode 100644 index 0000000000..18406d9473 --- /dev/null +++ b/docs/cosyvoice-generate.md @@ -0,0 +1,271 @@ +# 阿里云CosyVoice Java SDK 调用模板(参数+示例) +## 一、前提条件 +1. 开通CosyVoice服务,获取API Key(建议配置到环境变量,避免硬编码)。 +2. 安装最新版DashScope SDK(最低要求2.20.3版本,SSML功能同样需要该版本及以上)。 +3. 
模型与音色需匹配(如v2模型对应v2音色,v3模型对应v3音色)。 + +## 二、核心参数汇总 +| 参数名 | 类型 | 是否必填 | 默认值 | 取值范围/说明 | +|--------------|------------|----------|-------------------------|------------------------------------------------------------------------------| +| model | String | 是 | - | cosyvoice-v1/v2/v3/v3-plus(v3系列需申请邀测) | +| voice | String | 是 | - | 对应模型的音色(如v2:longxiaochun_v2;v3:longhuohuo_v3,详见文档音色列表) | +| format | enum | 否 | 因音色而异(默认MP3 22050Hz) | 支持WAV/MP3/PCM/OGG_OPUS,如PCM_22050HZ_MONO_16BIT、MP3_24000HZ_MONO_256KBPS | +| volume | int | 否 | 50 | 0~100(音量大小) | +| speechRate | float | 否 | 1.0 | 0.5~2.0(语速,1.0为默认,约4字/秒) | +| pitchRate | float | 否 | 1.0 | 0.5~2.0(语调) | +| bit_rate | int | 否 | 32 | 6~510kbps(仅opus格式支持,v1模型不支持) | +| seed | int | 否 | 0 | 0~65535(随机数种子,仅v3/v3-plus支持) | +| style | int | 否 | 0 | ≥0整数(风格调整,仅v3/v3-plus支持) | +| languageHints| List | 否 | - | 仅v3/v3-plus支持,单次配置1个语种("zh"/"en") | +| instruction | String | 否 | - | 仅v3/v3-plus支持,格式:"你说话的情感是<情感值>"(如"Happy"/"Angry") | + +## 三、四种核心调用方式模板 +### 1. 同步调用(阻塞式,适合短文本) +```java +import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam; +import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer; +import java.io.File; +import java.io.FileOutputStream; +import java.nio.ByteBuffer; + +public class SyncCallTemplate { + // 配置参数(根据需求修改) + private static final String MODEL = "cosyvoice-v3"; // 模型 + private static final String VOICE = "longhuohuo_v3"; // 音色 + private static final String TEXT = "今天天气真好,适合出门散步!"; // ≤2000字符 + private static final String OUTPUT_FILE = "output.mp3"; // 输出文件 + + public static void main(String[] args) { + // 1. 构建请求参数 + SpeechSynthesisParam param = SpeechSynthesisParam.builder() + // .apiKey("your-api-key") // 未配置环境变量时打开 + .model(MODEL) + .voice(VOICE) + .volume(60) // 可选:调整音量 + .speechRate(1.1f) // 可选:调整语速 + .build(); + + // 2. 初始化合成器(同步调用传null) + SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, null); + ByteBuffer audioData = null; + + try { + // 3. 
阻塞调用,获取完整音频 + audioData = synthesizer.call(TEXT); + // 4. 保存音频到本地 + if (audioData != null) { + try (FileOutputStream fos = new FileOutputStream(new File(OUTPUT_FILE))) { + fos.write(audioData.array()); + } + System.out.println("合成成功!输出文件:" + OUTPUT_FILE); + System.out.println("RequestId:" + synthesizer.getLastRequestId()); + System.out.println("首包延迟:" + synthesizer.getFirstPackageDelay() + "ms"); + } + } catch (Exception e) { + System.err.println("合成失败:" + e.getMessage()); + } finally { + // 5. 关闭WebSocket连接 + synthesizer.getDuplexApi().close(1000, "任务结束"); + } + } +} +``` + +### 2. 异步调用(非阻塞,短文本实时接收) +```java +import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult; +import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam; +import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer; +import com.alibaba.dashscope.common.ResultCallback; +import java.util.concurrent.CountDownLatch; + +public class AsyncCallTemplate { + private static final String MODEL = "cosyvoice-v2"; + private static final String VOICE = "longxiaochun_v2"; + private static final String TEXT = "欢迎使用阿里云CosyVoice语音合成服务!"; // ≤2000字符 + + public static void main(String[] args) throws InterruptedException { + CountDownLatch latch = new CountDownLatch(1); + + // 1. 配置回调(实时接收音频) + ResultCallback callback = new ResultCallback() { + @Override + public void onEvent(SpeechSynthesisResult result) { + // 接收音频分片(可实时播放或写入文件) + if (result.getAudioFrame() != null) { + System.out.println("收到音频分片,大小:" + result.getAudioFrame().capacity() + "字节"); + // 此处可添加音频处理逻辑(如流式播放、追加写入文件) + } + // 查看计费字符数(最终以最后一次为准) + if (result.getUsage() != null) { + System.out.println("当前计费字符数:" + result.getUsage().getCharacters()); + } + } + + @Override + public void onComplete() { + System.out.println("合成完成!"); + latch.countDown(); + } + + @Override + public void onError(Exception e) { + System.err.println("合成失败:" + e.getMessage()); + latch.countDown(); + } + }; + + // 2. 
构建参数并初始化合成器 + SpeechSynthesisParam param = SpeechSynthesisParam.builder() + // .apiKey("your-api-key") + .model(MODEL) + .voice(VOICE) + .format(SpeechSynthesisAudioFormat.MP3_16000HZ_MONO_128KBPS) // 可选配置格式 + .build(); + SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, callback); + + try { + // 3. 非阻塞调用 + synthesizer.call(TEXT); + latch.await(); // 等待合成完成 + System.out.println("RequestId:" + synthesizer.getLastRequestId()); + } catch (Exception e) { + System.err.println("调用异常:" + e.getMessage()); + } finally { + synthesizer.getDuplexApi().close(1000, "任务结束"); + } + } +} +``` + +### 3. 流式调用(分段传文本,适合长文本) +```java +import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult; +import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisAudioFormat; +import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam; +import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer; +import com.alibaba.dashscope.common.ResultCallback; + +public class StreamingCallTemplate { + // 分段文本(每段≤2000字符,累计≤20万字符) + private static final String[] TEXT_SEGMENTS = { + "流式语音合成适合长文本场景,", + "可以分段发送文本,", + "服务端实时返回音频,", + "减少等待时间。" + }; + private static final String MODEL = "cosyvoice-v3"; + private static final String VOICE = "longchuanshu_v3"; + + public static void main(String[] args) { + // 1. 配置回调 + ResultCallback callback = new ResultCallback() { + @Override + public void onEvent(SpeechSynthesisResult result) { + if (result.getAudioFrame() != null) { + System.out.println("收到流式音频分片"); + // 处理音频(如实时播放、写入缓冲文件) + } + } + + @Override + public void onComplete() { + System.out.println("流式合成全部完成!"); + } + + @Override + public void onError(Exception e) { + System.err.println("流式合成失败:" + e.getMessage()); + } + }; + + // 2. 
构建参数 + SpeechSynthesisParam param = SpeechSynthesisParam.builder() + // .apiKey("your-api-key") + .model(MODEL) + .voice(VOICE) + .format(SpeechSynthesisAudioFormat.PCM_22050HZ_MONO_16BIT) + .speechRate(0.9f) + .build(); + SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, callback); + + try { + // 3. 分段发送文本(间隔≤23秒) + for (String segment : TEXT_SEGMENTS) { + synthesizer.streamingCall(segment); + Thread.sleep(500); // 模拟文本输入间隔 + } + // 4. 必须调用:结束流式合成(触发剩余文本合成) + synthesizer.streamingComplete(); + System.out.println("RequestId:" + synthesizer.getLastRequestId()); + } catch (Exception e) { + System.err.println("调用异常:" + e.getMessage()); + } finally { + synthesizer.getDuplexApi().close(1000, "任务结束"); + } + } +} +``` + +### 4. Flowable调用(响应式编程,支持流式输入输出) +```java +import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam; +import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer; +import com.alibaba.dashscope.exception.NoApiKeyException; +import io.reactivex.Flowable; +import io.reactivex.BackpressureStrategy; + +public class FlowableCallTemplate { + private static final String MODEL = "cosyvoice-v2"; + private static final String VOICE = "longyingtian_v2"; + private static final String[] TEXT_ARRAY = {"响应式编程模式,", "支持流式输入输出,", "适合高并发场景。"}; + + public static void main(String[] args) throws NoApiKeyException { + // 1. 模拟流式文本输入 + Flowable textStream = Flowable.create(emitter -> { + new Thread(() -> { + for (String text : TEXT_ARRAY) { + emitter.onNext(text); + try { + Thread.sleep(800); + } catch (InterruptedException e) { + emitter.onError(e); + } + } + emitter.onComplete(); + }).start(); + }, BackpressureStrategy.BUFFER); + + // 2. 构建参数 + SpeechSynthesisParam param = SpeechSynthesisParam.builder() + // .apiKey("your-api-key") + .model(MODEL) + .voice(VOICE) + .volume(70) + .build(); + SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, null); + + try { + // 3. 
流式调用并处理结果 + synthesizer.streamingCallAsFlowable(textStream) + .blockingForEach(result -> { + if (result.getAudioFrame() != null) { + System.out.println("Flowable收到音频,大小:" + result.getAudioFrame().capacity() + "字节"); + // 处理音频逻辑 + } + }); + System.out.println("Flowable合成完成!RequestId:" + synthesizer.getLastRequestId()); + } finally { + synthesizer.getDuplexApi().close(1000, "任务结束"); + } + } +} +``` + +## 四、核心注意事项 +1. 文本长度限制:非流式单次≤2000字符,流式累计≤20万字符(含SSML标签)。 +2. 字符计算规则:汉字=2字符,英文/数字/标点/空格=1字符。 +3. 流式调用必须调用`streamingComplete()`,否则结尾文本无法合成。 +4. 每次调用`call()`前需重新初始化`SpeechSynthesizer`实例。 +5. 音频格式需与播放器兼容(如MP3/OPUS支持流式播放,推荐使用ffmpeg、AudioFormat等工具)。 + diff --git a/docs/cosyvoice-ram-config.md b/docs/cosyvoice-ram-config.md new file mode 100644 index 0000000000..6a520b2865 --- /dev/null +++ b/docs/cosyvoice-ram-config.md @@ -0,0 +1,156 @@ +# CosyVoice访问OSS配置指南 + +## 问题说明 +CosyVoice复刻服务需要访问OSS存储的音频文件,但默认情况下CosyVoice没有访问用户OSS的权限。 + +## 解决方案:配置RAM权限 + +### 1. 创建RAM角色 +在阿里云RAM控制台创建角色,并为该角色配置以下权限策略,允许CosyVoice服务读取OSS中的音频文件: + +```json +{ + "Version": "1", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "oss:GetObject", + "oss:HeadObject" + ], + "Resource": [ + "acs:oss:*:*:{bucket-name}/*" + ] + } + ] +} +``` + +**参数说明:** +- `{bucket-name}`:替换为你的OSS存储桶名称 + +### 2. 配置信任策略 +为RAM角色添加信任策略,允许DashScope服务扮演该角色: + +```json +{ + "Version": "1", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Service": [ + "dashscope.aliyuncs.com" + ] + }, + "Action": "sts:AssumeRole" + } + ] +} +``` + +### 3. 授权步骤(控制台操作) + +**步骤1:创建自定义权限策略** +1. 登录阿里云控制台 → 访问控制(RAM) → 权限管理 → 权限策略 +2. 点击"创建策略" +3. 选择"脚本配置" +4. 粘贴第1节中的JSON权限策略 +5. 策略名称:`CosyVoice-OSS-Access` +6. 点击"确定" + +**步骤2:创建RAM角色** +1. RAM → 身份管理 → 角色 → 创建角色 +2. 选择"阿里云服务" → "DashScope" +3. 输入角色名称:`CosyVoice-OSS-Role` +4. 完成创建 + +**步骤3:授权角色访问OSS** +1. 在角色详情页面,点击"添加权限" +2. 搜索并添加: + - `AliyunOSSReadOnlyAccess`(阿里云OSS只读权限) + - `CosyVoice-OSS-Access`(自定义权限) +3. 
点击"确定" + +**步骤4:获取ARN** +在角色详情页面,复制"ARN": +``` +acs:ram::{你的AccountID}:role/CosyVoice-OSS-Role +``` + +### 4. 配置application.yaml + +在 `yudao-server/src/main/resources/application.yaml` 中添加: + +```yaml +yudao: + cosyvoice: + # ... 其他配置 + # RAM角色ARN(用于授权CosyVoice访问OSS) + ram-role-arn: "acs:ram::{AccountID}:role/CosyVoice-OSS-Role" +``` + +### 5. 修改OSS访问方式 + +在 `CosyVoiceClient.buildClonePayload()` 中,添加授权信息: + +```java +// 在请求头中添加Authorization +Request httpRequest = new Request.Builder() + .url(properties.getVoiceEnrollmentUrl()) + .addHeader("Authorization", "Bearer " + properties.getApiKey()) + .addHeader("Content-Type", "application/json") + // 添加STS临时凭证(需要先调用AssumeRole获取临时凭证) + .addHeader("x-acs-security-token", "{security-token}") + .post(RequestBody.create(payload.getBytes(StandardCharsets.UTF_8), JSON)) + .build(); +``` + +### 6. 获取STS临时凭证(Java代码) + +```java +import com.aliyun.sts20150401.Client; +import com.aliyun.sts20150401.models.AssumeRoleRequest; +import com.aliyun.sts20150401.models.AssumeRoleResponse; + +public String getSecurityToken() { + // 需要配置RAM角色ARN和访问密钥 + AssumeRoleRequest request = new AssumeRoleRequest() + .setRoleArn(properties.getRamRoleArn()) + .setRoleSessionName("cosyvoice-session"); + + AssumeRoleResponse response = client.assumeRole(request); + return response.body.credentials.securityToken; +} +``` + +### 7. 完整实现思路 + +1. **本地开发**:使用STS临时凭证 +2. **生产环境**: + - 方案A:配置RAM角色,让CosyVoice直接访问OSS + - 方案B:将音频文件上传到CosyVoice可访问的公共OSS存储桶 + +## 替代方案:使用公共OSS存储桶 + +如果RAM权限配置复杂,可以: + +1. 创建公共可读的OSS存储桶 +2. 将音频文件上传到该存储桶 +3. 使用公共URL进行复刻 + +**注意**:公共存储桶存在安全风险,仅用于测试! + +## 验证配置 + +配置完成后,重新测试语音复刻功能: + +1. 查看日志中的请求URL是否可公网访问 +2. 查看是否还有"url error"错误 +3. 
查看复刻是否成功返回voice_id + +## 参考资料 + +- [阿里云RAM权限管理](https://help.aliyun.com/zh/ram/instance/role/parameter-overview) +- [STS临时凭证](https://help.aliyun.com/zh/acs/STS/usage-scenarios/usage-scenarios) +- [CosyVoice错误代码](https://help.aliyun.com/zh/model-studio/error-code#error-url) diff --git a/frontend/app/web-gold/src/api/userPrompt.js b/frontend/app/web-gold/src/api/userPrompt.js index 27e34bb375..43540c6b34 100644 --- a/frontend/app/web-gold/src/api/userPrompt.js +++ b/frontend/app/web-gold/src/api/userPrompt.js @@ -15,7 +15,12 @@ export const UserPromptApi = { * @returns {Promise} 响应数据 */ createUserPrompt: async (data) => { - return await http.post(`${SERVER_BASE_AI}/user-prompt/create`, data) + console.log('[UserPromptApi] 发送请求参数:', JSON.stringify(data, null, 2)) + return await http.post(`${SERVER_BASE_AI}/user-prompt/create`, data, { + headers: { + 'Content-Type': 'application/json' + } + }) }, /** diff --git a/frontend/app/web-gold/src/views/content-style/components/SavePromptModal.vue b/frontend/app/web-gold/src/views/content-style/components/SavePromptModal.vue index 6d3a97ec11..d2edd46614 100644 --- a/frontend/app/web-gold/src/views/content-style/components/SavePromptModal.vue +++ b/frontend/app/web-gold/src/views/content-style/components/SavePromptModal.vue @@ -54,17 +54,27 @@ async function handleSave() { savingPrompt.value = true try { + // 确保 content 字段有值 + const content = savePromptForm.value.content?.trim() || '' + if (!content) { + message.error('提示词内容不能为空') + savingPrompt.value = false + return + } + const payload = { userId: userId, name: savePromptForm.value.name.trim(), - content: savePromptForm.value.content.trim(), - category: savePromptForm.value.category.trim() || null, + content: content, // 确保 content 有值 + category: savePromptForm.value.category?.trim() || null, isPublic: false, sort: 0, useCount: 0, status: 1, } + console.log('[SavePromptModal] 发送请求参数:', payload) + const response = await UserPromptApi.createUserPrompt(payload) if (response && 
(response.code === 0 || response.code === 200)) { diff --git a/frontend/app/web-gold/src/views/dh/Video.vue b/frontend/app/web-gold/src/views/dh/Video.vue index 5e84b73c87..3da8d428cc 100644 --- a/frontend/app/web-gold/src/views/dh/Video.vue +++ b/frontend/app/web-gold/src/views/dh/Video.vue @@ -1,7 +1,5 @@ diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/userprompt/controller/app/AppUserPromptController.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/userprompt/controller/app/AppUserPromptController.java index 52ce2ac3a6..1b905eceaa 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/userprompt/controller/app/AppUserPromptController.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/userprompt/controller/app/AppUserPromptController.java @@ -10,7 +10,6 @@ import cn.iocoder.yudao.module.tik.userprompt.vo.UserPromptRespVO; import cn.iocoder.yudao.module.tik.userprompt.vo.UserPromptSaveReqVO; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.Parameter; -import io.swagger.v3.oas.annotations.parameters.RequestBody; import io.swagger.v3.oas.annotations.tags.Tag; import jakarta.annotation.Resource; import jakarta.validation.Valid; @@ -33,34 +32,31 @@ public class AppUserPromptController { @PostMapping("/create") @Operation(summary = "创建用户提示词") - public CommonResult createUserPrompt(@RequestBody UserPromptSaveReqVO createReqVO) { - // 先设置当前登录用户ID(在验证之前设置,避免 @NotNull 验证失败) + public CommonResult createUserPrompt(@Valid @RequestBody UserPromptSaveReqVO createReqVO) { + // 设置当前登录用户ID Long userId = getLoginUserId(); if (userId == null) { return CommonResult.error(401, "用户未登录"); } createReqVO.setUserId(userId); - // 手动验证必要字段 - if (createReqVO.getName() == null || createReqVO.getName().trim().isEmpty()) { - return CommonResult.error(400, "提示词名称不能为空"); + // 处理字符串字段的trim + if (createReqVO.getName() != null) { + createReqVO.setName(createReqVO.getName().trim()); } - if 
(createReqVO.getContent() == null || createReqVO.getContent().trim().isEmpty()) { - return CommonResult.error(400, "提示词内容不能为空"); - } - if (createReqVO.getStatus() == null) { - return CommonResult.error(400, "状态不能为空"); + if (createReqVO.getContent() != null) { + createReqVO.setContent(createReqVO.getContent().trim()); } - // 设置默认值(如果前端没有传递) + // 设置默认值 if (createReqVO.getIsPublic() == null) { - createReqVO.setIsPublic(false); // 默认私有 + createReqVO.setIsPublic(false); } if (createReqVO.getSort() == null) { - createReqVO.setSort(0); // 默认排序为 0 + createReqVO.setSort(0); } if (createReqVO.getUseCount() == null) { - createReqVO.setUseCount(0); // 默认使用次数为 0 + createReqVO.setUseCount(0); } return success(userPromptService.createUserPrompt(createReqVO)); diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java index 19e674a5ef..73cd624866 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java @@ -3,9 +3,15 @@ package cn.iocoder.yudao.module.tik.voice.client; import cn.hutool.core.collection.CollUtil; import cn.hutool.core.util.StrUtil; import cn.iocoder.yudao.framework.common.exception.ServiceException; +import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest; +import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult; import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest; import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult; import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties; +import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam; +import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer; +import com.alibaba.dashscope.audio.ttsv2.enrollment.Voice; +import 
com.alibaba.dashscope.audio.ttsv2.enrollment.VoiceEnrollmentService; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import lombok.RequiredArgsConstructor; @@ -17,6 +23,7 @@ import okhttp3.RequestBody; import okhttp3.Response; import org.springframework.stereotype.Component; +import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.time.Duration; import java.util.Base64; @@ -53,28 +60,130 @@ public class CosyVoiceClient { if (request == null || StrUtil.isBlank(request.getText())) { throw exception0(VOICE_TTS_FAILED.getCode(), "TTS 文本不能为空"); } + if (StrUtil.isBlank(request.getVoiceId())) { + throw exception0(VOICE_TTS_FAILED.getCode(), "必须提供 voiceId"); + } + SpeechSynthesizer synthesizer = null; try { - String payload = objectMapper.writeValueAsString(buildPayload(request)); - Request httpRequest = new Request.Builder() - .url(properties.getTtsUrl()) - .addHeader("Authorization", "Bearer " + properties.getApiKey()) - .addHeader("Content-Type", "application/json") - .post(RequestBody.create(payload.getBytes(StandardCharsets.UTF_8), JSON)) + log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}]", + request.getVoiceId(), + request.getText().length(), + StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel())); + + // 使用 DashScope SDK 构建参数(严格按文档) + SpeechSynthesisParam param = SpeechSynthesisParam.builder() + .apiKey(properties.getApiKey()) + .model(StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel())) + .voice(request.getVoiceId()) .build(); - try (Response response = getHttpClient().newCall(httpRequest).execute()) { - String body = response.body() != null ? 
response.body().string() : ""; - if (!response.isSuccessful()) { - log.error("[CosyVoice][TTS失败][status={}, body={}]", response.code(), body); - throw buildException(body); - } - return parseTtsResult(body, request); + // 初始化合成器(同步调用传 null) + synthesizer = new SpeechSynthesizer(param, null); + + // 阻塞调用,获取完整音频 + ByteBuffer audioData = synthesizer.call(request.getText()); + + if (audioData == null) { + throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 返回空音频数据"); } + + // 转换为字节数组(严格按照文档:直接使用 array()) + byte[] audioBytes = audioData.array(); + + log.info("[CosyVoice][TTS合成成功][Request ID: {}, audioSize={}, 首包延迟={}ms]", + synthesizer.getLastRequestId(), + audioBytes.length, + synthesizer.getFirstPackageDelay()); + + // 构建返回结果 + CosyVoiceTtsResult result = new CosyVoiceTtsResult(); + result.setAudio(audioBytes); + result.setFormat(request.getAudioFormat() != null ? request.getAudioFormat() : properties.getAudioFormat()); + result.setSampleRate(request.getSampleRate() != null ? request.getSampleRate() : properties.getSampleRate()); + result.setRequestId(synthesizer.getLastRequestId()); + result.setVoiceId(request.getVoiceId()); + + return result; + } catch (ServiceException ex) { throw ex; } catch (Exception ex) { - log.error("[CosyVoice][TTS异常]", ex); + log.error("[CosyVoice][TTS异常][voiceId={}, text={}]", request.getVoiceId(), request.getText(), ex); + throw exception(VOICE_TTS_FAILED); + } finally { + // 关闭 WebSocket 连接 + if (synthesizer != null) { + try { + synthesizer.getDuplexApi().close(1000, "任务结束"); + } catch (Exception e) { + log.warn("[CosyVoice][关闭连接失败]", e); + } + } + } + } + + /** + * 使用 HTTP API 进行 TTS 合成(备用方案) + */ + private CosyVoiceTtsResult synthesizeViaHttp(CosyVoiceTtsRequest request) throws Exception { + String payload = objectMapper.writeValueAsString(buildPayload(request)); + Request httpRequest = new Request.Builder() + .url(properties.getTtsUrl()) + .addHeader("Authorization", "Bearer " + properties.getApiKey()) + .addHeader("Content-Type", 
"application/json") + .post(RequestBody.create(payload.getBytes(StandardCharsets.UTF_8), JSON)) + .build(); + + try (Response response = getHttpClient().newCall(httpRequest).execute()) { + String body = response.body() != null ? response.body().string() : ""; + if (!response.isSuccessful()) { + log.error("[CosyVoice][TTS失败][status={}, body={}]", response.code(), body); + throw buildException(body); + } + return parseTtsResult(body, request); + } + } + + /** + * 调用 CosyVoice 语音复刻接口(声音注册) + */ + public CosyVoiceCloneResult cloneVoice(CosyVoiceCloneRequest request) { + if (!properties.isEnabled()) { + throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key"); + } + if (request == null || StrUtil.isBlank(request.getUrl())) { + throw exception0(VOICE_TTS_FAILED.getCode(), "复刻音频URL不能为空"); + } + if (request == null || StrUtil.isBlank(request.getTargetModel())) { + throw exception0(VOICE_TTS_FAILED.getCode(), "复刻模型不能为空"); + } + if (request == null || StrUtil.isBlank(request.getPrefix())) { + throw exception0(VOICE_TTS_FAILED.getCode(), "音色前缀不能为空"); + } + + try { + log.info("[CosyVoice][开始语音复刻][targetModel={}, prefix={}, url={}]", + request.getTargetModel(), request.getPrefix(), request.getUrl()); + + // 使用 DashScope SDK 创建语音复刻 + VoiceEnrollmentService service = new VoiceEnrollmentService(properties.getApiKey()); + Voice voice = service.createVoice(request.getTargetModel(), request.getPrefix(), request.getUrl()); + + log.info("[CosyVoice][语音复刻成功][Request ID: {}, Voice ID: {}]", + service.getLastRequestId(), voice.getVoiceId()); + + // 构建返回结果 + CosyVoiceCloneResult result = new CosyVoiceCloneResult(); + result.setVoiceId(voice.getVoiceId()); + result.setRequestId(service.getLastRequestId()); + + return result; + } catch (ServiceException ex) { + throw ex; + } catch (Exception ex) { + log.error("[CosyVoice][语音复刻异常][targetModel={}, prefix={}]", + request.getTargetModel(), request.getPrefix(), ex); throw exception(VOICE_TTS_FAILED); } } @@ -86,27 +195,55 @@ public 
class CosyVoiceClient { Map input = new HashMap<>(); input.put("text", request.getText()); - String voiceId = StrUtil.blankToDefault(request.getVoiceId(), properties.getDefaultVoiceId()); - if (StrUtil.isNotBlank(voiceId)) { - input.put("voice", voiceId); + + // 优先使用fileUrl(语音克隆),否则使用voiceId(系统音色) + if (StrUtil.isNotBlank(request.getFileUrl())) { + // 直接使用预签名URL(带签名和时效),阿里云API需要这个签名URL + input.put("audio_url", request.getFileUrl()); + log.info("[CosyVoice][使用语音克隆][audio_url={}]", request.getFileUrl()); + + // 如果提供了参考文本,也一并传递(用于提高语音克隆质量) + if (StrUtil.isNotBlank(request.getReferenceText())) { + input.put("reference_text", request.getReferenceText()); + log.info("[CosyVoice][添加参考文本][length={}]", request.getReferenceText().length()); + } + } else { + // 使用系统音色 + String voiceId = StrUtil.blankToDefault(request.getVoiceId(), properties.getDefaultVoiceId()); + if (StrUtil.isNotBlank(voiceId)) { + input.put("voice", voiceId); + log.info("[CosyVoice][使用系统音色][voice={}]", voiceId); + } else { + log.warn("[CosyVoice][未提供voiceId或fileUrl]"); + } } payload.put("input", input); Map parameters = new HashMap<>(); int sampleRate = request.getSampleRate() != null ? 
request.getSampleRate() : properties.getSampleRate(); parameters.put("sample_rate", sampleRate); - String format = StrUtil.blankToDefault(request.getAudioFormat(), properties.getAudioFormat()); + + // 根据官方文档,统一使用小写格式 + String format = StrUtil.blankToDefault(request.getAudioFormat(), properties.getAudioFormat()).toLowerCase(); parameters.put("format", format); + if (request.getSpeechRate() != null) { parameters.put("speech_rate", request.getSpeechRate()); } if (request.getVolume() != null) { - parameters.put("volume", request.getVolume()); + // 文档显示volume范围是0-100 + parameters.put("volume", Math.round(request.getVolume())); } if (request.isPreview()) { parameters.put("preview", true); } + payload.put("parameters", parameters); + + // 打印完整请求体(用于调试) + log.info("[CosyVoice][请求参数][model={}, sample_rate={}, format={}, text_length={}]", + model, sampleRate, format, request.getText().length()); + return payload; } @@ -173,6 +310,26 @@ public class CosyVoiceClient { return exception0(VOICE_TTS_FAILED.getCode(), body); } } + + /** + * 从URL中提取原始URL(去除查询参数和锚点) + * + * @param url 可能包含查询参数的URL + * @return 原始URL(去除查询参数和锚点) + */ + private String extractRawUrl(String url) { + if (StrUtil.isBlank(url)) { + return url; + } + try { + java.net.URL urlObj = new java.net.URL(url); + // 只使用协议、主机、路径部分,忽略查询参数和锚点 + return urlObj.getProtocol() + "://" + urlObj.getHost() + urlObj.getPath(); + } catch (Exception e) { + // 如果URL解析失败,使用简单方式去除查询参数 + return url.split("\\?")[0].split("#")[0]; + } + } } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneRequest.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneRequest.java new file mode 100644 index 0000000000..9e9156e0a9 --- /dev/null +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneRequest.java @@ -0,0 +1,36 @@ +package cn.iocoder.yudao.module.tik.voice.client.dto; + +import lombok.Data; + +/** + * 
CosyVoice 语音复刻请求 + */ +@Data +public class CosyVoiceCloneRequest { + + /** + * 复刻模型(cosyvoice-v1 或 cosyvoice-v2) + */ + private String targetModel; + + /** + * 音色自定义前缀(仅允许数字和小写字母,长度<10字符) + */ + private String prefix; + + /** + * 音频文件公网URL + */ + private String url; + + /** + * 采样率,默认24000 + */ + private Integer sampleRate; + + /** + * 音频格式,默认wav + */ + private String audioFormat; + +} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneResult.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneResult.java new file mode 100644 index 0000000000..d01421a269 --- /dev/null +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneResult.java @@ -0,0 +1,21 @@ +package cn.iocoder.yudao.module.tik.voice.client.dto; + +import lombok.Data; + +/** + * CosyVoice 语音复刻结果 + */ +@Data +public class CosyVoiceCloneResult { + + /** + * 生成的 voice_id + */ + private String voiceId; + + /** + * 请求ID + */ + private String requestId; + +} diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java index 6fcc1f66b9..2f270cc7d1 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceTtsRequest.java @@ -20,6 +20,16 @@ public class CosyVoiceTtsRequest { */ private String voiceId; + /** + * 语音文件URL(当使用语音URL合成时使用,替代voiceId) + */ + private String fileUrl; + + /** + * 参考音频文本(当使用fileUrl时,用于提高克隆质量) + */ + private String referenceText; + /** * 模型(默认 cosyvoice-v2) */ diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProperties.java 
b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProperties.java index 60b39c4abb..19cc522a35 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProperties.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProperties.java @@ -50,6 +50,11 @@ public class CosyVoiceProperties { */ private String ttsUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis"; + /** + * 语音复刻接口地址(声音注册) + */ + private String voiceEnrollmentUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment"; + /** * 连接超时时间 */ diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikUserVoiceDO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikUserVoiceDO.java index 4ab397b217..6e2a666610 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikUserVoiceDO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikUserVoiceDO.java @@ -54,6 +54,10 @@ public class TikUserVoiceDO extends TenantBaseDO { * 备注信息 */ private String note; + /** + * 复刻音色ID(CosyVoice 语音复刻生成的 voice_id) + */ + private String voiceId; } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java index faf6058ce8..83d7c9eb17 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java @@ -20,6 +20,8 @@ import cn.iocoder.yudao.module.tik.file.service.TikUserFileService; import cn.iocoder.yudao.module.tik.tikhup.service.TikHupService; import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX; 
import cn.iocoder.yudao.module.tik.voice.client.CosyVoiceClient; +import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest; +import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult; import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest; import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult; import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties; @@ -91,8 +93,8 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { @Resource private StringRedisTemplate stringRedisTemplate; - /** 预签名URL过期时间(1小时,单位:秒) */ - private static final int PRESIGN_URL_EXPIRATION_SECONDS = 3600; + /** 预签名URL过期时间(24小时,单位:秒) */ + private static final int PRESIGN_URL_EXPIRATION_SECONDS = 24 * 3600; private static final String PREVIEW_CACHE_PREFIX = "tik:voice:preview:"; private static final String SYNTH_CACHE_PREFIX = "tik:voice:tts:"; private static final long PREVIEW_CACHE_TTL_SECONDS = 3600; @@ -138,14 +140,30 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { .setTranscription(null); // 初始为空,表示未识别 voiceMapper.insert(voice); - // 4. 如果开启自动识别,异步执行识别 - if (Boolean.TRUE.equals(createReqVO.getAutoTranscribe())) { + // 4. 
调用阿里云语音复刻服务,生成 voice_id + try { + log.info("[createVoice][开始语音复刻,配音编号({}),文件ID({})]", voice.getId(), fileDO.getId()); String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); - log.info("[createVoice][开启自动识别,配音编号({}),文件ID({}),预签名URL({})]", - voice.getId(), fileDO.getId(), fileAccessUrl); - asyncTranscribeVoice(voice.getId(), fileAccessUrl); + + CosyVoiceCloneRequest cloneRequest = new CosyVoiceCloneRequest(); + cloneRequest.setTargetModel("cosyvoice-v2"); // 使用v2模型,效果更好 + cloneRequest.setPrefix("voice" + voice.getId()); // 音色前缀,格式要求 + cloneRequest.setUrl(fileAccessUrl); + + CosyVoiceCloneResult cloneResult = cosyVoiceClient.cloneVoice(cloneRequest); + String voiceId = cloneResult.getVoiceId(); + + // 更新配音记录,保存 voice_id + voice.setVoiceId(voiceId); + voiceMapper.updateById(voice); + + log.info("[createVoice][语音复刻成功,配音编号({}),voice_id({})]", voice.getId(), voiceId); + } catch (Exception e) { + log.error("[createVoice][语音复刻失败,配音编号({}),错误信息: {}]", voice.getId(), e.getMessage(), e); + // 复刻失败不影响配音记录创建,只记录日志 } + log.info("[createVoice][用户({})创建配音成功,配音编号({})]", userId, voice.getId()); return voice.getId(); } @@ -230,6 +248,10 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { // 查询配音列表 PageResult pageResult = voiceMapper.selectPage(pageReqVO); + // 增加日志:记录查询到的配音数量和用户ID + log.info("[getVoicePage][查询配音列表,用户ID={}, 总数={}]", + userId, pageResult.getTotal()); + // 批量查询文件信息,避免 N+1 查询 Map fileMap = new HashMap<>(); if (CollUtil.isNotEmpty(pageResult.getList())) { @@ -237,7 +259,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { .map(TikUserVoiceDO::getFileId) .distinct() .collect(Collectors.toList()); - + if (CollUtil.isNotEmpty(fileIds)) { List files = fileMapper.selectBatchIds(fileIds); Map tempFileMap = files.stream() @@ -258,6 +280,12 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { vo.setFileUrl(presignedUrl); } + // 增加日志:记录转换后的VO数据 + if (log.isDebugEnabled()) { + 
log.debug("[getVoicePage][转换VO,配音ID={}, 名称={}]", + vo.getId(), vo.getName()); + } + return vo; }); } @@ -297,28 +325,101 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { throw exception(VOICE_NOT_EXISTS); } - // 2. 获取文件URL + // 2. 检查是否已经有识别结果 + if (StrUtil.isNotBlank(voice.getTranscription())) { + log.info("[transcribeVoice][配音已经识别过,配音编号({}),跳过识别]", id); + return; + } + + // 3. 获取文件URL FileDO fileDO = fileMapper.selectById(voice.getFileId()); if (fileDO == null) { throw exception(VOICE_FILE_NOT_EXISTS); } - // 3. 异步执行识别 + // 4. 异步执行识别 String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); asyncTranscribeVoice(id, fileAccessUrl); } @Override public AppTikVoiceTtsRespVO synthesizeVoice(AppTikVoiceTtsReqVO reqVO) { + Long userId = SecurityFrameworkUtils.getLoginUserId(); + Long voiceConfigId = reqVO.getVoiceConfigId(); + + // 增加请求参数日志 + log.info("[synthesizeVoice][开始合成,请求参数:voiceConfigId={}, voiceId={}, fileUrl={}, userId={}]", + voiceConfigId, reqVO.getVoiceId(), reqVO.getFileUrl(), userId); + + String voiceId = null; + String fileUrl = null; + String transcriptionText = null; + + // 1. 如果有配置ID,根据配置ID查询配音信息(用户配音) + if (voiceConfigId != null) { + log.info("[synthesizeVoice][开始合成,配音编号({}),用户({})]", voiceConfigId, userId); + + TikUserVoiceDO voice = voiceMapper.selectById(voiceConfigId); + log.info("[synthesizeVoice][查询配音结果:voice={},配音编号={},用户ID={}]", + voice != null ? 
"存在" : "不存在", voiceConfigId, userId); + + if (voice == null) { + log.warn("[synthesizeVoice][配音不存在,配音编号({}),用户({})]", voiceConfigId, userId); + throw exception(VOICE_NOT_EXISTS, "配音不存在,编号:" + voiceConfigId); + } + if (!voice.getUserId().equals(userId)) { + log.warn("[synthesizeVoice][配音不属于当前用户,配音编号({}),配音用户({}),当前用户({})]", + voiceConfigId, voice.getUserId(), userId); + throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户"); + } + + // 优先使用复刻的 voice_id,如果不存在则使用文件URL(兼容旧数据) + if (StrUtil.isNotBlank(voice.getVoiceId())) { + log.info("[synthesizeVoice][使用复刻音色ID合成,配音编号({}),voice_id({})]", voiceConfigId, voice.getVoiceId()); + voiceId = voice.getVoiceId(); + transcriptionText = voice.getTranscription(); + } else { + log.info("[synthesizeVoice][使用文件URL合成,配音编号({})]", voiceConfigId); + // 获取文件信息,用于获取文件URL + FileDO fileDO = fileMapper.selectById(voice.getFileId()); + if (fileDO == null) { + throw exception(VOICE_FILE_NOT_EXISTS); + } + + // 使用文件URL和识别文本进行合成 + fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); + transcriptionText = voice.getTranscription(); + if (StrUtil.isBlank(transcriptionText)) { + throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别"); + } + } + } + // 2. 
如果没有配置ID,使用voiceId或fileUrl(系统音色或直接URL方式) + else { + // 参数验证:如果使用fileUrl,建议提供transcriptionText以提高克隆质量 + if (StrUtil.isNotBlank(reqVO.getFileUrl()) && StrUtil.isBlank(reqVO.getTranscriptionText())) { + log.warn("[synthesizeVoice][使用fileUrl但未提供transcriptionText,可能影响克隆质量]"); + } + + // 参数验证:必须提供voiceId或fileUrl之一 + if (StrUtil.isBlank(reqVO.getVoiceId()) && StrUtil.isBlank(reqVO.getFileUrl())) { + throw exception(VOICE_NOT_EXISTS, "请提供音色ID(voiceId)或语音文件URL(fileUrl)"); + } + + voiceId = reqVO.getVoiceId(); + fileUrl = reqVO.getFileUrl(); + transcriptionText = reqVO.getTranscriptionText(); + } + String finalText = determineSynthesisText( - reqVO.getTranscriptionText(), + transcriptionText, reqVO.getInputText(), false); finalText = appendEmotion(finalText, reqVO.getEmotion()); String cacheKey = buildCacheKey(SYNTH_CACHE_PREFIX, - reqVO.getVoiceId(), - reqVO.getFileUrl(), + voiceId, + fileUrl, finalText, reqVO.getSpeechRate(), reqVO.getVolume(), @@ -333,7 +434,9 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest( finalText, - reqVO.getVoiceId(), + voiceId, + fileUrl, + transcriptionText, reqVO.getModel(), reqVO.getSpeechRate(), reqVO.getVolume(), @@ -343,82 +446,196 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { )); String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat()); - String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId()); + String finalVoiceId = StrUtil.blankToDefault(voiceId, cosyVoiceProperties.getDefaultVoiceId()); ByteArrayMultipartFile multipartFile = new ByteArrayMultipartFile( "file", - buildFileName(voiceId, format), + buildFileName(finalVoiceId, format), resolveContentType(format), ttsResult.getAudio() ); - Long fileId = tikUserFileService.uploadFile(multipartFile, "audio", null); + Long infraFileId = tikUserFileService.uploadFile(multipartFile, "audio", null); + + // 
通过infraFileId查询TikUserFileDO,获取用户文件ID + TikUserFileDO userFile = userFileMapper.selectOne( + new LambdaQueryWrapperX() + .eq(TikUserFileDO::getFileId, infraFileId) + .eq(TikUserFileDO::getUserId, SecurityFrameworkUtils.getLoginUserId()) + .orderByDesc(TikUserFileDO::getId) + .last("LIMIT 1")); + if (userFile == null) { + throw exception(VOICE_FILE_NOT_EXISTS, "文件上传成功但未找到用户文件记录"); + } AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO(); - respVO.setFileId(fileId); - respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(fileId)); + respVO.setFileId(infraFileId); // 返回infraFileId,保持与原有逻辑一致 + respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(userFile.getId())); // 使用TikUserFileDO.id获取播放URL respVO.setFormat(format); respVO.setSampleRate(ttsResult.getSampleRate()); respVO.setRequestId(ttsResult.getRequestId()); - respVO.setVoiceId(voiceId); + respVO.setVoiceId(finalVoiceId); saveSynthCache(cacheKey, new SynthCacheEntry( Base64.getEncoder().encodeToString(ttsResult.getAudio()), format, ttsResult.getSampleRate(), ttsResult.getRequestId(), - voiceId + finalVoiceId )); return respVO; } @Override public AppTikVoicePreviewRespVO previewVoice(AppTikVoicePreviewReqVO reqVO) { - String finalText = determineSynthesisText( - reqVO.getTranscriptionText(), - reqVO.getInputText(), - true); - finalText = appendEmotion(finalText, reqVO.getEmotion()); + Long userId = SecurityFrameworkUtils.getLoginUserId(); + Long voiceConfigId = reqVO.getVoiceConfigId(); + // 增加请求参数日志 + log.info("[previewVoice][开始试听,请求参数:voiceConfigId={}, voiceId={}, fileUrl={}, userId={}]", + voiceConfigId, reqVO.getVoiceId(), reqVO.getFileUrl(), userId); + + String voiceId = null; + String fileUrl = null; + String transcriptionText = null; + String inputText; + + // 1. 
如果传入了fileUrl和transcriptionText,直接使用(通过语音URL合成) + if (StrUtil.isNotBlank(reqVO.getFileUrl()) && StrUtil.isNotBlank(reqVO.getTranscriptionText())) { + log.info("[previewVoice][使用语音URL合成,用户({})]", userId); + // 如果传入的是预签名URL,提取原始URL(去除查询参数),避免二次签名 + String rawFileUrl = extractRawUrl(reqVO.getFileUrl()); + // 如果提取后的URL与原始URL不同,说明是预签名URL,需要重新生成预签名URL + // 否则直接使用(可能是原始URL或公开URL) + if (!rawFileUrl.equals(reqVO.getFileUrl())) { + // 重新生成预签名URL,确保有效期足够长 + fileUrl = fileApi.presignGetUrl(rawFileUrl, PRESIGN_URL_EXPIRATION_SECONDS); + log.info("[previewVoice][检测到预签名URL,已提取原始URL并重新生成预签名URL]"); + } else { + fileUrl = reqVO.getFileUrl(); + } + transcriptionText = reqVO.getTranscriptionText(); + inputText = StrUtil.blankToDefault(reqVO.getInputText(), transcriptionText); + } + // 2. 如果有配置ID,根据配置ID查询配音信息(用户配音) + else if (voiceConfigId != null) { + log.info("[previewVoice][开始试听,配音编号({}),用户({})]", voiceConfigId, userId); + + TikUserVoiceDO voice = voiceMapper.selectById(voiceConfigId); + log.info("[previewVoice][查询配音结果:voice={},配音编号={},用户ID={}]", + voice != null ? 
"存在" : "不存在", voiceConfigId, userId); + + if (voice == null) { + log.warn("[previewVoice][配音不存在,配音编号({}),用户({})]", voiceConfigId, userId); + throw exception(VOICE_NOT_EXISTS, "配音不存在,编号:" + voiceConfigId); + } + if (!voice.getUserId().equals(userId)) { + log.warn("[previewVoice][配音不属于当前用户,配音编号({}),配音用户({}),当前用户({})]", + voiceConfigId, voice.getUserId(), userId); + throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户"); + } + + // 优先使用复刻的 voice_id,如果不存在则使用文件URL(兼容旧数据) + if (StrUtil.isNotBlank(voice.getVoiceId())) { + log.info("[previewVoice][使用复刻音色ID试听,配音编号({}),voice_id({})]", voiceConfigId, voice.getVoiceId()); + voiceId = voice.getVoiceId(); + transcriptionText = voice.getTranscription(); + inputText = StrUtil.blankToDefault(reqVO.getInputText(), + StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText())); + } else { + log.info("[previewVoice][使用文件URL试听,配音编号({})]", voiceConfigId); + // 获取文件信息,用于获取文件URL + FileDO fileDO = fileMapper.selectById(voice.getFileId()); + if (fileDO == null) { + throw exception(VOICE_FILE_NOT_EXISTS); + } + + // 使用文件URL和识别文本进行合成 + fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); + transcriptionText = voice.getTranscription(); + if (StrUtil.isBlank(transcriptionText)) { + throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别"); + } + inputText = StrUtil.blankToDefault(reqVO.getInputText(), + StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText())); + } + } + // 3. 
如果没有配置ID,使用系统配音配置(需要前端传voiceId) + else { + log.info("[previewVoice][开始试听,使用系统配音配置,用户({})]", userId); + voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId()); + if (StrUtil.isBlank(voiceId)) { + throw exception(VOICE_NOT_EXISTS, "系统配音音色ID不能为空"); + } + inputText = StrUtil.blankToDefault(reqVO.getInputText(), cosyVoiceProperties.getPreviewText()); + } + + String finalText = determineSynthesisText( + transcriptionText, + inputText, + true); + + // 使用请求参数或默认值 + String emotion = StrUtil.blankToDefault(reqVO.getEmotion(), "neutral"); + finalText = appendEmotion(finalText, emotion); + Float speechRate = reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f; + Float volume = reqVO.getVolume() != null ? reqVO.getVolume() : 0f; + String audioFormat = StrUtil.blankToDefault(reqVO.getAudioFormat(), "mp3"); + + // 构建缓存key(使用fileUrl或voiceId) String cacheKey = buildCacheKey(PREVIEW_CACHE_PREFIX, - reqVO.getVoiceId(), - reqVO.getFileUrl(), + voiceId, + fileUrl, finalText, - reqVO.getSpeechRate(), - reqVO.getVolume(), - reqVO.getEmotion(), - reqVO.getAudioFormat(), + speechRate, + volume, + emotion, + audioFormat, null); PreviewCacheEntry previewCache = getPreviewCache(cacheKey); - String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId()); if (previewCache != null) { + log.info("[previewVoice][使用缓存,配音编号({}),voiceId({}),fileUrl({}),cacheKey({})]", + voiceConfigId, voiceId, fileUrl, cacheKey); + // 缓存中存储的是原始URL,需要生成预签名URL String cachedUrl = fileApi.presignGetUrl(previewCache.getFileUrl(), PRESIGN_URL_EXPIRATION_SECONDS); return buildPreviewResp(previewCache, cachedUrl, voiceId); } + log.info("[previewVoice][调用CosyVoice合成,配音编号({}),voiceId({}),fileUrl({}),文本长度({})]", + voiceConfigId, voiceId, fileUrl, finalText.length()); CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest( finalText, - reqVO.getVoiceId(), - reqVO.getModel(), - reqVO.getSpeechRate(), - reqVO.getVolume(), + 
voiceId, + fileUrl, + transcriptionText, // 参考音频文本,用于提高克隆质量 + null, // 使用默认模型 + speechRate, + volume, null, - reqVO.getAudioFormat(), + audioFormat, true )); - String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat()); - voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId()); - String objectName = buildFileName(voiceId, format); - String fileUrl = fileApi.createFile(ttsResult.getAudio(), objectName, "voice/preview", resolveContentType(format)); - String presignUrl = fileApi.presignGetUrl(fileUrl, PRESIGN_URL_EXPIRATION_SECONDS); - - PreviewCacheEntry entry = new PreviewCacheEntry(fileUrl, format, ttsResult.getSampleRate(), ttsResult.getRequestId()); + String format = defaultFormat(ttsResult.getFormat(), audioFormat); + String identifier = StrUtil.isNotBlank(fileUrl) ? "fileUrl" : (StrUtil.isNotBlank(voiceId) ? voiceId : "voice"); + String objectName = buildFileName(identifier, format); + // 上传到OSS,返回原始URL(不是预签名URL) + String resultFileUrl = fileApi.createFile(ttsResult.getAudio(), objectName, "voice/preview", resolveContentType(format)); + log.info("[previewVoice][合成成功,配音编号({}),voiceId({}),fileUrl({}),resultFileUrl({}),format({})]", + voiceConfigId, voiceId, fileUrl, resultFileUrl, format); + + // 生成预签名URL用于返回给前端 + String presignUrl = fileApi.presignGetUrl(resultFileUrl, PRESIGN_URL_EXPIRATION_SECONDS); + + // 缓存中存储原始URL(不是预签名URL),下次使用时再生成预签名URL + PreviewCacheEntry entry = new PreviewCacheEntry(resultFileUrl, format, ttsResult.getSampleRate(), ttsResult.getRequestId()); savePreviewCache(cacheKey, entry); return buildPreviewResp(entry, presignUrl, voiceId); } private CosyVoiceTtsRequest buildTtsRequest(String text, String voiceId, + String fileUrl, + String referenceText, String model, Float speechRate, Float volume, @@ -428,6 +645,8 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { return CosyVoiceTtsRequest.builder() .text(text) .voiceId(voiceId) + .fileUrl(fileUrl) + 
.referenceText(referenceText) .model(model) .speechRate(speechRate) .volume(volume) @@ -500,6 +719,26 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { return "【情感:" + emotionLabel + "】" + text; } + /** + * 从URL中提取原始URL(去除查询参数和锚点) + * + * @param url 可能包含查询参数的URL + * @return 原始URL(去除查询参数和锚点) + */ + private String extractRawUrl(String url) { + if (StrUtil.isBlank(url)) { + return url; + } + try { + java.net.URL urlObj = new java.net.URL(url); + // 只使用协议、主机、路径部分,忽略查询参数和锚点 + return urlObj.getProtocol() + "://" + urlObj.getHost() + urlObj.getPath(); + } catch (Exception e) { + // 如果URL解析失败,使用简单方式去除查询参数 + return url.split("\\?")[0].split("#")[0]; + } + } + private String buildCacheKey(String prefix, String voiceId, String fileUrl, @@ -509,9 +748,17 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { String emotion, String audioFormat, Integer sampleRate) { - String identifier = StrUtil.isNotBlank(voiceId) - ? voiceId - : StrUtil.blankToDefault(fileUrl, "no-voice"); + // 构建标识符:优先使用voiceId,如果没有则使用fileUrl的稳定部分(去除查询参数) + String identifier; + if (StrUtil.isNotBlank(voiceId)) { + identifier = voiceId; + } else if (StrUtil.isNotBlank(fileUrl)) { + // 对于fileUrl,提取稳定部分(去除预签名URL的查询参数,避免缓存key不稳定) + identifier = extractRawUrl(fileUrl); + } else { + identifier = "no-voice"; + } + String payload = StrUtil.join("|", identifier, text, @@ -584,11 +831,22 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { resolveContentType(format), audioBytes ); - Long fileId = tikUserFileService.uploadFile(multipartFile, "audio", null); + Long infraFileId = tikUserFileService.uploadFile(multipartFile, "audio", null); + + // 通过infraFileId查询TikUserFileDO,获取用户文件ID + TikUserFileDO userFile = userFileMapper.selectOne( + new LambdaQueryWrapperX() + .eq(TikUserFileDO::getFileId, infraFileId) + .eq(TikUserFileDO::getUserId, SecurityFrameworkUtils.getLoginUserId()) + .orderByDesc(TikUserFileDO::getId) + .last("LIMIT 1")); + if (userFile == null) { 
+ throw exception(VOICE_FILE_NOT_EXISTS, "文件上传成功但未找到用户文件记录"); + } AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO(); - respVO.setFileId(fileId); - respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(fileId)); + respVO.setFileId(infraFileId); // 返回infraFileId,保持与原有逻辑一致 + respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(userFile.getId())); // 使用TikUserFileDO.id获取播放URL respVO.setFormat(format); respVO.setSampleRate(cache.getSampleRate()); respVO.setRequestId(cache.getRequestId()); @@ -685,21 +943,40 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { @Async public void asyncTranscribeVoice(Long voiceId, String fileUrl) { try { + // 1. 检查是否已经识别过(防重复) + TikUserVoiceDO existingVoice = voiceMapper.selectById(voiceId); + if (existingVoice == null) { + log.warn("[asyncTranscribeVoice][配音记录不存在,配音编号({})]", voiceId); + return; + } + + // 如果已经有识别结果,不再重复识别 + if (StrUtil.isNotBlank(existingVoice.getTranscription())) { + log.info("[asyncTranscribeVoice][配音已经识别过,配音编号({}),跳过识别]", voiceId); + return; + } + log.info("[asyncTranscribeVoice][开始识别,配音编号({}),文件URL({})]", voiceId, fileUrl); Object result = tikHupService.videoToCharacters2(Collections.singletonList(fileUrl)); - + // 解析识别结果 String transcription = extractTranscription(result); - + if (StrUtil.isNotBlank(transcription)) { - // 更新识别结果 - TikUserVoiceDO updateObj = new TikUserVoiceDO() - .setId(voiceId) - .setTranscription(transcription); - voiceMapper.updateById(updateObj); - log.info("[asyncTranscribeVoice][识别成功,配音编号({}),文本长度({})]", voiceId, transcription.length()); + // 二次检查:解析后再次检查是否已经有识别结果(避免并发重复) + TikUserVoiceDO currentVoice = voiceMapper.selectById(voiceId); + if (currentVoice != null && StrUtil.isBlank(currentVoice.getTranscription())) { + // 更新识别结果 + TikUserVoiceDO updateObj = new TikUserVoiceDO() + .setId(voiceId) + .setTranscription(transcription); + voiceMapper.updateById(updateObj); + log.info("[asyncTranscribeVoice][识别成功,配音编号({}),文本长度({})]", voiceId, transcription.length()); + } 
else { + log.info("[asyncTranscribeVoice][并发跳过更新,配音编号({})已经有识别结果]", voiceId); + } } else { - log.warn("[asyncTranscribeVoice][识别结果为空,配音编号({}),返回码({})]", + log.warn("[asyncTranscribeVoice][识别结果为空,配音编号({}),返回码({})]", voiceId, result instanceof CommonResult ? ((CommonResult) result).getCode() : "未知"); } } catch (Exception e) { @@ -773,17 +1050,28 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { if (CollUtil.isEmpty(results)) { return null; } - Object lastObj = results.get(results.size() - 1); - if (!(lastObj instanceof JSONObject lastResult)) { + + // 阿里云语音识别:取第一个结果即可 + Object firstObj = results.get(0); + if (!(firstObj instanceof JSONObject firstResult)) { return null; } - String transcriptionUrl = lastResult.getStr("transcription_url"); - if (StrUtil.isBlank(transcriptionUrl)) { - return null; + + // 先从第一个结果中直接提取文本 + String directText = extractTextFromJson(firstResult); + if (StrUtil.isNotBlank(directText)) { + return directText; } - StringBuilder builder = new StringBuilder(); - appendRemoteTranscription(builder, transcriptionUrl); - return builder.length() > 0 ? builder.toString().trim() : null; + + // 如果没有直接文本,尝试获取 transcription_url + String transcriptionUrl = firstResult.getStr("transcription_url"); + if (StrUtil.isNotBlank(transcriptionUrl)) { + StringBuilder builder = new StringBuilder(); + appendRemoteTranscription(builder, transcriptionUrl); + return builder.length() > 0 ? 
builder.toString().trim() : null; + } + + return null; } } catch (Exception e) { log.warn("[parseTranscriptionText][解析Paraformer结果失败]", e); diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceRespVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceRespVO.java index ff5a1e4993..7f4f2cd6e7 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceRespVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceRespVO.java @@ -38,6 +38,9 @@ public class AppTikUserVoiceRespVO { @Schema(description = "备注", example = "这是一个测试配音") private String note; + @Schema(description = "复刻音色ID(CosyVoice 语音复刻生成的 voice_id)") + private String voiceId; + @Schema(description = "创建时间", requiredMode = Schema.RequiredMode.REQUIRED) private LocalDateTime createTime; diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java index 30231e4b74..a103e5c1a1 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoicePreviewReqVO.java @@ -1,6 +1,7 @@ package cn.iocoder.yudao.module.tik.voice.vo; import io.swagger.v3.oas.annotations.media.Schema; +import jakarta.validation.constraints.NotNull; import jakarta.validation.constraints.Size; import lombok.Data; @@ -10,33 +11,33 @@ import lombok.Data; @Data public class AppTikVoicePreviewReqVO { - @Schema(description = "输入文本") + @Schema(description = "配音编号(tik_user_voice.id),用户配音必传,系统配音可不传") + private Long voiceConfigId; + + @Schema(description = "CosyVoice音色ID(系统配音必传,用户配音可不传)") + private String voiceId; + + @Schema(description = "语音文件URL(当使用语音URL合成时必传,替代voiceId)") + private String fileUrl; + + @Schema(description = 
"语音文本/识别文本(当使用fileUrl时必传)") + @Size(max = 4000, message = "语音文本不能超过 4000 个字符") + private String transcriptionText; + + @Schema(description = "输入文本(可选,如果不传则使用配音的识别文本或默认文本)") @Size(max = 4000, message = "输入文本不能超过 4000 个字符") private String inputText; - @Schema(description = "识别文本,用于拼接") - @Size(max = 4000, message = "识别文本不能超过 4000 个字符") - private String transcriptionText; - - @Schema(description = "音色 ID(CosyVoice voiceId)") - private String voiceId; - - @Schema(description = "音色源音频 OSS 地址(当没有 voiceId 时必传)") - private String fileUrl; - - @Schema(description = "模型名称,默认 cosyvoice-v2") - private String model; - - @Schema(description = "语速", example = "1.0") + @Schema(description = "语速(可选,默认1.0)", example = "1.0") private Float speechRate; - @Schema(description = "音量", example = "0") + @Schema(description = "音量(可选,默认0)", example = "0") private Float volume; - @Schema(description = "情感", example = "neutral") + @Schema(description = "情感(可选,默认neutral)", example = "neutral") private String emotion; - @Schema(description = "音频格式,默认 wav") + @Schema(description = "音频格式(可选,默认mp3)", example = "mp3") private String audioFormat; } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java index 5630e18685..73688a3dcf 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikVoiceTtsReqVO.java @@ -14,6 +14,9 @@ public class AppTikVoiceTtsReqVO { @Size(max = 4000, message = "输入文本不能超过 4000 个字符") private String inputText; + @Schema(description = "配音编号(tik_user_voice.id),用户配音必传,系统配音可不传") + private Long voiceConfigId; + @Schema(description = "识别文本,用于拼接") @Size(max = 4000, message = "识别文本不能超过 4000 个字符") private String transcriptionText; diff --git a/yudao-server/src/main/resources/application.yaml 
b/yudao-server/src/main/resources/application.yaml index ce3848874e..1ab84c21f3 100644 --- a/yudao-server/src/main/resources/application.yaml +++ b/yudao-server/src/main/resources/application.yaml @@ -12,7 +12,7 @@ spring: servlet: # 文件上传相关配置项 multipart: - max-file-size: 100MB # 单个文件大小 + max-file-size: 100MB # 单个文件大小(配音文件建议50MB以内) max-request-size: 200MB # 设置总上传的文件大小(支持多文件上传) # Jackson 配置项