272 lines
13 KiB
Markdown
272 lines
13 KiB
Markdown
# 阿里云CosyVoice Java SDK 调用模板(参数+示例)
|
||
## 一、前提条件
|
||
1. 开通CosyVoice服务,获取API Key(建议配置到环境变量,避免硬编码)。
|
||
2. 安装最新版DashScope Java SDK(本模板适用于2.20.3及以上版本,SSML功能同样需要2.20.3及以上版本)。
|
||
3. 模型与音色需匹配(如v2模型对应v2音色,v3模型对应v3音色)。
|
||
|
||
## 二、核心参数汇总
|
||
| 参数名 | 类型 | 是否必填 | 默认值 | 取值范围/说明 |
|
||
|--------------|------------|----------|-------------------------|------------------------------------------------------------------------------|
|
||
| model | String | 是 | - | cosyvoice-v1/v2/v3/v3-plus(v3系列需申请邀测) |
|
||
| voice | String | 是 | - | 对应模型的音色(如v2:longxiaochun_v2;v3:longhuohuo_v3,详见文档音色列表) |
|
||
| format | enum | 否 | 因音色而异(例如MP3 22050Hz) | 支持WAV/MP3/PCM/OGG_OPUS,如PCM_22050HZ_MONO_16BIT、MP3_24000HZ_MONO_256KBPS |
|
||
| volume | int | 否 | 50 | 0~100(音量大小) |
|
||
| speechRate | float | 否 | 1.0 | 0.5~2.0(语速,1.0为默认,约4字/秒) |
|
||
| pitchRate | float | 否 | 1.0 | 0.5~2.0(语调) |
|
||
| bit_rate | int | 否 | 32 | 6~510kbps(仅opus格式支持,v1模型不支持) |
|
||
| seed | int | 否 | 0 | 0~65535(随机数种子,仅v3/v3-plus支持) |
|
||
| style | int | 否 | 0 | ≥0整数(风格调整,仅v3/v3-plus支持) |
|
||
| languageHints| List<String> | 否 | - | 仅v3/v3-plus支持,单次配置1个语种("zh"/"en") |
|
||
| instruction | String | 否 | - | 仅v3/v3-plus支持,格式:"你说话的情感是<情感值>"(如"Happy"/"Angry") |
|
||
|
||
## 三、四种核心调用方式模板
|
||
### 1. 同步调用(阻塞式,适合短文本)
|
||
```java
|
||
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
|
||
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
|
||
import java.io.File;
|
||
import java.io.FileOutputStream;
|
||
import java.nio.ByteBuffer;
|
||
|
||
public class SyncCallTemplate {
|
||
// 配置参数(根据需求修改)
|
||
private static final String MODEL = "cosyvoice-v3"; // 模型
|
||
private static final String VOICE = "longhuohuo_v3"; // 音色
|
||
private static final String TEXT = "今天天气真好,适合出门散步!"; // ≤2000字符
|
||
private static final String OUTPUT_FILE = "output.mp3"; // 输出文件
|
||
|
||
public static void main(String[] args) {
|
||
// 1. 构建请求参数
|
||
SpeechSynthesisParam param = SpeechSynthesisParam.builder()
|
||
// .apiKey("your-api-key") // 未配置环境变量时打开
|
||
.model(MODEL)
|
||
.voice(VOICE)
|
||
.volume(60) // 可选:调整音量
|
||
.speechRate(1.1f) // 可选:调整语速
|
||
.build();
|
||
|
||
// 2. 初始化合成器(同步调用传null)
|
||
SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, null);
|
||
ByteBuffer audioData = null;
|
||
|
||
try {
|
||
// 3. 阻塞调用,获取完整音频
|
||
audioData = synthesizer.call(TEXT);
|
||
// 4. 保存音频到本地
|
||
if (audioData != null) {
|
||
try (FileOutputStream fos = new FileOutputStream(new File(OUTPUT_FILE))) {
|
||
fos.write(audioData.array());
|
||
}
|
||
System.out.println("合成成功!输出文件:" + OUTPUT_FILE);
|
||
System.out.println("RequestId:" + synthesizer.getLastRequestId());
|
||
System.out.println("首包延迟:" + synthesizer.getFirstPackageDelay() + "ms");
|
||
}
|
||
} catch (Exception e) {
|
||
System.err.println("合成失败:" + e.getMessage());
|
||
} finally {
|
||
// 5. 关闭WebSocket连接
|
||
synthesizer.getDuplexApi().close(1000, "任务结束");
|
||
}
|
||
}
|
||
}
|
||
```
|
||
|
||
### 2. 异步调用(非阻塞,短文本实时接收)
|
||
```java
|
||
import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisAudioFormat;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
import com.alibaba.dashscope.common.ResultCallback;
import java.util.concurrent.CountDownLatch;
|
||
|
||
public class AsyncCallTemplate {
|
||
private static final String MODEL = "cosyvoice-v2";
|
||
private static final String VOICE = "longxiaochun_v2";
|
||
private static final String TEXT = "欢迎使用阿里云CosyVoice语音合成服务!"; // ≤2000字符
|
||
|
||
public static void main(String[] args) throws InterruptedException {
|
||
CountDownLatch latch = new CountDownLatch(1);
|
||
|
||
// 1. 配置回调(实时接收音频)
|
||
ResultCallback<SpeechSynthesisResult> callback = new ResultCallback<SpeechSynthesisResult>() {
|
||
@Override
|
||
public void onEvent(SpeechSynthesisResult result) {
|
||
// 接收音频分片(可实时播放或写入文件)
|
||
if (result.getAudioFrame() != null) {
|
||
System.out.println("收到音频分片,大小:" + result.getAudioFrame().capacity() + "字节");
|
||
// 此处可添加音频处理逻辑(如流式播放、追加写入文件)
|
||
}
|
||
// 查看计费字符数(最终以最后一次为准)
|
||
if (result.getUsage() != null) {
|
||
System.out.println("当前计费字符数:" + result.getUsage().getCharacters());
|
||
}
|
||
}
|
||
|
||
@Override
|
||
public void onComplete() {
|
||
System.out.println("合成完成!");
|
||
latch.countDown();
|
||
}
|
||
|
||
@Override
|
||
public void onError(Exception e) {
|
||
System.err.println("合成失败:" + e.getMessage());
|
||
latch.countDown();
|
||
}
|
||
};
|
||
|
||
// 2. 构建参数并初始化合成器
|
||
SpeechSynthesisParam param = SpeechSynthesisParam.builder()
|
||
// .apiKey("your-api-key")
|
||
.model(MODEL)
|
||
.voice(VOICE)
|
||
.format(SpeechSynthesisAudioFormat.MP3_16000HZ_MONO_128KBPS) // 可选配置格式
|
||
.build();
|
||
SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, callback);
|
||
|
||
try {
|
||
// 3. 非阻塞调用
|
||
synthesizer.call(TEXT);
|
||
latch.await(); // 等待合成完成
|
||
System.out.println("RequestId:" + synthesizer.getLastRequestId());
|
||
} catch (Exception e) {
|
||
System.err.println("调用异常:" + e.getMessage());
|
||
} finally {
|
||
synthesizer.getDuplexApi().close(1000, "任务结束");
|
||
}
|
||
}
|
||
}
|
||
```
|
||
|
||
### 3. 流式调用(分段传文本,适合长文本)
|
||
```java
|
||
import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult;
|
||
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisAudioFormat;
|
||
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
|
||
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
|
||
import com.alibaba.dashscope.common.ResultCallback;
|
||
|
||
public class StreamingCallTemplate {
|
||
// 分段文本(每段≤2000字符,累计≤20万字符)
|
||
private static final String[] TEXT_SEGMENTS = {
|
||
"流式语音合成适合长文本场景,",
|
||
"可以分段发送文本,",
|
||
"服务端实时返回音频,",
|
||
"减少等待时间。"
|
||
};
|
||
private static final String MODEL = "cosyvoice-v3";
|
||
private static final String VOICE = "longchuanshu_v3";
|
||
|
||
public static void main(String[] args) {
|
||
// 1. 配置回调
|
||
ResultCallback<SpeechSynthesisResult> callback = new ResultCallback<SpeechSynthesisResult>() {
|
||
@Override
|
||
public void onEvent(SpeechSynthesisResult result) {
|
||
if (result.getAudioFrame() != null) {
|
||
System.out.println("收到流式音频分片");
|
||
// 处理音频(如实时播放、写入缓冲文件)
|
||
}
|
||
}
|
||
|
||
@Override
|
||
public void onComplete() {
|
||
System.out.println("流式合成全部完成!");
|
||
}
|
||
|
||
@Override
|
||
public void onError(Exception e) {
|
||
System.err.println("流式合成失败:" + e.getMessage());
|
||
}
|
||
};
|
||
|
||
// 2. 构建参数
|
||
SpeechSynthesisParam param = SpeechSynthesisParam.builder()
|
||
// .apiKey("your-api-key")
|
||
.model(MODEL)
|
||
.voice(VOICE)
|
||
.format(SpeechSynthesisAudioFormat.PCM_22050HZ_MONO_16BIT)
|
||
.speechRate(0.9f)
|
||
.build();
|
||
SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, callback);
|
||
|
||
try {
|
||
// 3. 分段发送文本(间隔≤23秒)
|
||
for (String segment : TEXT_SEGMENTS) {
|
||
synthesizer.streamingCall(segment);
|
||
Thread.sleep(500); // 模拟文本输入间隔
|
||
}
|
||
// 4. 必须调用:结束流式合成(触发剩余文本合成)
|
||
synthesizer.streamingComplete();
|
||
System.out.println("RequestId:" + synthesizer.getLastRequestId());
|
||
} catch (Exception e) {
|
||
System.err.println("调用异常:" + e.getMessage());
|
||
} finally {
|
||
synthesizer.getDuplexApi().close(1000, "任务结束");
|
||
}
|
||
}
|
||
}
|
||
```
|
||
|
||
### 4. Flowable调用(响应式编程,支持流式输入输出)
|
||
```java
|
||
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
|
||
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
|
||
import com.alibaba.dashscope.exception.NoApiKeyException;
|
||
import io.reactivex.Flowable;
|
||
import io.reactivex.BackpressureStrategy;
|
||
|
||
public class FlowableCallTemplate {
|
||
private static final String MODEL = "cosyvoice-v2";
|
||
private static final String VOICE = "longyingtian_v2";
|
||
private static final String[] TEXT_ARRAY = {"响应式编程模式,", "支持流式输入输出,", "适合高并发场景。"};
|
||
|
||
public static void main(String[] args) throws NoApiKeyException {
|
||
// 1. 模拟流式文本输入
|
||
Flowable<String> textStream = Flowable.create(emitter -> {
|
||
new Thread(() -> {
|
||
for (String text : TEXT_ARRAY) {
|
||
emitter.onNext(text);
|
||
try {
|
||
Thread.sleep(800);
|
||
} catch (InterruptedException e) {
|
||
emitter.onError(e);
|
||
}
|
||
}
|
||
emitter.onComplete();
|
||
}).start();
|
||
}, BackpressureStrategy.BUFFER);
|
||
|
||
// 2. 构建参数
|
||
SpeechSynthesisParam param = SpeechSynthesisParam.builder()
|
||
// .apiKey("your-api-key")
|
||
.model(MODEL)
|
||
.voice(VOICE)
|
||
.volume(70)
|
||
.build();
|
||
SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, null);
|
||
|
||
try {
|
||
// 3. 流式调用并处理结果
|
||
synthesizer.streamingCallAsFlowable(textStream)
|
||
.blockingForEach(result -> {
|
||
if (result.getAudioFrame() != null) {
|
||
System.out.println("Flowable收到音频,大小:" + result.getAudioFrame().capacity() + "字节");
|
||
// 处理音频逻辑
|
||
}
|
||
});
|
||
System.out.println("Flowable合成完成!RequestId:" + synthesizer.getLastRequestId());
|
||
} finally {
|
||
synthesizer.getDuplexApi().close(1000, "任务结束");
|
||
}
|
||
}
|
||
}
|
||
```
|
||
|
||
## 四、核心注意事项
|
||
1. 文本长度限制:非流式单次≤2000字符,流式累计≤20万字符(含SSML标签)。
|
||
2. 字符计算规则:汉字=2字符,英文/数字/标点/空格=1字符。
|
||
3. 流式调用在发送完全部文本后必须调用`streamingComplete()`,否则结尾部分的文本无法合成。
|
||
4. 每次调用`call()`前需重新初始化`SpeechSynthesizer`实例。
|
||
5. 音频格式需与播放器兼容(如MP3/OPUS支持流式播放,推荐使用ffmpeg、AudioFormat等工具)。
|
||
|