请先查看接入必读了解具体接入方式,再参考此文档完成接入。
payload
字段为将请求参数序列化后的json文本,详细说明请参考「功能调用-通用协议」。
payload
配置参数为json字符串格式
字段 | 描述 | 类型 | 是否必传 | 默认值 |
---|---|---|---|---|
text | 输入文本 | string | 否。text与ssml字段至少一个非空,若二者都非空则以ssml字段为准 | - |
ssml | 输入文本(SSML格式),与text字段至少一个非空 | string | 否。text与ssml字段至少一个非空,若二者都非空则以ssml字段为准 | - |
speaker | 发音人,具体见附录:发音人列表 | string | 是 | - |
audio_config | 补充参数 | object | 否 | |
audio_config.format | 输出音频编码格式,wav/mp3/aac | string | 否 | mp3 |
audio_config.sample_rate | 输出音频采样率,可选值 [8000,16000,22050,24000,32000,44100,48000] | number | 否 | 24000 |
audio_config.speech_rate | 语速,取值范围[-50,100],100代表2.0倍速,-50代表0.5倍速 | number | 否 | 0 |
audio_config.pitch_rate | 音调,取值范围[-12,12] | number | 否 | 0 |
audio_config.enable_timestamp | 是否选择同时返回字与音素时间戳 | bool | 否 | false |
示例:
{ "text": "欢迎使用文本转语音服务。", "speaker": "zh_female_qingxin", "audio_config": { "format": "wav", "sample_rate": 16000 } }
HTTP响应Content-Type: application/json
字段 | 描述 | 类型 |
---|---|---|
task_id | 请求任务id,用于链路追踪、问题排查 | string |
namespace | 服务接口命名空间,比如TTS | string |
data | 请求响应二进制数据,标准base64编码 | string |
payload | 请求响应文本信息,json字符串格式 | string |
status_code | 状态码 | number |
status_text | 状态信息 | string |
响应结果payload为json字符串格式,json内容格式如下:
字段 | 描述 | 类型 |
---|---|---|
duration | 音频时长,单位秒 | number |
words | 字的时间戳,单位秒。需要请求参数audio_config.enable_timestamp=true | array |
words.word | 字内容 | string |
words.start_time | 当前字开始时间 | number |
words.end_time | 当前字结束时间 | number |
phonemes | 音素的时间戳,单位秒。需要请求参数audio_config.enable_timestamp=true | array |
phonemes.phone | 音素内容 | string |
phonemes.start_time | 当前音素开始时间 | number |
phonemes.end_time | 当前音素结束时间 | number |
payload示例:
{ "duration": 3.0, "words": [ { "word": "你", "start_time": 0, "end_time": 0.05 }, ... ], "phonemes": [ { "phone": "C0n", "start_time": 0, "end_time": 0.025 }, ... ] }
非流式调用方式为:POST /api/v1/invoke
// Code sample:
// use http client to invoke SAMI HTTP Service
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"net/url"
	"os"
	"time"
)

// InvokeResponse mirrors the JSON envelope returned by POST /api/v1/invoke.
type InvokeResponse struct {
	StatusCode int32  `form:"status_code,required" json:"status_code,required" query:"status_code,required"`
	StatusText string `form:"status_text,required" json:"status_text,required" query:"status_text,required"`
	TaskId     string `form:"task_id,required" json:"task_id,required" query:"task_id,required"`
	Namespace  string `form:"namespace,required" json:"namespace,required" query:"namespace,required"`
	// Payload is itself a JSON document serialized as a string.
	Payload *string `form:"payload,omitempty" json:"payload,omitempty" query:"payload,omitempty"`
	// Data arrives base64-encoded; encoding/json decodes it into raw bytes.
	Data  []byte  `form:"data,omitempty" json:"data,omitempty" query:"data,omitempty"`
	State *string `form:"state,omitempty" json:"state,omitempty" query:"state,omitempty"`
}

const (
	domain = "https://sami.bytedance.com"

	// auth token
	appkey = "your_appKey"

	// SAMI method
	version   = "v4"
	namespace = "TTS"

	// dump output
	dataOutputFile    = "output.wav"
	payloadOutputFile = "output.json"
	isDump            = true

	// requestTimeout bounds the whole HTTP exchange so a stalled
	// connection cannot hang the program forever.
	requestTimeout = 30 * time.Second
)

// audioConfig is the audio_config object of the TTS request payload.
type audioConfig struct {
	Format string `json:"format"`
}

// ttsPayload is the TTS request that gets serialized into the "payload" field.
type ttsPayload struct {
	Speaker     string      `json:"speaker"`
	Text        string      `json:"text"`
	AudioConfig audioConfig `json:"audio_config"`
}

// invokeRequest is the HTTP JSON body; Payload holds the serialized ttsPayload.
type invokeRequest struct {
	Payload string `json:"payload"`
}

// buildRequestBody returns the HTTP JSON body for a wav synthesis request.
// Both layers are produced by json.Marshal so that quotes, backslashes and
// control characters in speaker or text are escaped correctly.
func buildRequestBody(speaker, text string) ([]byte, error) {
	payload, err := json.Marshal(ttsPayload{
		Speaker:     speaker,
		Text:        text,
		AudioConfig: audioConfig{Format: "wav"},
	})
	if err != nil {
		return nil, fmt.Errorf("encoding payload: %w", err)
	}
	body, err := json.Marshal(invokeRequest{Payload: string(payload)})
	if err != nil {
		return nil, fmt.Errorf("encoding request body: %w", err)
	}
	return body, nil
}

// buildInvokeURL returns the invoke endpoint with version, token, appkey and
// namespace as URL-escaped query parameters.
func buildInvokeURL(token string) string {
	q := url.Values{}
	q.Set("version", version)
	q.Set("token", token)
	q.Set("appkey", appkey)
	q.Set("namespace", namespace)
	return domain + "/api/v1/invoke?" + q.Encode()
}

// parseInvokeResponse decodes the raw HTTP response body into an InvokeResponse.
func parseInvokeResponse(raw []byte) (InvokeResponse, error) {
	var resp InvokeResponse
	if err := json.Unmarshal(raw, &resp); err != nil {
		return InvokeResponse{}, fmt.Errorf("parsing response %q: %w", raw, err)
	}
	return resp, nil
}

// run performs one non-streaming TTS invocation and, when isDump is set,
// writes the returned payload and audio data to the output files.
func run() error {
	// Get token
	token := "your_token"

	// Construct HTTP request
	// 1. Build the request payload from speaker and text
	// 2. Set HTTP json body
	// 3. Do HTTP POST request
	speaker := "zh_female_qingxin"
	text := "欢迎使用文本转语音服务。"
	body, err := buildRequestBody(speaker, text)
	if err != nil {
		return err
	}
	urlPath := buildInvokeURL(token)
	// NOTE: the logged URL includes the token; drop this line outside of demos.
	log.Printf("invoke request: %v", urlPath)

	// HTTP POST request
	client := &http.Client{Timeout: requestTimeout}
	start := time.Now()
	resp, err := client.Post(urlPath, "application/json", bytes.NewReader(body))
	if err != nil {
		return fmt.Errorf("posting invoke request: %w", err)
	}
	defer resp.Body.Close()

	// Parse HTTP response
	ret, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("reading response body: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected HTTP status %d: %s", resp.StatusCode, ret)
	}
	log.Printf("http invoke: cost=%vms", time.Since(start).Milliseconds())

	// parse SAMI response
	samiResp, err := parseInvokeResponse(ret)
	if err != nil {
		return err
	}
	payloadStr := ""
	if samiResp.Payload != nil {
		payloadStr = *samiResp.Payload
	}
	log.Printf("response task_id=%v, payload=%v, data=[%d]byte", samiResp.TaskId, payloadStr, len(samiResp.Data))

	if isDump && samiResp.Payload != nil {
		if err := os.WriteFile(payloadOutputFile, []byte(payloadStr), 0644); err != nil {
			return fmt.Errorf("writing %s: %w", payloadOutputFile, err)
		}
	}
	if isDump && len(samiResp.Data) > 0 {
		if err := os.WriteFile(dataOutputFile, samiResp.Data, 0644); err != nil {
			return fmt.Errorf("writing %s: %w", dataOutputFile, err)
		}
	}
	return nil
}

func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}
<?php
/***********************
 *
 * tts demo
 *
 * Sends one non-streaming TTS request to the invoke endpoint, prints the
 * audio duration reported in the response payload and writes the decoded
 * audio to tts_result.wav.
 *
 */

$url = "https://sami.bytedance.com/api/v1/invoke";

// Set the appkey and token.
$appkey = "your-appkey";
$token = "your-token";

// Set the TTS request parameters.
$speaker = "zh_female_qingxin";
$text = "欢迎使用智能语音服务。";
$format = "wav";
$sampleRate = 24000;

$audioConfig = array(
    "format" => $format,
    "sample_rate" => $sampleRate
);
$payload = array(
    "speaker" => $speaker,
    "text" => $text,
    "audio_config" => $audioConfig
);
// The payload travels as a JSON string nested inside the JSON body.
$payloadObj = json_encode($payload, JSON_UNESCAPED_UNICODE);
print "payload obj " . $payloadObj . "\n";

$body = array(
    "appkey" => $appkey,
    "token" => $token,
    "namespace" => "TTS",
    "payload" => $payloadObj
);
$bodyObj = json_encode($body, JSON_UNESCAPED_UNICODE);
print "body obj: " . $bodyObj . "\n";

// Send the HTTP request.
$ch = curl_init();
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_POST, TRUE);
$contentType = "application/json; charset=utf-8";
$httpHeaders = array(
    "Content-type:" . $contentType
);
curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders);
curl_setopt($ch, CURLOPT_POSTFIELDS, $bodyObj);
// Include the response headers in the returned string; they are split off below.
curl_setopt($ch, CURLOPT_HEADER, TRUE);
// Keep TLS certificate and host verification on: the request carries the token.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, TRUE);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);

$response = curl_exec($ch);
// Strict comparison: only a real FALSE signals a transport failure.
if ($response === FALSE) {
    print "curl_exec failed!\n";
    print curl_error($ch);
    curl_close($ch);
    return ;
}

$headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
$responseHeaders = substr($response, 0, $headerSize);
$responseBody = substr($response, $headerSize);
curl_close($ch);

print $responseHeaders . "\n";
if ($httpCode != 200) {
    print $responseBody . "\n";
    return ;
}

$resultObj = json_decode($responseBody, TRUE);
if (!is_array($resultObj) || !isset($resultObj["status_code"])) {
    print "invalid response body: " . $responseBody . "\n";
    return ;
}

$statusCode = $resultObj["status_code"];
if ($statusCode == 20000000) {
    // The payload is a JSON string that carries the audio duration.
    if (isset($resultObj["payload"])) {
        $bodyPayloadObj = json_decode($resultObj["payload"], TRUE);
        if (is_array($bodyPayloadObj) && isset($bodyPayloadObj["duration"])) {
            print $bodyPayloadObj["duration"] . "\n";
        }
    }

    // The audio arrives base64-encoded in the data field.
    if (isset($resultObj["data"])) {
        $dataDecode = base64_decode($resultObj["data"], TRUE);
        if ($dataDecode === FALSE) {
            print "response data is not valid base64\n";
            return ;
        }
        if (file_put_contents("tts_result.wav", $dataDecode) === FALSE) {
            print "failed to write tts_result.wav\n";
        }
    }
} else {
    // Report the business-level failure instead of exiting silently.
    $statusText = isset($resultObj["status_text"]) ? $resultObj["status_text"] : "";
    print "request failed: status_code=" . $statusCode . ", status_text=" . $statusText . "\n";
}
?>
HTTP状态码 | 业务状态码 | 错误信息 | 错误说明 | 解决办法 |
---|---|---|---|---|
400 | 40402004 | TTSInvalidSpeaker | TTS 发音人设置无效 | 检查TTS 发音人是否正确设置 |
400 | 40402001 | TTSEmptyText | TTS 未设置文本 | 检查TTS 文本是否设置 |
400 | 40402002 | TTSInvalidText | TTS 设置文本非法 | 检查TTS 文本是否与发音人不匹配,或是否无可读内容 |
400 | 40402003 | TTSExceededTextLimit | TTS 文本长度超限 | 检查TTS 文本是否超限。非流式接口上限为 1000 个utf-8字符;流式接口上限为 2000 个utf-8字符(包括空格、标点、汉字、字母等) |