You need to enable JavaScript to run this app.
导航
Moonshot-v1-8k API 调用指南
最近更新时间:2024.12.04 21:20:38 | 首次发布时间:2023.10.17 15:54:21

注意

即将下线,请使用新版 API/SDK,详情请参见《火山方舟 SDK V1/V2 下线公告及迁移指引》。

Moonshot-v1-8k 是 Moonshot AI 推出的一款千亿参数的语言模型,具备优秀的语义理解、指令遵循和文本生成能力。支持 8K 上下文窗口,适合短文本实时交互场景。本文详细介绍了 Moonshot-v1-8k 的SDK及API使用方法。

API Host

Host:maas-api.ml-platform-cn-beijing.volces.com
Region: cn-beijing

API SDK

提供统一 SDK 的接入形式(需要用 ak/sk 进行旁路鉴权,火山鉴权逻辑可以参考火山引擎官方文档)。

说明

调用前请修改:

  1. 设置环境变量(或在代码中修改):VOLC_ACCESSKEY 和 VOLC_SECRETKEY
  2. 修改调用模型名占位符 {YOUR_ENDPOINT_ID}(具体请参考 API Specification)

Golang

// Usage:
//
// 1. go get -u github.com/volcengine/volc-sdk-golang
// 2. VOLC_ACCESSKEY=XXXXX VOLC_SECRETKEY=YYYYY go run main.go
package main

import (
        "encoding/json"
        "errors"
        "fmt"
        "os"

        api "github.com/volcengine/volc-sdk-golang/service/maas/models/api/v2"
        client "github.com/volcengine/volc-sdk-golang/service/maas/v2"
)

// main builds a three-turn sample conversation and sends it to the configured
// endpoint twice: first as a blocking chat call, then as a streaming call.
func main() {
	// Credentials are taken from the environment rather than hard-coded.
	accessKey := os.Getenv("VOLC_ACCESSKEY")
	secretKey := os.Getenv("VOLC_SECRETKEY")

	maasClient := client.NewInstance("maas-api.ml-platform-cn-beijing.volces.com", "cn-beijing")
	maasClient.SetAccessKey(accessKey)
	maasClient.SetSecretKey(secretKey)

	// Conversation history, oldest message first; the final entry is the
	// user question the model is asked to answer.
	history := []*api.Message{
		{Role: api.ChatRoleUser, Content: "天为什么这么蓝?"},
		{Role: api.ChatRoleAssistant, Content: "因为有你"},
		{Role: api.ChatRoleUser, Content: "花儿为什么这么香?"},
	}
	chatReq := &api.ChatReq{Messages: history}

	// Placeholder: replace with the real endpoint ID before running.
	const endpointID = "{YOUR_ENDPOINT_ID}"
	TestNormalChat(maasClient, endpointID, chatReq)
	TestStreamChat(maasClient, endpointID, chatReq)
}

// TestNormalChat sends req to the given endpoint as a single blocking chat
// call and prints either the JSON-encoded answer or the failure.
//
// Failures are never dropped silently: an error that unwraps to *api.Error is
// printed together with the status value returned by Chat, and any other
// error is printed as-is with the same status value.
func TestNormalChat(r *client.MaaS, endpointId string, req *api.ChatReq) {
	got, status, err := r.Chat(endpointId, req)
	if err != nil {
		var apiErr *api.Error
		if errors.As(err, &apiErr) {
			fmt.Printf("meet maas error=%v, status=%d\n", apiErr, status)
			return
		}
		// The SDK is expected to return *api.Error, but an error of any other
		// type must still be reported instead of vanishing without output.
		fmt.Printf("meet unexpected error=%v, status=%d\n", err, status)
		return
	}
	fmt.Println("chat answer", mustMarshalJson(got))
}

// TestStreamChat sends req to the given endpoint as a streaming chat call and
// prints every response received on the returned channel as JSON.
//
// A failure to open the stream is always reported: an error that unwraps to
// *api.Error is printed with the "meet maas error" prefix, and any other
// error is printed with the "meet unexpected error" prefix. A response whose
// Error field is non-nil is printed and ends the loop.
func TestStreamChat(r *client.MaaS, endpointId string, req *api.ChatReq) {
	ch, err := r.StreamChat(endpointId, req)
	if err != nil {
		var apiErr *api.Error
		if errors.As(err, &apiErr) {
			fmt.Println("meet maas error", apiErr.Error())
			return
		}
		// The SDK is expected to return *api.Error, but an error of any other
		// type must still be reported instead of vanishing without output.
		fmt.Println("meet unexpected error", err.Error())
		return
	}

	for resp := range ch {
		if resp.Error != nil {
			// An error can also surface while the stream is being processed.
			fmt.Println(mustMarshalJson(resp.Error))
			return
		}
		fmt.Println(mustMarshalJson(resp))
		// Usage is only printed when present; per the API description it is
		// carried by the last frame of the stream.
		if resp.Usage != nil {
			fmt.Println(mustMarshalJson(resp.Usage))
		}
	}
}

// mustMarshalJson returns the JSON encoding of v as a string, for printing.
//
// If v cannot be encoded, the marshal error is returned inside a bracketed
// placeholder string instead of being discarded, so a failed encode is
// visible in the output rather than showing up as an empty line.
func mustMarshalJson(v interface{}) string {
	s, err := json.Marshal(v)
	if err != nil {
		return fmt.Sprintf("<json marshal error: %v>", err)
	}
	return string(s)
}

Python

注意

目前仅支持 python>=3.5

'''
Usage:

1. python3 -m pip install --user volcengine
2. VOLC_ACCESSKEY=XXXXX VOLC_SECRETKEY=YYYYY python main.py
'''
import os
from volcengine.maas.v2 import MaasService
from volcengine.maas import MaasException, ChatRole

def test_chat(maas, endpoint_id, req):
    """Send ``req`` to ``endpoint_id`` as a blocking chat call and print the outcome.

    The response is printed on success; if the call raises ``MaasException``,
    the exception is printed instead.
    """
    try:
        print(maas.chat(endpoint_id, req))
    except MaasException as exc:
        print(exc)

def test_stream_chat(maas, endpoint_id, req):
    """Send ``req`` to ``endpoint_id`` as a streaming chat call and print each item.

    Every item yielded by ``maas.stream_chat`` is printed in order; if a
    ``MaasException`` is raised while opening or iterating the stream, the
    exception is printed instead.
    """
    try:
        for chunk in maas.stream_chat(endpoint_id, req):
            print(chunk)
    except MaasException as exc:
        print(exc)

if __name__ == '__main__':
    # Credentials are taken from the environment rather than hard-coded.
    access_key = os.getenv("VOLC_ACCESSKEY")
    secret_key = os.getenv("VOLC_SECRETKEY")

    maas = MaasService('maas-api.ml-platform-cn-beijing.volces.com', 'cn-beijing')
    maas.set_ak(access_key)
    maas.set_sk(secret_key)

    # document: "https://www.volcengine.com/docs/82379/1099475"
    # Conversation history, oldest message first; the final entry is the
    # user question the model is asked to answer.
    messages = [
        {"role": ChatRole.USER, "content": "天为什么这么蓝"},
        {"role": ChatRole.ASSISTANT, "content": "因为有你"},
        {"role": ChatRole.USER, "content": "花儿为什么这么香?"},
    ]
    req = {"messages": messages}

    # Placeholder: replace with the real endpoint ID before running.
    endpoint_id = "{YOUR_ENDPOINT_ID}"
    test_chat(maas, endpoint_id, req)
    test_stream_chat(maas, endpoint_id, req)
#

Java

/*

# pom.xml

<dependency>
        <groupId>com.volcengine</groupId>
        <artifactId>volc-sdk-java</artifactId>
        <version>LATEST</version>

</dependency>

*/

package com.volcengine.example.maas.v2;

import com.volcengine.model.maas.api.v2.*;
import com.volcengine.service.maas.MaasException;
import com.volcengine.service.maas.v2.MaasService;
import com.volcengine.service.maas.v2.impl.MaasServiceImpl;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.stream.Stream;

/**
 * Demo of the MaaS v2 chat API: sends one sample conversation to an endpoint,
 * first as a blocking call and then as a streaming call, printing the results.
 */
public class ChatV2Demo {
    public static void main(String[] args) {
        MaasService maasService = new MaasServiceImpl("maas-api.ml-platform-cn-beijing.volces.com", "cn-beijing");

        // fetch ak&sk from environmental variables
        maasService.setAccessKey(System.getenv("VOLC_ACCESSKEY"));
        maasService.setSecretKey(System.getenv("VOLC_SECRETKEY"));

        // Conversation history, oldest message first.
        ChatReq req = new ChatReq()
                .withMessages(new ArrayList<>(Arrays.asList(
                        new Message().withRole(Message.ChatRole.USER).withContent("天为什么这么蓝?"),
                        new Message().withRole(Message.ChatRole.ASSISTANT).withContent("因为有你"),
                        new Message().withRole(Message.ChatRole.USER).withContent("什么是中国")
                )));

        // Placeholder: replace with the real endpoint ID before running. It is
        // spelled exactly like the documented token so that a literal
        // search-and-replace does not leave a stray '$' in front of the ID.
        String endpointId = "{YOUR_ENDPOINT_ID}";
        testChat(maasService, endpointId, req);
        testStreamChat(maasService, endpointId, req);
    }

    /**
     * Sends {@code req} as a blocking chat call and prints the first choice's
     * content followed by the usage; on {@link MaasException} prints the error
     * details and the stack trace.
     */
    private static void testChat(MaasService maasService, String endpointId, ChatReq req) {
        try {
            ChatResp resp = maasService.chat(endpointId, req);
            System.out.println(resp.getChoices().get(0).getMessage().getContent());
            System.out.println(resp.getUsage());
        } catch (MaasException e) {
            printMaasException(e);
            e.printStackTrace();
        }
    }

    /**
     * Sends {@code req} as a streaming chat call and prints the content of each
     * response as it arrives, plus the usage when a response reports a positive
     * total token count.
     */
    private static void testStreamChat(MaasService maasService, String endpointId, ChatReq req) {
        Stream<ChatResp> resps;
        try {
            resps = maasService.streamChat(endpointId, req);
        } catch (MaasException e) {
            printMaasException(e);
            e.printStackTrace();
            // Stop here: there is no stream to read. The previous code relied on
            // an `assert`, which is disabled by default, and then dereferenced
            // the null stream.
            return;
        }

        // it is possible that error occurs during response processing
        try {
            resps.forEach(resp -> {
                System.out.println(resp.getChoices().get(0).getMessage().getContent());

                // last message, will return full response including usage, role, finish_reason, etc.
                if (resp.getUsage() != null && resp.getUsage().getTotalTokens() > 0) {
                    System.out.println(resp.getUsage());
                }
            });
        } catch (RuntimeException e) {
            // A failure during streaming is reported as a RuntimeException whose
            // cause may be the underlying MaasException.
            Throwable cause = e.getCause();
            if (cause instanceof MaasException) {
                printMaasException((MaasException) cause);
            }
            System.out.println("caught: " + e);
        }
    }

    /** Prints the request id, code, numeric code and message of {@code e}, one per line. */
    private static void printMaasException(MaasException e) {
        System.out.println("req_id: " + e.getRequestId());
        System.out.println("code: " + e.getCode());
        System.out.println("code_n: " + e.getCodeN());
        System.out.println("message: " + e.getMsg());
    }
}

API Specification

Chat

主要参考 OpenAI 和 HuggingFace
Parameters 记录可选控制参数,具体哪些参数可用依赖模型服务(模型详情页会描述哪些参数可用)

Input

字段

类型

描述

messages (required)

list

[
  {
    "role": "user",
    "content": "how to learn python?"
  }
]
  1. role:消息角色,目前支持user/assistant/system
  2. content:消息内容

消息是列表形式,依次记录了消息聊天上下文(第一个是最早的对话,最后一个是最新的对话),形式上类似:Q1, A1, Q2, A2, Q3(输入应该是奇数个)

stream

boolean

是否流式返回。如果为 true,则按 SSE 协议返回数据

parameters.max_new_tokens

integer

最多新生成 token 数(不包含 prompt 的 token 数目),和max_tokens一般二选一设置,[1, 4000]

1000(依赖模型默认配置)

parameters.temperature

number

采样温度,(0, 1.0]

parameters.top_p

number

核采样,[0, 1.0]

parameters.top_k

integer

top-k-filtering 算法保留多少个 最高概率的词 作为候选,正整数。

parameters.stop

list

用于指定模型在生成响应时应停止的标记。当模型生成的响应中包含这些标记时,生成过程将停止

parameters.logit_bias

map<string,number>

接受一个map,该对象将token(token id使用tokenization接口获取)映射到从-100到100的关联偏差值。每个模型的效果有所不同,但-1和1之间的值会减少或增加选择的可能性;-100或100应该导致禁止或排他选择相关的token。

Output

字段

类型

描述

req_id

string

请求 id

choice

object

{
    "message": {
        "role": "assistant",
        "content": "Learning Python can be a fun and rewarding experience, and there are many resources available to help you get started. Here are some steps you can take to learn Python:\n\n1. Start by installing Python on your computer. You can download the latest version of Python from the official website at <https://www.python.org/downloads/>.\n2. Once you have Python installed, open a terminal or command prompt and enter `python` to start learning the language. This will open a Python interpreter, where you can start writing and running code.\n3. Learn the basics of Python by reading through the official Python documentation at <https://docs.python.org/3/>. This will cover topics such as variables, data types, control structures, functions, and modules.\n4. There are many online resources available to help you learn Python, including tutorials, courses, and forums. Some popular resources include Codecademy, edX, and Coursera.\n5. Practice writing and running code. The more you practice, the better you will become at Python. You can find many exercises and projects online to help you practice.\n6. Join a Python community. Python has a large and active community of developers, and joining one of these communities can be a great way to learn from others and get support. You can find communities on social media, online forums, and through online communities such as Stack Overflow.\n\nOverall, learning Python requires practice and dedication, but with the right resources and approach, it can be a fun and rewarding experience."
    },
    "finish_reason": "stop"
}
  1. message:同上说明
  2. finish_reason:结束原因
    1. stop表示正常生成结束
    2. length表示已经到了指定的最大 token 数量(max_tokens/max_new_tokens)

usage

object

{
    "prompt_tokens": 18,
    "completion_tokens": 317,
    "total_tokens": 335
}
  1. prompt_tokens:提示的 prompt token 数量
  2. completion_tokens:生成的 token 数量
  3. total_tokens:总的 token 数量

在 stream 模式下,只有最后一个输出 frame 才会记录 usage 内容

error(optional)

object

{
    "code": "UnauthorizedCode",
    "code_n": 123456,
    "message": "You are not authorized"
}
  1. code:错误的代码;
  2. code_n:错误的数字代码;
  3. message:错误的信息。

如果没有错误,error 这个条目为空
如果有错误,其余条目都为空,只有 error 条目

在 stream 模式下,基于 SSE (Server-Sent Events) 协议返回生成内容,每次返回结果为生成的部分内容片段:

  • 内容片段按照生成的先后顺序返回,完整的结果需要调用者拼接才能得到;
  • 如果流式请求开始时就出现错误(如参数错误),HTTP返回非200,方法调用也会直接返回错误;
  • 如果流式过程中出现错误,HTTP 依然会返回 200, 错误信息会在一个片段返回。