# llm-open-connector.yml
openapi: 3.0.0
info:
  title: Salesforce LLM Open Connector API
  description: "The LLM Open Connector API allows Salesforce customers and partners to provide access to LLMs in a standard way so that they can be consumed by the Einstein 1 platform. This API is based on OpenAI's API with significant modifications to accommodate Salesforce use cases."
  version: "v1"
  termsOfService: ""
  contact:
    name: Einstein Foundations
    url: https://www.salesforce.com/artificial-intelligence/
  license:
    name: MIT
    url: https://github.com/salesforce/generic-llm-connector-openapi/blob/master/LICENSE
servers:
  - url: https://bring-your-own-llm.example.com
tags:
  - name: Chat
    description: Given a list of messages comprising a conversation, the model will return a response.
paths:
  /chat/completions:
    post:
      operationId: createChatCompletion
      tags:
        - Chat
      summary: Creates a model response for the given chat conversation.
      security:
        - ApiKeyAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/CreateChatCompletionRequest"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/CreateChatCompletionResponse"
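# Illustrative only: a minimal sketch of invoking this endpoint with curl,
# assuming the connector is deployed at the placeholder server URL above and
# that $LLM_API_KEY holds a valid key for the `api-key` header defined under
# securitySchemes below.
#
#   curl -X POST https://bring-your-own-llm.example.com/chat/completions \
#     -H "Content-Type: application/json" \
#     -H "api-key: $LLM_API_KEY" \
#     -d '{
#           "model": "gpt-4-turbo",
#           "messages": [{"role": "user", "content": "Hello!"}]
#         }'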
components:
  securitySchemes:
    ApiKeyAuth:
      type: apiKey
      in: header
      name: api-key
      description: "API key required to access the API, passed in the header."
  schemas:
    Error:
      type: object
      properties:
        code:
          type: string
          nullable: true
        message:
          type: string
          nullable: false
        param:
          type: string
          nullable: true
        type:
          type: string
          nullable: false
      required:
        - type
        - message
        - param
        - code
    ErrorResponse:
      type: object
      properties:
        error:
          $ref: "#/components/schemas/Error"
      required:
        - error
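    # Illustrative only: an ErrorResponse body matching the two schemas above.
    # The field values are hypothetical; `code` and `param` are nullable.
    #
    #   {
    #     "error": {
    #       "code": "429",
    #       "message": "Rate limit exceeded. Retry after 30 seconds.",
    #       "param": null,
    #       "type": "rate_limit_error"
    #     }
    #   }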
    CompletionUsage:
      type: object
      description: Usage statistics for the completion request.
      properties:
        completion_tokens:
          type: integer
          description: Number of tokens in the generated completion.
        prompt_tokens:
          type: integer
          description: Number of tokens in the prompt.
        total_tokens:
          type: integer
          description: Total number of tokens used in the request (prompt + completion).
      required:
        - prompt_tokens
        - completion_tokens
        - total_tokens
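    # Illustrative only: a CompletionUsage object with hypothetical counts.
    # Note that total_tokens is the sum of the other two fields (12 + 34 = 46).
    #
    #   { "prompt_tokens": 12, "completion_tokens": 34, "total_tokens": 46 }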
    CreateChatCompletionRequest:
      type: object
      properties:
        messages:
          description: A list of messages comprising the conversation so far.
          type: array
          minItems: 1
          items:
            $ref: "#/components/schemas/ChatCompletionRequestMessage"
        model:
          description: ID of the model to use.
          example: "gpt-4-turbo"
          type: string
        max_tokens:
          description: |
            The maximum number of [tokens](/tokenizer) that can be generated in the chat completion.
            The total length of input tokens and generated tokens is limited by the model's context length.
          type: integer
          nullable: true
        n:
          type: integer
          minimum: 1
          maximum: 128
          default: 1
          example: 1
          nullable: true
          description: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.
        temperature:
          type: number
          minimum: 0
          maximum: 2
          default: 1
          example: 1
          nullable: true
          description: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
        parameters:
          description: "Dictionary of any other parameters that are required by the provider. Values are passed as is to the provider so that the request can include parameters that are unique to a provider."
          type: object
          additionalProperties: true
          example: { "top_p": 0.5 }
      required:
        - model
        - messages
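    # Illustrative only: a request body matching CreateChatCompletionRequest.
    # The model ID and the provider-specific `parameters` entry (here, top_p)
    # are hypothetical and depend on the backing provider.
    #
    #   {
    #     "model": "gpt-4-turbo",
    #     "messages": [
    #       { "role": "system", "content": "You are a helpful assistant." },
    #       { "role": "user", "content": "Summarize this account's open cases." }
    #     ],
    #     "max_tokens": 256,
    #     "n": 1,
    #     "temperature": 0.2,
    #     "parameters": { "top_p": 0.5 }
    #   }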
    CreateChatCompletionResponse:
      type: object
      description: Represents a chat completion response returned by the model, based on the provided input.
      properties:
        id:
          type: string
          description: A unique identifier for the chat completion.
        choices:
          type: array
          description: A list of chat completion choices. Can be more than one if `n` is greater than 1.
          items:
            type: object
            required:
              - finish_reason
              - index
              - message
            properties:
              finish_reason:
                type: string
                description: &chat_completion_finish_reason_description |
                  The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence,
                  `length` if the maximum number of tokens specified in the request was reached,
                  `content_filter` if content was omitted due to a flag from our content filters,
                  `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function.
                enum:
                  [
                    "stop",
                    "length",
                    "tool_calls",
                    "content_filter",
                    "function_call",
                  ]
              index:
                type: integer
                description: The index of the choice in the list of choices.
              message:
                $ref: "#/components/schemas/ChatCompletionResponseMessage"
        created:
          type: integer
          description: The Unix timestamp (in seconds) of when the chat completion was created. If a model endpoint returns timestamps in milliseconds, they must be converted to seconds.
        model:
          type: string
          description: The model used for the chat completion.
        object:
          type: string
          description: The object type, which is always `chat.completion`.
          enum: [chat.completion]
        usage:
          $ref: "#/components/schemas/CompletionUsage"
      required:
        - choices
        - created
        - id
        - model
        - object
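    # Illustrative only: a response body matching CreateChatCompletionResponse.
    # All values are hypothetical; `created` is a Unix timestamp in seconds.
    #
    #   {
    #     "id": "chatcmpl-123",
    #     "object": "chat.completion",
    #     "created": 1719500000,
    #     "model": "gpt-4-turbo",
    #     "choices": [
    #       {
    #         "index": 0,
    #         "message": { "role": "assistant", "content": "Here is a summary..." },
    #         "finish_reason": "stop"
    #       }
    #     ],
    #     "usage": { "prompt_tokens": 12, "completion_tokens": 34, "total_tokens": 46 }
    #   }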
    ChatCompletionRequestMessage:
      type: object
      title: Chat Completion Message
      properties:
        content:
          description: The contents of the message.
          type: string
        role:
          type: string
          enum: ["system", "user", "assistant"]
          description: The role of the message's author.
        name:
          type: string
          description: An optional name for the participant. Provides the model information to differentiate between participants of the same role.
      required:
        - content
        - role
    ChatCompletionResponseMessage:
      type: object
      title: Chat Completion Response Message
      description: A chat completion message generated by the model.
      properties:
        content:
          type: string
          description: The contents of the message.
          nullable: true
        role:
          type: string
          enum: ["assistant"]
          description: The role of the author of this message.
      required:
        - role
        - content