POST /assistant

Example request:
curl --request POST \
  --url https://api.vapi.ai/assistant \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '{
  "transcriber": {
    "provider": "deepgram",
    "model": "nova-2",
    "language": "bg",
    "smartFormat": true,
    "keywords": [
      "<string>"
    ]
  },
  "model": {
    "messages": [
      {
        "content": "<string>",
        "role": "assistant"
      }
    ],
    "tools": [
      {
        "async": true,
        "messages": [
          {
            "type": "request-start",
            "content": "<string>",
            "conditions": [
              {
                "param": "<string>",
                "value": "<string>",
                "operator": "eq"
              }
            ]
          }
        ],
        "type": "dtmf",
        "function": {
          "name": "<string>",
          "description": "<string>",
          "parameters": {
            "type": "object",
            "properties": {},
            "required": [
              "<string>"
            ]
          }
        },
        "server": {
          "timeoutSeconds": 20,
          "url": "<string>",
          "secret": "<string>"
        }
      }
    ],
    "toolIds": [
      "<string>"
    ],
    "provider": "anyscale",
    "model": "<string>",
    "temperature": 1,
    "knowledgeBase": {
      "provider": "canonical",
      "topK": 5.5,
      "fileIds": [
        "<string>"
      ]
    },
    "maxTokens": 525,
    "emotionRecognitionEnabled": true
  },
  "voice": {
    "inputPreprocessingEnabled": true,
    "inputReformattingEnabled": true,
    "inputMinCharacters": 30,
    "inputPunctuationBoundaries": [
      "。",
      ",",
      ".",
      "!",
      "?",
      ";",
      ")",
      "،",
      "۔",
      "।",
      "॥",
      "|",
      "||",
      ",",
      ":"
    ],
    "fillerInjectionEnabled": true,
    "provider": "azure",
    "voiceId": "andrew",
    "speed": 1.25
  },
  "firstMessageMode": "assistant-speaks-first",
  "recordingEnabled": true,
  "hipaaEnabled": true,
  "clientMessages": [
    "conversation-update",
    "function-call",
    "hang",
    "model-output",
    "speech-update",
    "status-update",
    "transcript",
    "tool-calls",
    "user-interrupted",
    "voice-input"
  ],
  "serverMessages": [
    "conversation-update",
    "end-of-call-report",
    "function-call",
    "hang",
    "speech-update",
    "status-update",
    "tool-calls",
    "transfer-destination-request",
    "user-interrupted"
  ],
  "silenceTimeoutSeconds": 30,
  "responseDelaySeconds": 0.4,
  "llmRequestDelaySeconds": 0.1,
  "numWordsToInterruptAssistant": 5,
  "maxDurationSeconds": 1800,
  "backgroundSound": "office",
  "backchannelingEnabled": true,
  "backgroundDenoisingEnabled": true,
  "modelOutputInMessagesEnabled": true,
  "name": "<string>",
  "firstMessage": "<string>",
  "voicemailDetection": {
    "provider": "twilio",
    "voicemailDetectionTypes": [
      "machine_end_beep",
      "machine_end_silence"
    ],
    "enabled": true,
    "machineDetectionTimeout": 31,
    "machineDetectionSpeechThreshold": 3500,
    "machineDetectionSpeechEndThreshold": 2750,
    "machineDetectionSilenceTimeout": 6000
  },
  "voicemailMessage": "<string>",
  "endCallMessage": "<string>",
  "endCallPhrases": [
    "<string>"
  ],
  "metadata": {},
  "serverUrl": "<string>",
  "serverUrlSecret": "<string>",
  "analysisPlan": {
    "summaryPrompt": "<string>",
    "summaryRequestTimeoutSeconds": 10.5,
    "structuredDataRequestTimeoutSeconds": 10.5,
    "successEvaluationPrompt": "<string>",
    "successEvaluationRubric": "NumericScale",
    "successEvaluationRequestTimeoutSeconds": 10.5,
    "structuredDataPrompt": "<string>",
    "structuredDataSchema": {
      "type": "string",
      "items": {},
      "properties": {},
      "description": "<string>",
      "required": [
        "<string>"
      ]
    }
  },
  "artifactPlan": {
    "videoRecordingEnabled": true
  },
  "messagePlan": {
    "idleMessages": [
      "<string>"
    ],
    "idleMessageMaxSpokenCount": 5.5,
    "idleTimeoutSeconds": 7.5
  }
}'

Example response (201):

{
  "transcriber": {
    "provider": "deepgram",
    "model": "nova-2",
    "language": "bg",
    "smartFormat": true,
    "keywords": [
      "<string>"
    ]
  },
  "model": {
    "messages": [
      {
        "content": "<string>",
        "role": "assistant"
      }
    ],
    "tools": [
      {
        "async": true,
        "messages": [
          {
            "type": "request-start",
            "content": "<string>",
            "conditions": [
              {
                "param": "<string>",
                "value": "<string>",
                "operator": "eq"
              }
            ]
          }
        ],
        "type": "dtmf",
        "function": {
          "name": "<string>",
          "description": "<string>",
          "parameters": {
            "type": "object",
            "properties": {},
            "required": [
              "<string>"
            ]
          }
        },
        "server": {
          "timeoutSeconds": 20,
          "url": "<string>",
          "secret": "<string>"
        }
      }
    ],
    "toolIds": [
      "<string>"
    ],
    "provider": "anyscale",
    "model": "<string>",
    "temperature": 1,
    "knowledgeBase": {
      "provider": "canonical",
      "topK": 5.5,
      "fileIds": [
        "<string>"
      ]
    },
    "maxTokens": 525,
    "emotionRecognitionEnabled": true
  },
  "voice": {
    "inputPreprocessingEnabled": true,
    "inputReformattingEnabled": true,
    "inputMinCharacters": 30,
    "inputPunctuationBoundaries": [
      "。",
      ",",
      ".",
      "!",
      "?",
      ";",
      ")",
      "،",
      "۔",
      "।",
      "॥",
      "|",
      "||",
      ",",
      ":"
    ],
    "fillerInjectionEnabled": true,
    "provider": "azure",
    "voiceId": "andrew",
    "speed": 1.25
  },
  "firstMessageMode": "assistant-speaks-first",
  "recordingEnabled": true,
  "hipaaEnabled": true,
  "clientMessages": [
    "conversation-update",
    "function-call",
    "hang",
    "model-output",
    "speech-update",
    "status-update",
    "transcript",
    "tool-calls",
    "user-interrupted",
    "voice-input"
  ],
  "serverMessages": [
    "conversation-update",
    "end-of-call-report",
    "function-call",
    "hang",
    "speech-update",
    "status-update",
    "tool-calls",
    "transfer-destination-request",
    "user-interrupted"
  ],
  "silenceTimeoutSeconds": 30,
  "responseDelaySeconds": 0.4,
  "llmRequestDelaySeconds": 0.1,
  "numWordsToInterruptAssistant": 5,
  "maxDurationSeconds": 1800,
  "backgroundSound": "office",
  "backchannelingEnabled": true,
  "backgroundDenoisingEnabled": true,
  "modelOutputInMessagesEnabled": true,
  "isServerUrlSecretSet": {},
  "name": "<string>",
  "firstMessage": "<string>",
  "voicemailDetection": {
    "provider": "twilio",
    "voicemailDetectionTypes": [
      "machine_end_beep",
      "machine_end_silence"
    ],
    "enabled": true,
    "machineDetectionTimeout": 31,
    "machineDetectionSpeechThreshold": 3500,
    "machineDetectionSpeechEndThreshold": 2750,
    "machineDetectionSilenceTimeout": 6000
  },
  "voicemailMessage": "<string>",
  "endCallMessage": "<string>",
  "endCallPhrases": [
    "<string>"
  ],
  "metadata": {},
  "serverUrl": "<string>",
  "serverUrlSecret": "<string>",
  "analysisPlan": {
    "summaryPrompt": "<string>",
    "summaryRequestTimeoutSeconds": 10.5,
    "structuredDataRequestTimeoutSeconds": 10.5,
    "successEvaluationPrompt": "<string>",
    "successEvaluationRubric": "NumericScale",
    "successEvaluationRequestTimeoutSeconds": 10.5,
    "structuredDataPrompt": "<string>",
    "structuredDataSchema": {
      "type": "string",
      "items": {},
      "properties": {},
      "description": "<string>",
      "required": [
        "<string>"
      ]
    }
  },
  "artifactPlan": {
    "videoRecordingEnabled": true
  },
  "messagePlan": {
    "idleMessages": [
      "<string>"
    ],
    "idleMessageMaxSpokenCount": 5.5,
    "idleTimeoutSeconds": 7.5
  },
  "id": "<string>",
  "orgId": "<string>",
  "createdAt": "2023-11-07T05:31:56Z",
  "updatedAt": "2023-11-07T05:31:56Z"
}

Authorizations

Authorization
string
header, required

Retrieve your API Key from the Dashboard.
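
For reference, the only required header is Authorization; a minimal create call might look like this (assuming, as the field list below suggests, that every body field is optional — the "name" value here is just illustrative):

curl --request POST \
  --url https://api.vapi.ai/assistant \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '{ "name": "Support Assistant" }'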

Body

application/json
transcriber
object

These are the options for the assistant's transcriber.

model
object

These are the options for the assistant's LLM.

voice
object

These are the options for the assistant's voice.
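
These three objects sit side by side at the top level of the request body; a trimmed-down fragment using the same provider values as the example request above:

{
  "transcriber": { "provider": "deepgram", "model": "nova-2", "language": "bg" },
  "model": { "provider": "anyscale", "model": "<string>", "temperature": 1, "maxTokens": 525 },
  "voice": { "provider": "azure", "voiceId": "andrew", "speed": 1.25 }
}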

firstMessageMode
enum<string>

This is the mode for the first message. Default is 'assistant-speaks-first'.

Use:

  • 'assistant-speaks-first' to have the assistant speak first.
  • 'assistant-waits-for-user' to have the assistant wait for the user to speak first.
  • 'assistant-speaks-first-with-model-generated-message' to have the assistant speak first with a message generated by the model based on the conversation state. (assistant.model.messages at call start, call.messages at squad transfer points).

@default 'assistant-speaks-first'

Available options:
assistant-speaks-first,
assistant-speaks-first-with-model-generated-message,
assistant-waits-for-user
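
For example, to have the assistant open with a fixed greeting, combine this with the firstMessage field documented further down (the greeting text is illustrative):

{
  "firstMessageMode": "assistant-speaks-first",
  "firstMessage": "Hi, thanks for calling. How can I help you today?"
}
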
recordingEnabled
boolean

This sets whether the assistant's calls are recorded. Defaults to true.

hipaaEnabled
boolean

When this is enabled, no logs, recordings, or transcriptions will be stored. At the end of the call, you will still receive an end-of-call-report message to store on your server. Defaults to false.

clientMessages
enum<string>[]

These are the messages that will be sent to your Client SDKs. Default is conversation-update, function-call, hang, model-output, speech-update, status-update, transcript, tool-calls, user-interrupted, voice-input. You can check the shape of the messages in the ClientMessage schema.

Available options:
conversation-update,
function-call,
function-call-result,
hang,
metadata,
model-output,
speech-update,
status-update,
transcript,
tool-calls,
tool-calls-result,
user-interrupted,
voice-input

serverMessages
enum<string>[]

These are the messages that will be sent to your Server URL. Default is conversation-update, end-of-call-report, function-call, hang, speech-update, status-update, tool-calls, transfer-destination-request, user-interrupted. You can check the shape of the messages in the ServerMessage schema.

Available options:
conversation-update,
end-of-call-report,
function-call,
hang,
model-output,
phone-call-control,
speech-update,
status-update,
transcript,
tool-calls,
transfer-destination-request,
user-interrupted,
voice-input
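
As a sketch, a request body that trims both lists down to a few message types (assuming that supplying an array replaces the corresponding default list above):

{
  "clientMessages": ["transcript", "status-update"],
  "serverMessages": ["end-of-call-report", "status-update", "tool-calls"]
}
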
silenceTimeoutSeconds
number

How many seconds of silence to wait before ending the call. Defaults to 30.

@default 30

responseDelaySeconds
number

The minimum number of seconds after user speech to wait before the assistant starts speaking. Defaults to 0.4.

@default 0.4

llmRequestDelaySeconds
number

The minimum number of seconds to wait after punctuation before sending a request to the LLM. Defaults to 0.1.

@default 0.1

numWordsToInterruptAssistant
number

The number of words to wait for before interrupting the assistant.

Words like "stop", "actually", "no", etc. will always interrupt immediately regardless of this value.

Words like "okay", "yeah", "right" will never interrupt.

When set to 0, it will rely solely on the VAD (Voice Activity Detector) and will not wait for any transcription. Defaults to this (0).

@default 0

maxDurationSeconds
number

This is the maximum number of seconds that the call will last. When the call reaches this duration, it will be ended.

@default 1800 (~30 minutes)
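
The timing and interruption knobs above can be combined in one request; this fragment reuses the values from the example request:

{
  "silenceTimeoutSeconds": 30,
  "responseDelaySeconds": 0.4,
  "llmRequestDelaySeconds": 0.1,
  "numWordsToInterruptAssistant": 5,
  "maxDurationSeconds": 1800
}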

backgroundSound
enum<string>

This is the background sound in the call. Default for phone calls is 'office' and default for web calls is 'off'.

Available options:
off,
office

backchannelingEnabled
boolean

This determines whether the model says 'mhmm', 'ahem', etc. while the user is speaking.

Default false while in beta.

@default false

backgroundDenoisingEnabled
boolean

This enables filtering of noise and background speech while the user is talking.

Default false while in beta.

@default false

modelOutputInMessagesEnabled
boolean

This determines whether the model's output is used in the conversation history rather than the transcription of the assistant's speech.

Default false while in beta.

@default false
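
A fragment enabling the beta behaviors above alongside the office background sound, mirroring the example request:

{
  "backgroundSound": "office",
  "backchannelingEnabled": true,
  "backgroundDenoisingEnabled": true,
  "modelOutputInMessagesEnabled": true
}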

name
string

This is the name of the assistant.

This is required when you want to transfer between assistants in a call.

firstMessage
string

This is the first message that the assistant will say. This can also be a URL to a containerized audio file (mp3, wav, etc.).

If unspecified, the assistant will wait for the user to speak and will use the model to respond once they do.

voicemailDetection
object

These are the settings to configure or disable voicemail detection. Alternatively, voicemail detection can be configured using the model.tools=[VoicemailTool]. This uses Twilio's built-in detection while the VoicemailTool relies on the model to detect if a voicemail was reached. You can use neither of them, one of them, or both of them. By default, Twilio built-in detection is enabled while VoicemailTool is not.

voicemailMessage
string

This is the message that the assistant will say if the call is forwarded to voicemail.

If unspecified, it will hang up.

endCallMessage
string

This is the message that the assistant will say if it ends the call.

If unspecified, it will hang up without saying anything.

endCallPhrases
string[]

This list contains phrases that, if spoken by the assistant, will trigger the call to be hung up. Case insensitive.
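
Putting the voicemail and call-ending fields together (the message strings and phrases are illustrative; the voicemailDetection values come from the example request):

{
  "voicemailDetection": {
    "provider": "twilio",
    "enabled": true,
    "voicemailDetectionTypes": ["machine_end_beep", "machine_end_silence"]
  },
  "voicemailMessage": "Sorry we missed you. Please call back during business hours.",
  "endCallMessage": "Thanks for calling. Goodbye!",
  "endCallPhrases": ["goodbye", "have a nice day"]
}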

metadata
object

This is for metadata you want to store on the assistant.

serverUrl
string

This is the URL Vapi will communicate with via HTTP GET and POST Requests. This is used for retrieving context, function calling, and end-of-call reports.

All requests will be sent with the call object among other things relevant to that message. You can find more details in the Server URL documentation.

This overrides the serverUrl set on the org and the phoneNumber. Order of precedence: tool.server.url > assistant.serverUrl > phoneNumber.serverUrl > org.serverUrl

serverUrlSecret
string

This is the secret you can set that Vapi will send with every request to your server. It will be sent as a header called x-vapi-secret.

Same precedence logic as serverUrl.
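
A fragment pointing the assistant at your own endpoint (the URL is a placeholder); Vapi will then include the secret in an x-vapi-secret header on every request it makes to that URL, so your server can verify the sender:

{
  "serverUrl": "https://example.com/vapi/webhook",
  "serverUrlSecret": "<a secret you generate>"
}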

analysisPlan
object

This is the plan for analysis of assistant's calls. Stored in call.analysis.

artifactPlan
object

This is the plan for artifacts generated during assistant's calls. Stored in call.artifact.

messagePlan
object

This is the plan for static messages that can be spoken by the assistant during the call, like idleMessages.

Note: firstMessage, voicemailMessage, and endCallMessage are currently at the root level. They will be moved to messagePlan in the future, but will remain backwards compatible.
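
A fragment combining the three plan objects, using fields from the example request (the prompt and message text are illustrative):

{
  "analysisPlan": {
    "summaryPrompt": "Summarize the call in two sentences.",
    "successEvaluationRubric": "NumericScale"
  },
  "artifactPlan": {
    "videoRecordingEnabled": true
  },
  "messagePlan": {
    "idleMessages": ["Are you still there?"],
    "idleTimeoutSeconds": 7.5
  }
}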

Response

201 - application/json
transcriber
object

These are the options for the assistant's transcriber.

model
object

These are the options for the assistant's LLM.

voice
object

These are the options for the assistant's voice.

firstMessageMode
enum<string>

This is the mode for the first message. Default is 'assistant-speaks-first'.

Use:

  • 'assistant-speaks-first' to have the assistant speak first.
  • 'assistant-waits-for-user' to have the assistant wait for the user to speak first.
  • 'assistant-speaks-first-with-model-generated-message' to have the assistant speak first with a message generated by the model based on the conversation state. (assistant.model.messages at call start, call.messages at squad transfer points).

@default 'assistant-speaks-first'

Available options:
assistant-speaks-first,
assistant-speaks-first-with-model-generated-message,
assistant-waits-for-user

recordingEnabled
boolean

This sets whether the assistant's calls are recorded. Defaults to true.

hipaaEnabled
boolean

When this is enabled, no logs, recordings, or transcriptions will be stored. At the end of the call, you will still receive an end-of-call-report message to store on your server. Defaults to false.

clientMessages
enum<string>[]

These are the messages that will be sent to your Client SDKs. Default is conversation-update, function-call, hang, model-output, speech-update, status-update, transcript, tool-calls, user-interrupted, voice-input. You can check the shape of the messages in the ClientMessage schema.

Available options:
conversation-update,
function-call,
function-call-result,
hang,
metadata,
model-output,
speech-update,
status-update,
transcript,
tool-calls,
tool-calls-result,
user-interrupted,
voice-input

serverMessages
enum<string>[]

These are the messages that will be sent to your Server URL. Default is conversation-update, end-of-call-report, function-call, hang, speech-update, status-update, tool-calls, transfer-destination-request, user-interrupted. You can check the shape of the messages in the ServerMessage schema.

Available options:
conversation-update,
end-of-call-report,
function-call,
hang,
model-output,
phone-call-control,
speech-update,
status-update,
transcript,
tool-calls,
transfer-destination-request,
user-interrupted,
voice-input

silenceTimeoutSeconds
number

How many seconds of silence to wait before ending the call. Defaults to 30.

@default 30

responseDelaySeconds
number

The minimum number of seconds after user speech to wait before the assistant starts speaking. Defaults to 0.4.

@default 0.4

llmRequestDelaySeconds
number

The minimum number of seconds to wait after punctuation before sending a request to the LLM. Defaults to 0.1.

@default 0.1

numWordsToInterruptAssistant
number

The number of words to wait for before interrupting the assistant.

Words like "stop", "actually", "no", etc. will always interrupt immediately regardless of this value.

Words like "okay", "yeah", "right" will never interrupt.

When set to 0, it will rely solely on the VAD (Voice Activity Detector) and will not wait for any transcription. Defaults to this (0).

@default 0

maxDurationSeconds
number

This is the maximum number of seconds that the call will last. When the call reaches this duration, it will be ended.

@default 1800 (~30 minutes)

backgroundSound
enum<string>

This is the background sound in the call. Default for phone calls is 'office' and default for web calls is 'off'.

Available options:
off,
office

backchannelingEnabled
boolean

This determines whether the model says 'mhmm', 'ahem', etc. while the user is speaking.

Default false while in beta.

@default false

backgroundDenoisingEnabled
boolean

This enables filtering of noise and background speech while the user is talking.

Default false while in beta.

@default false

modelOutputInMessagesEnabled
boolean

This determines whether the model's output is used in the conversation history rather than the transcription of the assistant's speech.

Default false while in beta.

@default false

isServerUrlSecretSet
object
required

name
string

This is the name of the assistant.

This is required when you want to transfer between assistants in a call.

firstMessage
string

This is the first message that the assistant will say. This can also be a URL to a containerized audio file (mp3, wav, etc.).

If unspecified, the assistant will wait for the user to speak and will use the model to respond once they do.

voicemailDetection
object

These are the settings to configure or disable voicemail detection. Alternatively, voicemail detection can be configured using the model.tools=[VoicemailTool]. This uses Twilio's built-in detection while the VoicemailTool relies on the model to detect if a voicemail was reached. You can use neither of them, one of them, or both of them. By default, Twilio built-in detection is enabled while VoicemailTool is not.

voicemailMessage
string

This is the message that the assistant will say if the call is forwarded to voicemail.

If unspecified, it will hang up.

endCallMessage
string

This is the message that the assistant will say if it ends the call.

If unspecified, it will hang up without saying anything.

endCallPhrases
string[]

This list contains phrases that, if spoken by the assistant, will trigger the call to be hung up. Case insensitive.

metadata
object

This is for metadata you want to store on the assistant.

serverUrl
string

This is the URL Vapi will communicate with via HTTP GET and POST Requests. This is used for retrieving context, function calling, and end-of-call reports.

All requests will be sent with the call object among other things relevant to that message. You can find more details in the Server URL documentation.

This overrides the serverUrl set on the org and the phoneNumber. Order of precedence: tool.server.url > assistant.serverUrl > phoneNumber.serverUrl > org.serverUrl

serverUrlSecret
string

This is the secret you can set that Vapi will send with every request to your server. It will be sent as a header called x-vapi-secret.

Same precedence logic as serverUrl.

analysisPlan
object

This is the plan for analysis of assistant's calls. Stored in call.analysis.

artifactPlan
object

This is the plan for artifacts generated during assistant's calls. Stored in call.artifact.

messagePlan
object

This is the plan for static messages that can be spoken by the assistant during the call, like idleMessages.

Note: firstMessage, voicemailMessage, and endCallMessage are currently at the root level. They will be moved to messagePlan in the future, but will remain backwards compatible.

id
string
required

This is the unique identifier for the assistant.

orgId
string
required

This is the unique identifier for the org that this assistant belongs to.

createdAt
string
required

This is the ISO 8601 date-time string of when the assistant was created.

updatedAt
string
required

This is the ISO 8601 date-time string of when the assistant was last updated.
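
The id field from the 201 response identifies the new assistant; a sketch that captures it on the command line (assumes jq is installed, and the "name" value is illustrative):

ASSISTANT_ID=$(curl --silent --request POST \
  --url https://api.vapi.ai/assistant \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '{ "name": "Support Assistant" }' | jq -r '.id')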