List Squads
curl --request GET \
--url https://api.vapi.ai/squad \
--header 'Authorization: Bearer <token>'
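The same request can be made from any HTTP client. Below is a minimal TypeScript sketch using the standard fetch API; it assumes Node 18+ (built-in fetch, ES module context) and that a VAPI_API_KEY environment variable holds the key retrieved from the Dashboard.
// List squads with the bearer token taken from an environment variable.
const response = await fetch("https://api.vapi.ai/squad", {
  method: "GET",
  headers: { Authorization: `Bearer ${process.env.VAPI_API_KEY}` },
});

if (!response.ok) {
  throw new Error(`List Squads failed: ${response.status}`);
}

// The body is an array of squad objects shaped like the example response below.
const squads = await response.json();
console.log(squads.map((squad: { id: string; name: string }) => squad.name));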
[
{
"name": "<string>",
"members": [
{
"assistantId": "<string>",
"assistant": {
"transcriber": {
"provider": "deepgram",
"model": "nova-2",
"language": "bg",
"smartFormat": true,
"keywords": [
"<string>"
]
},
"model": {
"messages": [
{
"content": "<string>",
"role": "assistant"
}
],
"tools": [
{
"async": true,
"messages": [
{
"type": "request-start",
"content": "<string>",
"conditions": [
{
"param": "<string>",
"value": "<string>",
"operator": "eq"
}
]
}
],
"type": "dtmf",
"function": {
"name": "<string>",
"description": "<string>",
"parameters": {
"type": "object",
"properties": {},
"required": [
"<string>"
]
}
},
"server": {
"timeoutSeconds": 20,
"url": "<string>",
"secret": "<string>"
}
}
],
"toolIds": [
"<string>"
],
"provider": "anyscale",
"model": "<string>",
"temperature": 1,
"knowledgeBase": {
"provider": "canonical",
"topK": 5.5,
"fileIds": [
"<string>"
]
},
"maxTokens": 525,
"emotionRecognitionEnabled": true
},
"voice": {
"inputPreprocessingEnabled": true,
"inputReformattingEnabled": true,
"inputMinCharacters": 30,
"inputPunctuationBoundaries": [
"。",
",",
".",
"!",
"?",
";",
")",
"،",
"۔",
"।",
"॥",
"|",
"||",
",",
":"
],
"fillerInjectionEnabled": true,
"provider": "azure",
"voiceId": "andrew",
"speed": 1.25
},
"firstMessageMode": "assistant-speaks-first",
"recordingEnabled": true,
"hipaaEnabled": true,
"clientMessages": [
"conversation-update",
"function-call",
"hang",
"model-output",
"speech-update",
"status-update",
"transcript",
"tool-calls",
"user-interrupted",
"voice-input"
],
"serverMessages": [
"conversation-update",
"end-of-call-report",
"function-call",
"hang",
"speech-update",
"status-update",
"tool-calls",
"transfer-destination-request",
"user-interrupted"
],
"silenceTimeoutSeconds": 30,
"responseDelaySeconds": 0.4,
"llmRequestDelaySeconds": 0.1,
"llmRequestNonPunctuatedDelaySeconds": 1.5,
"numWordsToInterruptAssistant": 5,
"maxDurationSeconds": 1800,
"backgroundSound": "office",
"backchannelingEnabled": true,
"backgroundDenoisingEnabled": true,
"modelOutputInMessagesEnabled": true,
"name": "<string>",
"firstMessage": "<string>",
"voicemailDetection": {
"provider": "twilio",
"voicemailDetectionTypes": [
"machine_end_beep",
"machine_end_silence"
],
"enabled": true,
"machineDetectionTimeout": 31,
"machineDetectionSpeechThreshold": 3500,
"machineDetectionSpeechEndThreshold": 2750,
"machineDetectionSilenceTimeout": 6000
},
"voicemailMessage": "<string>",
"endCallMessage": "<string>",
"endCallPhrases": [
"<string>"
],
"metadata": {},
"serverUrl": "<string>",
"serverUrlSecret": "<string>",
"analysisPlan": {
"summaryPrompt": "<string>",
"summaryRequestTimeoutSeconds": 10.5,
"structuredDataRequestTimeoutSeconds": 10.5,
"successEvaluationPrompt": "<string>",
"successEvaluationRubric": "NumericScale",
"successEvaluationRequestTimeoutSeconds": 10.5,
"structuredDataPrompt": "<string>",
"structuredDataSchema": {
"type": "string",
"items": {},
"properties": {},
"description": "<string>",
"required": [
"<string>"
]
}
},
"artifactPlan": {
"videoRecordingEnabled": true
},
"messagePlan": {
"idleMessages": [
"<string>"
],
"idleMessageMaxSpokenCount": 5.5,
"idleTimeoutSeconds": 7.5
}
},
"assistantOverrides": {
"transcriber": {
"provider": "deepgram",
"model": "nova-2",
"language": "bg",
"smartFormat": true,
"keywords": [
"<string>"
]
},
"model": {
"messages": [
{
"content": "<string>",
"role": "assistant"
}
],
"tools": [
{
"async": true,
"messages": [
{
"type": "request-start",
"content": "<string>",
"conditions": [
{
"param": "<string>",
"value": "<string>",
"operator": "eq"
}
]
}
],
"type": "dtmf",
"function": {
"name": "<string>",
"description": "<string>",
"parameters": {
"type": "object",
"properties": {},
"required": [
"<string>"
]
}
},
"server": {
"timeoutSeconds": 20,
"url": "<string>",
"secret": "<string>"
}
}
],
"toolIds": [
"<string>"
],
"provider": "anyscale",
"model": "<string>",
"temperature": 1,
"knowledgeBase": {
"provider": "canonical",
"topK": 5.5,
"fileIds": [
"<string>"
]
},
"maxTokens": 525,
"emotionRecognitionEnabled": true
},
"voice": {
"inputPreprocessingEnabled": true,
"inputReformattingEnabled": true,
"inputMinCharacters": 30,
"inputPunctuationBoundaries": [
"。",
",",
".",
"!",
"?",
";",
")",
"،",
"۔",
"।",
"॥",
"|",
"||",
",",
":"
],
"fillerInjectionEnabled": true,
"provider": "azure",
"voiceId": "andrew",
"speed": 1.25
},
"firstMessageMode": "assistant-speaks-first",
"recordingEnabled": true,
"hipaaEnabled": true,
"clientMessages": [
"conversation-update",
"function-call",
"hang",
"model-output",
"speech-update",
"status-update",
"transcript",
"tool-calls",
"user-interrupted",
"voice-input"
],
"serverMessages": [
"conversation-update",
"end-of-call-report",
"function-call",
"hang",
"speech-update",
"status-update",
"tool-calls",
"transfer-destination-request",
"user-interrupted"
],
"silenceTimeoutSeconds": 30,
"responseDelaySeconds": 0.4,
"llmRequestDelaySeconds": 0.1,
"llmRequestNonPunctuatedDelaySeconds": 1.5,
"numWordsToInterruptAssistant": 5,
"maxDurationSeconds": 1800,
"backgroundSound": "office",
"backchannelingEnabled": true,
"backgroundDenoisingEnabled": true,
"modelOutputInMessagesEnabled": true,
"variableValues": {},
"name": "<string>",
"firstMessage": "<string>",
"voicemailDetection": {
"provider": "twilio",
"voicemailDetectionTypes": [
"machine_end_beep",
"machine_end_silence"
],
"enabled": true,
"machineDetectionTimeout": 31,
"machineDetectionSpeechThreshold": 3500,
"machineDetectionSpeechEndThreshold": 2750,
"machineDetectionSilenceTimeout": 6000
},
"voicemailMessage": "<string>",
"endCallMessage": "<string>",
"endCallPhrases": [
"<string>"
],
"metadata": {},
"serverUrl": "<string>",
"serverUrlSecret": "<string>",
"analysisPlan": {
"summaryPrompt": "<string>",
"summaryRequestTimeoutSeconds": 10.5,
"structuredDataRequestTimeoutSeconds": 10.5,
"successEvaluationPrompt": "<string>",
"successEvaluationRubric": "NumericScale",
"successEvaluationRequestTimeoutSeconds": 10.5,
"structuredDataPrompt": "<string>",
"structuredDataSchema": {
"type": "string",
"items": {},
"properties": {},
"description": "<string>",
"required": [
"<string>"
]
}
},
"artifactPlan": {
"videoRecordingEnabled": true
},
"messagePlan": {
"idleMessages": [
"<string>"
],
"idleMessageMaxSpokenCount": 5.5,
"idleTimeoutSeconds": 7.5
}
},
"assistantDestinations": [
{
"type": "assistant",
"assistantName": "<string>",
"message": "<string>",
"description": "<string>"
}
]
}
],
"membersOverrides": {
"transcriber": {
"provider": "deepgram",
"model": "nova-2",
"language": "bg",
"smartFormat": true,
"keywords": [
"<string>"
]
},
"model": {
"messages": [
{
"content": "<string>",
"role": "assistant"
}
],
"tools": [
{
"async": true,
"messages": [
{
"type": "request-start",
"content": "<string>",
"conditions": [
{
"param": "<string>",
"value": "<string>",
"operator": "eq"
}
]
}
],
"type": "dtmf",
"function": {
"name": "<string>",
"description": "<string>",
"parameters": {
"type": "object",
"properties": {},
"required": [
"<string>"
]
}
},
"server": {
"timeoutSeconds": 20,
"url": "<string>",
"secret": "<string>"
}
}
],
"toolIds": [
"<string>"
],
"provider": "anyscale",
"model": "<string>",
"temperature": 1,
"knowledgeBase": {
"provider": "canonical",
"topK": 5.5,
"fileIds": [
"<string>"
]
},
"maxTokens": 525,
"emotionRecognitionEnabled": true
},
"voice": {
"inputPreprocessingEnabled": true,
"inputReformattingEnabled": true,
"inputMinCharacters": 30,
"inputPunctuationBoundaries": [
"。",
",",
".",
"!",
"?",
";",
")",
"،",
"۔",
"।",
"॥",
"|",
"||",
",",
":"
],
"fillerInjectionEnabled": true,
"provider": "azure",
"voiceId": "andrew",
"speed": 1.25
},
"firstMessageMode": "assistant-speaks-first",
"recordingEnabled": true,
"hipaaEnabled": true,
"clientMessages": [
"conversation-update",
"function-call",
"hang",
"model-output",
"speech-update",
"status-update",
"transcript",
"tool-calls",
"user-interrupted",
"voice-input"
],
"serverMessages": [
"conversation-update",
"end-of-call-report",
"function-call",
"hang",
"speech-update",
"status-update",
"tool-calls",
"transfer-destination-request",
"user-interrupted"
],
"silenceTimeoutSeconds": 30,
"responseDelaySeconds": 0.4,
"llmRequestDelaySeconds": 0.1,
"llmRequestNonPunctuatedDelaySeconds": 1.5,
"numWordsToInterruptAssistant": 5,
"maxDurationSeconds": 1800,
"backgroundSound": "office",
"backchannelingEnabled": true,
"backgroundDenoisingEnabled": true,
"modelOutputInMessagesEnabled": true,
"variableValues": {},
"name": "<string>",
"firstMessage": "<string>",
"voicemailDetection": {
"provider": "twilio",
"voicemailDetectionTypes": [
"machine_end_beep",
"machine_end_silence"
],
"enabled": true,
"machineDetectionTimeout": 31,
"machineDetectionSpeechThreshold": 3500,
"machineDetectionSpeechEndThreshold": 2750,
"machineDetectionSilenceTimeout": 6000
},
"voicemailMessage": "<string>",
"endCallMessage": "<string>",
"endCallPhrases": [
"<string>"
],
"metadata": {},
"serverUrl": "<string>",
"serverUrlSecret": "<string>",
"analysisPlan": {
"summaryPrompt": "<string>",
"summaryRequestTimeoutSeconds": 10.5,
"structuredDataRequestTimeoutSeconds": 10.5,
"successEvaluationPrompt": "<string>",
"successEvaluationRubric": "NumericScale",
"successEvaluationRequestTimeoutSeconds": 10.5,
"structuredDataPrompt": "<string>",
"structuredDataSchema": {
"type": "string",
"items": {},
"properties": {},
"description": "<string>",
"required": [
"<string>"
]
}
},
"artifactPlan": {
"videoRecordingEnabled": true
},
"messagePlan": {
"idleMessages": [
"<string>"
],
"idleMessageMaxSpokenCount": 5.5,
"idleTimeoutSeconds": 7.5
}
},
"id": "<string>",
"orgId": "<string>",
"createdAt": "2023-11-07T05:31:56Z",
"updatedAt": "2023-11-07T05:31:56Z"
}
]
Authorizations
Retrieve your API Key from Dashboard.
Query Parameters
This is the maximum number of items to return. Defaults to 100.
This will return items where the createdAt is greater than the specified value.
This will return items where the createdAt is less than the specified value.
This will return items where the createdAt is greater than or equal to the specified value.
This will return items where the createdAt is less than or equal to the specified value.
This will return items where the updatedAt is greater than the specified value.
This will return items where the updatedAt is less than the specified value.
This will return items where the updatedAt is greater than or equal to the specified value.
This will return items where the updatedAt is less than or equal to the specified value.
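As a sketch of how the pagination and date filters combine, the request below assumes the query parameters follow Vapi's usual naming for list endpoints (limit, createdAtGe, createdAtLt, and so on); these names are not shown on this page, so confirm them against the API reference before relying on them.
// Hypothetical example: fetch up to 25 squads created during June 2024.
// Parameter names (limit, createdAtGe, createdAtLt) are assumed, not taken from this page.
const params = new URLSearchParams({
  limit: "25",
  createdAtGe: "2024-06-01T00:00:00Z", // createdAt >= value
  createdAtLt: "2024-07-01T00:00:00Z", // createdAt < value
});

const filtered = await fetch(`https://api.vapi.ai/squad?${params}`, {
  headers: { Authorization: `Bearer ${process.env.VAPI_API_KEY}` },
}).then((r) => r.json());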
Response
This is the name of the squad.
This is the list of assistants that make up the squad.
The call will start with the first assistant in the list.
This is the assistant that will be used for the call. To use a transient assistant, use assistant instead.
This is the assistant that will be used for the call. To use an existing assistant, use assistantId instead.
These are the options for the assistant's transcriber.
This is the transcription provider that will be used.
deepgram
This is the Deepgram model that will be used. A list of models can be found here: https://developers.deepgram.com/docs/models-languages-overview
nova-2
, nova-2-general
, nova-2-meeting
, nova-2-phonecall
, nova-2-finance
, nova-2-conversationalai
, nova-2-voicemail
, nova-2-video
, nova-2-medical
, nova-2-drivethru
, nova-2-automotive
, nova
, nova-general
, nova-phonecall
, nova-medical
, enhanced
, enhanced-general
, enhanced-meeting
, enhanced-phonecall
, enhanced-finance
, base
, base-general
, base-meeting
, base-phonecall
, base-finance
, base-conversationalai
, base-voicemail
, base-video
This is the language that will be set for the transcription. The list of languages Deepgram supports can be found here: https://developers.deepgram.com/docs/models-languages-overview
bg
, ca
, cs
, da
, da-DK
, de
, de-CH
, el
, en
, en-AU
, en-GB
, en-IN
, en-NZ
, en-US
, es
, es-419
, es-LATAM
, et
, fi
, fr
, fr-CA
, hi
, hi-Latn
, hu
, id
, it
, ja
, ko
, ko-KR
, lt
, lv
, ms
, nl
, nl-BE
, no
, pl
, pt
, pt-BR
, ro
, ru
, sk
, sv
, sv-SE
, ta
, taq
, th
, th-TH
, tr
, uk
, vi
, zh
, zh-CN
, zh-Hans
, zh-Hant
, zh-TW
This enables the Smart Format option provided by Deepgram. It's disabled by default because it can sometimes format numbers as times, but it's getting better.
These keywords are passed to the transcription model to help it pick up use-case specific words. Anything that may not be a common word, like your company name, should be added here.
These are the options for the assistant's LLM.
This is the starting state for the conversation.
assistant
, function
, user
, system
, tool
These are the tools that the assistant can use during the call. To use existing tools, use toolIds. Both tools and toolIds can be used together.
This determines if the tool is async.
If async, the assistant will move forward without waiting for your server to respond. This is useful if you just want to trigger something on your server.
If sync, the assistant will wait for your server to respond. This is useful if you want the assistant to respond with the result from your server.
Defaults to synchronous (false).
These are the messages that will be spoken to the user as the tool is running.
For some tools, this is auto-filled based on special fields like tool.destinations. For others, like the function tool, these can be custom configured.
This message is triggered when the tool call starts.
This message is never triggered for async tools.
If this message is not provided, one of the default filler messages "Hold on a sec", "One moment", "Just a sec", "Give me a moment" or "This'll just take a sec" will be used.
request-start
This is the content that the assistant says when this message is triggered.
This is an optional array of conditions that the tool call arguments must meet in order for this message to be triggered.
This is the name of the parameter that you want to check.
This is the value you want to compare against the parameter.
This is the operator you want to use to compare the parameter and value.
eq
, neq
, gt
, gte
, lt
, lte
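To illustrate how a request-start message and its conditions fit together, here is a hypothetical tool-message sketch; the destination parameter and the spoken content are made up for illustration, while the field names follow the schema above.
// A request-start message that is only spoken when the tool is called
// with a "destination" argument equal to "billing".
const requestStartMessage = {
  type: "request-start",
  content: "One moment while I connect you to billing.",
  conditions: [
    { param: "destination", value: "billing", operator: "eq" },
  ],
};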
The type of tool. "dtmf" for DTMF tool.
dtmf
This is the function definition of the tool.
For endCall, transferCall, and dtmf tools, this is auto-filled based on tool-specific fields like tool.destinations. But, even in those cases, you can provide a custom function definition for advanced use cases.
An example of an advanced use case is if you want to customize the message that's spoken for the endCall tool. You can specify a function where it returns an argument "reason". Then, in the messages array, you can have many "request-complete" messages. One of these messages will be triggered if the messages[].conditions matches the "reason" argument.
This is the name of the function to be called.
Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
This is the description of what the function does, used by the AI to choose when and how to call the function.
These are the parameters the function accepts, described as a JSON Schema object.
See the OpenAI guide for examples, and the JSON Schema reference for documentation about the format.
Omitting parameters defines a function with an empty parameter list.
This must be set to 'object'. It instructs the model to return a JSON object containing the function call properties.
object
This provides a description of the properties required by the function. JSON Schema can be used to specify expectations for each property. Refer to this doc for a comprehensive guide on JSON Schema.
This specifies the properties that are required by the function.
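Putting the name, description, and parameters fields together, a function definition might look like the following sketch; the lookupOrder function and its orderNumber argument are hypothetical.
// Hypothetical function definition for a tool. "parameters" is a JSON Schema
// object: type must be "object", properties describes each argument, and
// required lists the arguments the model must always supply.
const lookupOrderFunction = {
  name: "lookupOrder",
  description: "Looks up the status of a customer's order by its order number.",
  parameters: {
    type: "object",
    properties: {
      orderNumber: {
        type: "string",
        description: "The order number the caller reads out.",
      },
    },
    required: ["orderNumber"],
  },
};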
This is the server that will be hit when this tool is requested by the model.
All requests will be sent with the call object among other things. You can find more details in the Server URL documentation.
This overrides the serverUrl set on the org and the phoneNumber. Order of precedence: highest tool.server.url, then assistant.serverUrl, then phoneNumber.serverUrl, then org.serverUrl.
This is the timeout in seconds for the request to your server. Defaults to 20 seconds.
@default 20
API endpoint to send requests to.
This is the secret you can set that Vapi will send with every request to your server. Will be sent as a header called x-vapi-secret.
Same precedence logic as server.
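On the receiving side, a minimal server sketch might verify the x-vapi-secret header before doing any work. This uses Node's built-in http module; the exact request and response payload shapes are described in the Server URL documentation, so the body handling below is only a placeholder.
import { createServer } from "node:http";

// Placeholder tool-call handler: reject requests that don't carry the shared
// secret, otherwise log the payload and acknowledge. See the Server URL
// documentation for the real message shapes.
const TOOL_SECRET = process.env.VAPI_SERVER_SECRET;

createServer((req, res) => {
  if (req.headers["x-vapi-secret"] !== TOOL_SECRET) {
    res.writeHead(401).end();
    return;
  }
  let body = "";
  req.on("data", (chunk) => (body += chunk));
  req.on("end", () => {
    console.log("Received tool request:", body);
    res.writeHead(200, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ ok: true })); // placeholder response body
  });
}).listen(8080);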
These are the tools that the assistant can use during the call. To use transient tools, use tools. Both tools and toolIds can be used together.
anyscale
This is the name of the model. Ex. cognitivecomputations/dolphin-mixtral-8x7b
This is the temperature that will be used for calls. Default is 0 to leverage caching for lower latency.
These are the options for the knowledge base.
canonical
This is the max number of tokens that the assistant will be allowed to generate in each turn of the conversation. Default is 250.
This determines whether we detect the user's emotion while they speak and send it as additional info to the model.
Default false because the model is usually good at understanding the user's emotion from text.
These are the options for the assistant's voice.
This determines whether the model output is preprocessed into chunks before being sent to the voice provider.
Default true because voice generation sounds better with chunking (and reformatting them).
To send every token from the model output directly to the voice provider and rely on the voice provider's audio generation logic, set this to false.
If disabled, vapi-provided audio control tokens like <flush /> will not work.
This determines whether the chunk is reformatted before being sent to the voice provider. Many things are reformatted including phone numbers, emails and addresses to improve their enunciation.
Default true because voice generation sounds better with reformatting.
To disable chunk reformatting, set this to false.
To disable chunking completely, set inputPreprocessingEnabled to false.
This is the minimum number of characters before a chunk is created. Chunks are sent to the voice provider for voice generation as the model tokens stream in. Defaults to 30.
Increasing this value might add latency as it waits for the model to output a full chunk before sending it to the voice provider. On the other hand, increasing it might be a good idea if you want to give the voice provider bigger chunks so it can pronounce them better.
Decreasing this value might decrease latency but might also decrease quality if the voice provider struggles to pronounce the text correctly.
These are the punctuations that are considered valid boundaries before a chunk is created. Chunks are sent to the voice provider for voice generation as the model tokens stream in. Defaults are chosen differently for each provider.
Constraining the delimiters might add latency as it waits for the model to output a full chunk before sending it to the voice provider. On the other hand, constraining them might be a good idea if you want to give the voice provider longer chunks so it can sound less disjointed across chunks. E.g. ['.'].
。
, ,
, .
, !
, ?
, ;
, )
, ،
, ۔
, ।
, ॥
, |
, ||
, ,
, :
This determines whether fillers are injected into the model output before inputting it into the voice provider.
Default false because you can achieve better results with prompting the model.
This is the voice provider that will be used.
azure
This is the provider-specific ID that will be used.
andrew
, brian
, emma
This is the speed multiplier that will be used.
This is the mode for the first message. Default is 'assistant-speaks-first'.
Use:
- 'assistant-speaks-first' to have the assistant speak first.
- 'assistant-waits-for-user' to have the assistant wait for the user to speak first.
- 'assistant-speaks-first-with-model-generated-message' to have the assistant speak first with a message generated by the model based on the conversation state (assistant.model.messages at call start, call.messages at squad transfer points).
@default 'assistant-speaks-first'
assistant-speaks-first
, assistant-speaks-first-with-model-generated-message
, assistant-waits-for-user
This sets whether the assistant's calls are recorded. Defaults to true.
When this is enabled, no logs, recordings, or transcriptions will be stored. At the end of the call, you will still receive an end-of-call-report message to store on your server. Defaults to false.
These are the messages that will be sent to your Client SDKs. Default is conversation-update,function-call,hang,model-output,speech-update,status-update,transcript,tool-calls,user-interrupted,voice-input. You can check the shape of the messages in ClientMessage schema.
conversation-update
, function-call
, function-call-result
, hang
, metadata
, model-output
, speech-update
, status-update
, transcript
, tool-calls
, tool-calls-result
, user-interrupted
, voice-input
These are the messages that will be sent to your Server URL. Default is conversation-update,end-of-call-report,function-call,hang,speech-update,status-update,tool-calls,transfer-destination-request,user-interrupted. You can check the shape of the messages in ServerMessage schema.
conversation-update
, end-of-call-report
, function-call
, hang
, model-output
, phone-call-control
, speech-update
, status-update
, transcript
, tool-calls
, transfer-destination-request
, user-interrupted
, voice-input
How many seconds of silence to wait before ending the call. Defaults to 30.
@default 30
The minimum number of seconds after user speech to wait before the assistant starts speaking. Defaults to 0.4.
@default 0.4
The minimum number of seconds to wait after transcription (with punctuation) before sending a request to the LLM. Defaults to 0.1.
@default 0.1
The minimum number of seconds to wait after transcription (without punctuation) before sending a request to the LLM. Defaults to 1.5.
@default 1.5
The number of words to wait for before interrupting the assistant.
Words like "stop", "actually", "no", etc. will always interrupt immediately regardless of this value.
Words like "okay", "yeah", "right" will never interrupt.
When set to 0, it will rely solely on the VAD (Voice Activity Detector) and will not wait for any transcription. Defaults to this (0).
@default 0
This is the maximum number of seconds that the call will last. When the call reaches this duration, it will be ended.
@default 1800 (~30 minutes)
This is the background sound in the call. Default for phone calls is 'office' and default for web calls is 'off'.
off
, office
This determines whether the model says 'mhmm', 'ahem' etc. while the user is speaking.
Default false while in beta.
@default false
This enables filtering of noise and background speech while the user is talking.
Default false while in beta.
@default false
This determines whether the model's output is used in conversation history rather than the transcription of the assistant's speech.
Default false while in beta.
@default false
This is the name of the assistant.
This is required when you want to transfer between assistants in a call.
This is the first message that the assistant will say. This can also be a URL to a containerized audio file (mp3, wav, etc.).
If unspecified, assistant will wait for user to speak and use the model to respond once they speak.
These are the settings to configure or disable voicemail detection. Alternatively, voicemail detection can be configured using the model.tools=[VoicemailTool]. This uses Twilio's built-in detection while the VoicemailTool relies on the model to detect if a voicemail was reached. You can use neither of them, one of them, or both of them. By default, Twilio built-in detection is enabled while VoicemailTool is not.
This is the provider to use for voicemail detection.
twilio
These are the AMD messages from Twilio that are considered as voicemail. Default is ['machine_end_beep', 'machine_end_silence'].
@default {Array} ['machine_end_beep', 'machine_end_silence']
machine_start
, human
, fax
, unknown
, machine_end_beep
, machine_end_silence
, machine_end_other
This sets whether the assistant should detect voicemail. Defaults to true.
@default true
The number of seconds that Twilio should attempt to perform answering machine detection before timing out and returning AnsweredBy as unknown. Default is 30 seconds.
Increasing this value will provide the engine more time to make a determination. This can be useful when DetectMessageEnd is provided in the MachineDetection parameter and there is an expectation of long answering machine greetings that can exceed 30 seconds.
Decreasing this value will reduce the amount of time the engine has to make a determination. This can be particularly useful when the Enable option is provided in the MachineDetection parameter and you want to limit the time for initial detection.
Check the Twilio docs for more info.
@default 30
The number of milliseconds that is used as the measuring stick for the length of the speech activity. Durations lower than this value will be interpreted as a human, longer as a machine. Default is 2400 milliseconds.
Increasing this value will reduce the chance of a False Machine (detected machine, actually human) for a long human greeting (e.g., a business greeting) but increase the time it takes to detect a machine.
Decreasing this value will reduce the chances of a False Human (detected human, actually machine) for short voicemail greetings. The value of this parameter may need to be reduced by more than 1000ms to detect very short voicemail greetings. A reduction of that significance can result in increased False Machine detections. Adjusting the MachineDetectionSpeechEndThreshold is likely the better approach for short voicemails. Decreasing MachineDetectionSpeechThreshold will also reduce the time it takes to detect a machine.
Check the Twilio docs for more info.
@default 2400
The number of milliseconds of silence after speech activity at which point the speech activity is considered complete. Default is 1200 milliseconds.
Increasing this value will typically be used to better address the short voicemail greeting scenarios. For short voicemails, there is typically 1000-2000ms of audio followed by 1200-2400ms of silence and then additional audio before the beep. Increasing the MachineDetectionSpeechEndThreshold to ~2500ms will treat the 1200-2400ms of silence as a gap in the greeting but not the end of the greeting and will result in a machine detection. The downsides of such a change include:
- Increasing the delay for human detection by the amount you increase this parameter, e.g., a change of 1200ms to 2500ms increases human detection delay by 1300ms.
- Cases where a human has two utterances separated by a period of silence (e.g. a "Hello", then 2000ms of silence, and another "Hello") may be interpreted as a machine.
Decreasing this value will result in faster human detection. The consequence is that it can lead to increased False Human (detected human, actually machine) detections because a silence gap in a voicemail greeting (not necessarily just in short voicemail scenarios) can be incorrectly interpreted as the end of speech.
Check the Twilio docs for more info.
@default 1200
The number of milliseconds of initial silence after which an unknown AnsweredBy result will be returned. Default is 5000 milliseconds.
Increasing this value will result in waiting for a longer period of initial silence before returning an 'unknown' AMD result.
Decreasing this value will result in waiting for a shorter period of initial silence before returning an 'unknown' AMD result.
Check the Twilio docs for more info.
@default 5000
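Tying the Twilio AMD fields together, a voicemailDetection block tuned for longer greetings might look like the sketch below; the values are illustrative, and the units follow the descriptions above (machineDetectionTimeout in seconds, the rest in milliseconds).
// Illustrative voicemailDetection configuration.
const voicemailDetection = {
  provider: "twilio",
  enabled: true,
  voicemailDetectionTypes: ["machine_end_beep", "machine_end_silence"],
  machineDetectionTimeout: 45,              // allow greetings longer than the 30s default
  machineDetectionSpeechThreshold: 2400,    // default speech-length threshold
  machineDetectionSpeechEndThreshold: 2500, // treat short silence gaps as part of the greeting
  machineDetectionSilenceTimeout: 5000,     // default initial-silence timeout
};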
This is the message that the assistant will say if the call is forwarded to voicemail.
If unspecified, it will hang up.
This is the message that the assistant will say if it ends the call.
If unspecified, it will hang up without saying anything.
This list contains phrases that, if spoken by the assistant, will trigger the call to be hung up. Case insensitive.
This is for metadata you want to store on the assistant.
This is the URL Vapi will communicate with via HTTP GET and POST Requests. This is used for retrieving context, function calling, and end-of-call reports.
All requests will be sent with the call object among other things relevant to that message. You can find more details in the Server URL documentation.
This overrides the serverUrl set on the org and the phoneNumber. Order of precedence: tool.server.url > assistant.serverUrl > phoneNumber.serverUrl > org.serverUrl
This is the secret you can set that Vapi will send with every request to your server. Will be sent as a header called x-vapi-secret.
Same precedence logic as serverUrl.
This is the plan for analysis of assistant's calls. Stored in call.analysis.
This is the prompt that's used to summarize the call. The output is stored in call.analysis.summary.
Default is "You are an expert note-taker. You will be given a transcript of a call. Summarize the call in 2-3 sentences, if applicable.".
Set to '' or 'off' to disable.
This is how long the request is tried before giving up. When request times out, call.analysis.summary will be empty. Increasing this timeout will delay the end of call report.
Default is 5 seconds.
This is how long the request is tried before giving up. When request times out, call.analysis.structuredData will be empty. Increasing this timeout will delay the end of call report.
Default is 5 seconds.
This is the prompt that's used to evaluate if the call was successful. The output is stored in call.analysis.successEvaluation.
Default is "You are an expert call evaluator. You will be given a transcript of a call and the system prompt of the AI participant. Determine if the call was successful based on the objectives inferred from the system prompt.".
Set to '' or 'off' to disable.
You can use this standalone or in combination with successEvaluationRubric. If both are provided, they are concatenated into appropriate instructions.
This enforces the rubric of the evaluation. The output is stored in call.analysis.successEvaluation.
Options include:
- 'NumericScale': A scale of 1 to 10.
- 'DescriptiveScale': A scale of Excellent, Good, Fair, Poor.
- 'Checklist': A checklist of criteria and their status.
- 'Matrix': A grid that evaluates multiple criteria across different performance levels.
- 'PercentageScale': A scale of 0% to 100%.
- 'LikertScale': A scale of Strongly Agree, Agree, Neutral, Disagree, Strongly Disagree.
- 'AutomaticRubric': Automatically break down evaluation into several criteria, each with its own score.
- 'PassFail': A simple 'true' if call passed, 'false' if not.
For 'Checklist' and 'Matrix', provide the criteria in successEvaluationPrompt.
Default is 'PassFail' if successEvaluationPrompt is not provided, and null if successEvaluationPrompt is provided.
You can use this standalone or in combination with successEvaluationPrompt. If both are provided, they are concatenated into appropriate instructions.
NumericScale
, DescriptiveScale
, Checklist
, Matrix
, PercentageScale
, LikertScale
, AutomaticRubric
, PassFail
This is how long the request is tried before giving up. When request times out, call.analysis.successEvaluation will be empty. Increasing this timeout will delay the end of call report.
Default is 5 seconds.
This is the prompt that's used to extract structured data from the call. The output is stored in call.analysis.structuredData.
Disabled by default.
You can use this standalone or in combination with structuredDataSchema. If both are provided, they are concatenated into appropriate instructions.
This enforces the schema of the structured data. This output is stored in call.analysis.structuredData.
Complete guide on JSON Schema can be found here.
Disabled by default.
You can use this standalone or in combination with structuredDataPrompt. If both are provided, they are concatenated into appropriate instructions.
This is the type of output you'd like.
string, number, integer, and boolean are the primitive types and should be obvious.
array and object are more interesting and quite powerful. They allow you to define nested structures.
For array, you can define the schema of the items in the array using the items property.
For object, you can define the properties of the object using the properties property.
string
, number
, integer
, boolean
, array
, object
This is required if the type is "array". This is the schema of the items in the array.
This is of type JsonSchema. However, Swagger doesn't support circular references.
This is required if the type is "object". This specifies the properties of the object.
This is a map of string to JsonSchema. However, Swagger doesn't support circular references.
This is the description to help the model understand what it needs to output.
This is a list of properties that are required.
This only makes sense if the type is "object".
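As a sketch of how these analysisPlan fields combine, the example below asks for a short summary, a pass/fail evaluation, and a small structured-data object; the orderNumber and callerIntent properties are hypothetical.
// Hypothetical analysisPlan. structuredDataSchema follows the JSON Schema
// conventions described above: an object with properties and a required list.
const analysisPlan = {
  summaryPrompt: "Summarize the call in two sentences.",
  successEvaluationRubric: "PassFail",
  structuredDataPrompt: "Extract the caller's order number and intent.",
  structuredDataSchema: {
    type: "object",
    properties: {
      orderNumber: { type: "string", description: "Order number, if mentioned." },
      callerIntent: { type: "string", description: "What the caller wanted." },
    },
    required: ["callerIntent"],
  },
};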
This is the plan for artifacts generated during assistant's calls. Stored in call.artifact.
This determines whether the video is recorded during the call. Default is false. Only relevant for webCall type.
This is the plan for static messages that can be spoken by the assistant during the call, like idleMessages.
Note: firstMessage, voicemailMessage, and endCallMessage are currently at the root level. They will be moved to messagePlan in the future, but will remain backwards compatible.
These are the messages that the assistant will speak when the user hasn't responded for idleTimeoutSeconds. Each time the timeout is triggered, a random message will be chosen from this array.
@default null (no idle message is spoken)
This determines the maximum number of times idleMessages can be spoken during the call.
@default 3
This is the timeout in seconds before a message from idleMessages is spoken. The clock starts when the assistant finishes speaking and remains active until the user speaks.
@default 7.5
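For example, a messagePlan that nudges a silent caller at most twice might look like this sketch; the wording of the idle messages is made up.
// Hypothetical messagePlan: after 10 seconds of caller silence, speak one of
// the idle messages at random, and never do so more than twice per call.
const messagePlan = {
  idleMessages: [
    "Are you still there?",
    "Take your time. I'm here when you're ready.",
  ],
  idleMessageMaxSpokenCount: 2,
  idleTimeoutSeconds: 10,
};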
This can be used to override the assistant's settings and provide values for its template variables.
These are the options for the assistant's transcriber.
This is the transcription provider that will be used.
deepgram
This is the Deepgram model that will be used. A list of models can be found here: https://developers.deepgram.com/docs/models-languages-overview
nova-2
, nova-2-general
, nova-2-meeting
, nova-2-phonecall
, nova-2-finance
, nova-2-conversationalai
, nova-2-voicemail
, nova-2-video
, nova-2-medical
, nova-2-drivethru
, nova-2-automotive
, nova
, nova-general
, nova-phonecall
, nova-medical
, enhanced
, enhanced-general
, enhanced-meeting
, enhanced-phonecall
, enhanced-finance
, base
, base-general
, base-meeting
, base-phonecall
, base-finance
, base-conversationalai
, base-voicemail
, base-video
This is the language that will be set for the transcription. The list of languages Deepgram supports can be found here: https://developers.deepgram.com/docs/models-languages-overview
bg
, ca
, cs
, da
, da-DK
, de
, de-CH
, el
, en
, en-AU
, en-GB
, en-IN
, en-NZ
, en-US
, es
, es-419
, es-LATAM
, et
, fi
, fr
, fr-CA
, hi
, hi-Latn
, hu
, id
, it
, ja
, ko
, ko-KR
, lt
, lv
, ms
, nl
, nl-BE
, no
, pl
, pt
, pt-BR
, ro
, ru
, sk
, sv
, sv-SE
, ta
, taq
, th
, th-TH
, tr
, uk
, vi
, zh
, zh-CN
, zh-Hans
, zh-Hant
, zh-TW
This enables the Smart Format option provided by Deepgram. It's disabled by default because it can sometimes format numbers as times, but it's getting better.
These keywords are passed to the transcription model to help it pick up use-case specific words. Anything that may not be a common word, like your company name, should be added here.
These are the options for the assistant's LLM.
This is the starting state for the conversation.
assistant
, function
, user
, system
, tool
These are the tools that the assistant can use during the call. To use existing tools, use toolIds. Both tools and toolIds can be used together.
This determines if the tool is async.
If async, the assistant will move forward without waiting for your server to respond. This is useful if you just want to trigger something on your server.
If sync, the assistant will wait for your server to respond. This is useful if you want the assistant to respond with the result from your server.
Defaults to synchronous (false).
These are the messages that will be spoken to the user as the tool is running.
For some tools, this is auto-filled based on special fields like tool.destinations. For others, like the function tool, these can be custom configured.
This message is triggered when the tool call starts.
This message is never triggered for async tools.
If this message is not provided, one of the default filler messages "Hold on a sec", "One moment", "Just a sec", "Give me a moment" or "This'll just take a sec" will be used.
request-start
This is the content that the assistant says when this message is triggered.
This is an optional array of conditions that the tool call arguments must meet in order for this message to be triggered.
This is the name of the parameter that you want to check.
This is the value you want to compare against the parameter.
This is the operator you want to use to compare the parameter and value.
eq
, neq
, gt
, gte
, lt
, lte
The type of tool. "dtmf" for DTMF tool.
dtmf
This is the function definition of the tool.
For endCall, transferCall, and dtmf tools, this is auto-filled based on tool-specific fields like tool.destinations. But, even in those cases, you can provide a custom function definition for advanced use cases.
An example of an advanced use case is if you want to customize the message that's spoken for the endCall tool. You can specify a function where it returns an argument "reason". Then, in the messages array, you can have many "request-complete" messages. One of these messages will be triggered if the messages[].conditions matches the "reason" argument.
This is the name of the function to be called.
Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
This is the description of what the function does, used by the AI to choose when and how to call the function.
These are the parameters the function accepts, described as a JSON Schema object.
See the OpenAI guide for examples, and the JSON Schema reference for documentation about the format.
Omitting parameters defines a function with an empty parameter list.
This must be set to 'object'. It instructs the model to return a JSON object containing the function call properties.
object
This provides a description of the properties required by the function. JSON Schema can be used to specify expectations for each property. Refer to this doc for a comprehensive guide on JSON Schema.
This specifies the properties that are required by the function.
This is the server that will be hit when this tool is requested by the model.
All requests will be sent with the call object among other things. You can find more details in the Server URL documentation.
This overrides the serverUrl set on the org and the phoneNumber. Order of precedence: highest tool.server.url, then assistant.serverUrl, then phoneNumber.serverUrl, then org.serverUrl.
This is the timeout in seconds for the request to your server. Defaults to 20 seconds.
@default 20
API endpoint to send requests to.
This is the secret you can set that Vapi will send with every request to your server. Will be sent as a header called x-vapi-secret.
Same precedence logic as server.
These are the tools that the assistant can use during the call. To use transient tools, use tools. Both tools and toolIds can be used together.
anyscale
This is the name of the model. Ex. cognitivecomputations/dolphin-mixtral-8x7b
This is the temperature that will be used for calls. Default is 0 to leverage caching for lower latency.
These are the options for the knowledge base.
canonical
This is the max number of tokens that the assistant will be allowed to generate in each turn of the conversation. Default is 250.
This determines whether we detect the user's emotion while they speak and send it as additional info to the model.
Default false because the model is usually good at understanding the user's emotion from text.
These are the options for the assistant's voice.
This determines whether the model output is preprocessed into chunks before being sent to the voice provider.
Default true because voice generation sounds better with chunking (and reformatting them).
To send every token from the model output directly to the voice provider and rely on the voice provider's audio generation logic, set this to false.
If disabled, vapi-provided audio control tokens like <flush /> will not work.
This determines whether the chunk is reformatted before being sent to the voice provider. Many things are reformatted including phone numbers, emails and addresses to improve their enunciation.
Default true because voice generation sounds better with reformatting.
To disable chunk reformatting, set this to false.
To disable chunking completely, set inputPreprocessingEnabled to false.
This is the minimum number of characters before a chunk is created. The chunks that are sent to the voice provider for the voice generation as the model tokens are streaming in. Defaults to 30.
Increasing this value might add latency as it waits for the model to output a full chunk before sending it to the voice provider. On the other hand, increasing might be a good idea if you want to give voice provider bigger chunks so it can pronounce them better.
Decreasing this value might decrease latency but might also decrease quality if the voice provider struggles to pronounce the text correctly.
These are the punctuations that are considered valid boundaries before a chunk is created. The chunks that are sent to the voice provider for the voice generation as the model tokens are streaming in. Defaults are chosen differently for each provider.
Constraining the delimiters might add latency as it waits for the model to output a full chunk before sending it to the voice provider. On the other hand, constraining might be a good idea if you want to give voice provider longer chunks so it can sound less disjointed across chunks. Eg. ['.'].
。
, ,
, .
, !
, ?
, ;
, )
, ،
, ۔
, ।
, ॥
, |
, ||
, ,
, :
This determines whether fillers are injected into the model output before inputting it into the voice provider.
Default false because you can achieve better results with prompting the model.
This is the voice provider that will be used.
azure
This is the provider-specific ID that will be used.
andrew
, brian
, emma
This is the speed multiplier that will be used.
This is the mode for the first message. Default is 'assistant-speaks-first'.
Use:
- 'assistant-speaks-first' to have the assistant speak first.
- 'assistant-waits-for-user' to have the assistant wait for the user to speak first.
- 'assistant-speaks-first-with-model-generated-message' to have the assistant speak first with a message generated by the model based on the conversation state (assistant.model.messages at call start, call.messages at squad transfer points).
@default 'assistant-speaks-first'
assistant-speaks-first
, assistant-speaks-first-with-model-generated-message
, assistant-waits-for-user
This sets whether the assistant's calls are recorded. Defaults to true.
When this is enabled, no logs, recordings, or transcriptions will be stored. At the end of the call, you will still receive an end-of-call-report message to store on your server. Defaults to false.
These are the messages that will be sent to your Client SDKs. Default is conversation-update,function-call,hang,model-output,speech-update,status-update,transcript,tool-calls,user-interrupted,voice-input. You can check the shape of the messages in ClientMessage schema.
conversation-update
, function-call
, function-call-result
, hang
, metadata
, model-output
, speech-update
, status-update
, transcript
, tool-calls
, tool-calls-result
, user-interrupted
, voice-input
These are the messages that will be sent to your Server URL. Default is conversation-update,end-of-call-report,function-call,hang,speech-update,status-update,tool-calls,transfer-destination-request,user-interrupted. You can check the shape of the messages in ServerMessage schema.
conversation-update
, end-of-call-report
, function-call
, hang
, model-output
, phone-call-control
, speech-update
, status-update
, transcript
, tool-calls
, transfer-destination-request
, user-interrupted
, voice-input
How many seconds of silence to wait before ending the call. Defaults to 30.
@default 30
The minimum number of seconds after user speech to wait before the assistant starts speaking. Defaults to 0.4.
@default 0.4
The minimum number of seconds to wait after transcription (with punctuation) before sending a request to the LLM. Defaults to 0.1.
@default 0.1
The minimum number of seconds to wait after transcription (without punctuation) before sending a request to the LLM. Defaults to 1.5.
@default 1.5
The number of words to wait for before interrupting the assistant.
Words like "stop", "actually", "no", etc. will always interrupt immediately regardless of this value.
Words like "okay", "yeah", "right" will never interrupt.
When set to 0, it will rely solely on the VAD (Voice Activity Detector) and will not wait for any transcription. Defaults to this (0).
@default 0
This is the maximum number of seconds that the call will last. When the call reaches this duration, it will be ended.
@default 1800 (~30 minutes)
This is the background sound in the call. Default for phone calls is 'office' and default for web calls is 'off'.
off
, office
This determines whether the model says 'mhmm', 'ahem' etc. while the user is speaking.
Default false while in beta.
@default false
This enables filtering of noise and background speech while the user is talking.
Default false while in beta.
@default false
This determines whether the model's output is used in conversation history rather than the transcription of the assistant's speech.
Default false while in beta.
@default false
These are values that will be used to replace the template variables in the assistant messages and other text-based fields.
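As a sketch, assuming the double-curly-brace template syntax used elsewhere in Vapi's docs, variableValues pairs with templated text like this; the customerName variable and the greeting are hypothetical.
// Hypothetical override: the assistant's firstMessage references a template
// variable, and variableValues supplies its value for this call.
const assistantOverrides = {
  firstMessage: "Hi {{customerName}}, thanks for calling back!",
  variableValues: {
    customerName: "Alex",
  },
};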
This is the name of the assistant.
This is required when you want to transfer between assistants in a call.
This is the first message that the assistant will say. This can also be a URL to a containerized audio file (mp3, wav, etc.).
If unspecified, assistant will wait for user to speak and use the model to respond once they speak.
These are the settings to configure or disable voicemail detection. Alternatively, voicemail detection can be configured using the model.tools=[VoicemailTool]. This uses Twilio's built-in detection while the VoicemailTool relies on the model to detect if a voicemail was reached. You can use neither of them, one of them, or both of them. By default, Twilio built-in detection is enabled while VoicemailTool is not.
This is the provider to use for voicemail detection.
twilio
These are the AMD messages from Twilio that are considered as voicemail. Default is ['machine_end_beep', 'machine_end_silence'].
@default {Array} ['machine_end_beep', 'machine_end_silence']
machine_start
, human
, fax
, unknown
, machine_end_beep
, machine_end_silence
, machine_end_other
This sets whether the assistant should detect voicemail. Defaults to true.
@default true
The number of seconds that Twilio should attempt to perform answering machine detection before timing out and returning AnsweredBy as unknown. Default is 30 seconds.
Increasing this value will provide the engine more time to make a determination. This can be useful when DetectMessageEnd is provided in the MachineDetection parameter and there is an expectation of long answering machine greetings that can exceed 30 seconds.
Decreasing this value will reduce the amount of time the engine has to make a determination. This can be particularly useful when the Enable option is provided in the MachineDetection parameter and you want to limit the time for initial detection.
Check the Twilio docs for more info.
@default 30
The number of milliseconds that is used as the measuring stick for the length of the speech activity. Durations lower than this value will be interpreted as a human, longer as a machine. Default is 2400 milliseconds.
Increasing this value will reduce the chance of a False Machine (detected machine, actually human) for a long human greeting (e.g., a business greeting) but increase the time it takes to detect a machine.
Decreasing this value will reduce the chances of a False Human (detected human, actually machine) for short voicemail greetings. The value of this parameter may need to be reduced by more than 1000ms to detect very short voicemail greetings. A reduction of that significance can result in increased False Machine detections. Adjusting the MachineDetectionSpeechEndThreshold is likely the better approach for short voicemails. Decreasing MachineDetectionSpeechThreshold will also reduce the time it takes to detect a machine.
Check the Twilio docs for more info.
@default 2400
The number of milliseconds of silence after speech activity at which point the speech activity is considered complete. Default is 1200 milliseconds.
Increasing this value will typically be used to better address the short voicemail greeting scenarios. For short voicemails, there is typically 1000-2000ms of audio followed by 1200-2400ms of silence and then additional audio before the beep. Increasing the MachineDetectionSpeechEndThreshold to ~2500ms will treat the 1200-2400ms of silence as a gap in the greeting but not the end of the greeting and will result in a machine detection. The downsides of such a change include:
- Increasing the delay for human detection by the amount you increase this parameter, e.g., a change of 1200ms to 2500ms increases human detection delay by 1300ms.
- Cases where a human has two utterances separated by a period of silence (e.g. a "Hello", then 2000ms of silence, and another "Hello") may be interpreted as a machine.
Decreasing this value will result in faster human detection. The consequence is that it can lead to increased False Human (detected human, actually machine) detections because a silence gap in a voicemail greeting (not necessarily just in short voicemail scenarios) can be incorrectly interpreted as the end of speech.
Check the Twilio docs for more info.
@default 1200
The number of milliseconds of initial silence after which an unknown AnsweredBy result will be returned. Default is 5000 milliseconds.
Increasing this value will result in waiting for a longer period of initial silence before returning an 'unknown' AMD result.
Decreasing this value will result in waiting for a shorter period of initial silence before returning an 'unknown' AMD result.
Check the Twilio docs for more info.
@default 5000
This is the message that the assistant will say if the call is forwarded to voicemail.
If unspecified, it will hang up.
This is the message that the assistant will say if it ends the call.
If unspecified, it will hang up without saying anything.
This list contains phrases that, if spoken by the assistant, will trigger the call to be hung up. Case insensitive.
This is for metadata you want to store on the assistant.
This is the URL Vapi will communicate with via HTTP GET and POST Requests. This is used for retrieving context, function calling, and end-of-call reports.
All requests will be sent with the call object among other things relevant to that message. You can find more details in the Server URL documentation.
This overrides the serverUrl set on the org and the phoneNumber. Order of precedence: tool.server.url > assistant.serverUrl > phoneNumber.serverUrl > org.serverUrl
This is the secret you can set that Vapi will send with every request to your server. Will be sent as a header called x-vapi-secret.
Same precedence logic as serverUrl.
This is the plan for analysis of assistant's calls. Stored in call.analysis.
This is the prompt that's used to summarize the call. The output is stored in call.analysis.summary.
Default is "You are an expert note-taker. You will be given a transcript of a call. Summarize the call in 2-3 sentences, if applicable.".
Set to '' or 'off' to disable.
This is how long the request is tried before giving up. When request times out, call.analysis.summary will be empty. Increasing this timeout will delay the end of call report.
Default is 5 seconds.
This is how long the request is tried before giving up. When request times out, call.analysis.structuredData will be empty. Increasing this timeout will delay the end of call report.
Default is 5 seconds.
This is the prompt that's used to evaluate if the call was successful. The output is stored in call.analysis.successEvaluation.
Default is "You are an expert call evaluator. You will be given a transcript of a call and the system prompt of the AI participant. Determine if the call was successful based on the objectives inferred from the system prompt.".
Set to '' or 'off' to disable.
You can use this standalone or in combination with successEvaluationRubric. If both are provided, they are concatenated into appropriate instructions.
This enforces the rubric of the evaluation. The output is stored in call.analysis.successEvaluation.
Options include:
- 'NumericScale': A scale of 1 to 10.
- 'DescriptiveScale': A scale of Excellent, Good, Fair, Poor.
- 'Checklist': A checklist of criteria and their status.
- 'Matrix': A grid that evaluates multiple criteria across different performance levels.
- 'PercentageScale': A scale of 0% to 100%.
- 'LikertScale': A scale of Strongly Agree, Agree, Neutral, Disagree, Strongly Disagree.
- 'AutomaticRubric': Automatically break down evaluation into several criteria, each with its own score.
- 'PassFail': A simple 'true' if call passed, 'false' if not.
For 'Checklist' and 'Matrix', provide the criteria in successEvaluationPrompt.
Default is 'PassFail' if successEvaluationPrompt is not provided, and null if successEvaluationPrompt is provided.
You can use this standalone or in combination with successEvaluationPrompt. If both are provided, they are concatenated into appropriate instructions.
NumericScale
, DescriptiveScale
, Checklist
, Matrix
, PercentageScale
, LikertScale
, AutomaticRubric
, PassFail
This is how long the request is tried before giving up. When request times out, call.analysis.successEvaluation will be empty. Increasing this timeout will delay the end of call report.
Default is 5 seconds.
This is the prompt that's used to extract structured data from the call. The output is stored in call.analysis.structuredData.
Disabled by default.
You can use this standalone or in combination with structuredDataSchema. If both are provided, they are concatenated into appropriate instructions.
This enforces the schema of the structured data. This output is stored in call.analysis.structuredData.
Complete guide on JSON Schema can be found here.
Disabled by default.
You can use this standalone or in combination with structuredDataPrompt. If both are provided, they are concatenated into appropriate instructions.
This is the type of output you'd like.
string, number, integer, and boolean are the primitive types and should be obvious.
array and object are more interesting and quite powerful. They allow you to define nested structures.
For array, you can define the schema of the items in the array using the items property.
For object, you can define the properties of the object using the properties property.
string
, number
, integer
, boolean
, array
, object
This is required if the type is "array". This is the schema of the items in the array.
This is of type JsonSchema. However, Swagger doesn't support circular references.
This is required if the type is "object". This specifies the properties of the object.
This is a map of string to JsonSchema. However, Swagger doesn't support circular references.
This is the description to help the model understand what it needs to output.
This is a list of properties that are required.
This only makes sense if the type is "object".
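For example, to capture the caller's name and the items they mention, a structuredDataPrompt plus structuredDataSchema pair might look like the sketch below (the callerName and items property names are illustrative, not part of the API):

{
  "structuredDataPrompt": "Extract the caller's full name and the items they ordered.",
  "structuredDataSchema": {
    "type": "object",
    "description": "Details captured from the order call.",
    "properties": {
      "callerName": {
        "type": "string",
        "description": "Full name of the caller."
      },
      "items": {
        "type": "array",
        "description": "Names of the ordered items.",
        "items": {
          "type": "string"
        }
      }
    },
    "required": [
      "callerName"
    ]
  }
}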
This is the plan for artifacts generated during assistant's calls. Stored in call.artifact.
This determines whether the video is recorded during the call. Default is false. Only relevant for webCall type.
This is the plan for static messages that can be spoken by the assistant during the call, like idleMessages.
Note: firstMessage, voicemailMessage, and endCallMessage are currently at the root level. They will be moved to messagePlan in the future, but will remain backwards compatible.
These are the messages that the assistant will speak when the user hasn't responded for idleTimeoutSeconds. Each time the timeout is triggered, a random message will be chosen from this array.
@default null (no idle message is spoken)
This determines the maximum number of times idleMessages can be spoken during the call.
@default 3
This is the timeout in seconds before a message from idleMessages is spoken. The clock starts when the assistant finishes speaking and remains active until the user speaks.
@default 7.5
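Putting these fields together, a messagePlan that nudges a silent user at most twice might look like the sketch below (message wording and values are illustrative):

{
  "messagePlan": {
    "idleMessages": [
      "Are you still there?",
      "I'm here whenever you're ready to continue."
    ],
    "idleMessageMaxSpokenCount": 2,
    "idleTimeoutSeconds": 10
  }
}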
These are the other assistants that this assistant can transfer to. These destinations are in addition to destinations that already exist in the assistant's TransferCall tool.
assistant
This is the assistant to transfer the call to.
This is the message to say before transferring the call to the destination.
This is the description of the destination, used by the AI to choose when and how to transfer the call.
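For example, a member that can hand the call to a billing assistant might declare a destination like the sketch below (the assistant name and wording are illustrative):

{
  "assistantDestinations": [
    {
      "type": "assistant",
      "assistantName": "Billing Assistant",
      "message": "Let me transfer you to our billing assistant.",
      "description": "Transfer here when the caller asks about invoices, refunds, or payment issues."
    }
  ]
}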
This can be used to override all the assistants' settings and provide values for their template variables.
Both membersOverrides and members[n].assistantOverrides can be used together. First, members[n].assistantOverrides is applied. Then, membersOverrides is applied as a global override.
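As a sketch of how the two levels combine (IDs, names, and variable values are illustrative), a squad might look like the following. The member first gets department from its own assistantOverrides, and then companyName and recordingEnabled from membersOverrides are applied on top of every member:

{
  "members": [
    {
      "assistantId": "<assistant-id>",
      "assistantOverrides": {
        "variableValues": {
          "department": "Sales"
        }
      }
    }
  ],
  "membersOverrides": {
    "variableValues": {
      "companyName": "Acme"
    },
    "recordingEnabled": true
  }
}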
These are the options for the assistant's transcriber.
This is the transcription provider that will be used.
deepgram
This is the Deepgram model that will be used. A list of models can be found here: https://developers.deepgram.com/docs/models-languages-overview
nova-2, nova-2-general, nova-2-meeting, nova-2-phonecall, nova-2-finance, nova-2-conversationalai, nova-2-voicemail, nova-2-video, nova-2-medical, nova-2-drivethru, nova-2-automotive, nova, nova-general, nova-phonecall, nova-medical, enhanced, enhanced-general, enhanced-meeting, enhanced-phonecall, enhanced-finance, base, base-general, base-meeting, base-phonecall, base-finance, base-conversationalai, base-voicemail, base-video
This is the language that will be set for the transcription. The list of languages Deepgram supports can be found here: https://developers.deepgram.com/docs/models-languages-overview
bg, ca, cs, da, da-DK, de, de-CH, el, en, en-AU, en-GB, en-IN, en-NZ, en-US, es, es-419, es-LATAM, et, fi, fr, fr-CA, hi, hi-Latn, hu, id, it, ja, ko, ko-KR, lt, lv, ms, nl, nl-BE, no, pl, pt, pt-BR, ro, ru, sk, sv, sv-SE, ta, taq, th, th-TH, tr, uk, vi, zh, zh-CN, zh-Hans, zh-Hant, zh-TW
This enables the smart format option provided by Deepgram. It is disabled by default because it can sometimes format numbers as times, but it's getting better.
These keywords are passed to the transcription model to help it pick up use-case specific words. Anything that may not be a common word, like your company name, should be added here.
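For example, a transcriber block tuned for US English phone calls with a couple of domain-specific keywords might look like the sketch below (keyword values are illustrative):

{
  "transcriber": {
    "provider": "deepgram",
    "model": "nova-2-phonecall",
    "language": "en-US",
    "smartFormat": false,
    "keywords": [
      "Vapi",
      "Acme"
    ]
  }
}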
These are the options for the assistant's LLM.
This is the starting state for the conversation.
assistant, function, user, system, tool
These are the tools that the assistant can use during the call. To use existing tools, use toolIds.
Both tools and toolIds can be used together.
This determines if the tool is async.
If async, the assistant will move forward without waiting for your server to respond. This is useful if you just want to trigger something on your server.
If sync, the assistant will wait for your server to respond. This is useful if you want the assistant to respond with the result from your server.
Defaults to synchronous (false).
These are the messages that will be spoken to the user as the tool is running.
For some tools, this is auto-filled based on special fields like tool.destinations. For others like the function tool, these can be custom configured.
This message is triggered when the tool call starts.
This message is never triggered for async tools.
If this message is not provided, one of the default filler messages "Hold on a sec", "One moment", "Just a sec", "Give me a moment" or "This'll just take a sec" will be used.
request-start
This is the content that the assistant says when this message is triggered.
This is an optional array of conditions that the tool call arguments must meet in order for this message to be triggered.
This is the name of the parameter that you want to check.
This is the value you want to compare against the parameter.
This is the operator you want to use to compare the parameter and value.
eq, neq, gt, gte, lt, lte
The type of tool. "dtmf" for DTMF tool.
dtmf
This is the function definition of the tool.
For endCall, transferCall, and dtmf tools, this is auto-filled based on tool-specific fields like tool.destinations. But even in those cases, you can provide a custom function definition for advanced use cases.
An example of an advanced use case is if you want to customize the message that's spoken for the endCall tool. You can specify a function that returns an argument "reason". Then, in the messages array, you can have many "request-complete" messages. One of these messages will be triggered if the messages[].conditions matches the "reason" argument.
This is the name of the function to be called.
Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
This is the description of what the function does, used by the AI to choose when and how to call the function.
These are the parameters the functions accepts, described as a JSON Schema object.
See the OpenAI guide for examples, and the JSON Schema reference for documentation about the format.
Omitting parameters defines a function with an empty parameter list.
This must be set to 'object'. It instructs the model to return a JSON object containing the function call properties.
object
This provides a description of the properties required by the function. JSON Schema can be used to specify expectations for each property. Refer to this doc for a comprehensive guide on JSON Schema.
This specifies the properties that are required by the function.
This is the server that will be hit when this tool is requested by the model.
All requests will be sent with the call object among other things. You can find more details in the Server URL documentation.
This overrides the serverUrl set on the org and the phoneNumber. Order of precedence (highest first): tool.server.url, then assistant.serverUrl, then phoneNumber.serverUrl, then org.serverUrl.
This is the timeout in seconds for the request to your server. Defaults to 20 seconds.
@default 20
API endpoint to send requests to.
This is the secret you can set that Vapi will send with every request to your server. Will be sent as a header called x-vapi-secret.
Same precedence logic as server.
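As a sketch of how these tool fields fit together (assuming a 'function'-type tool as referenced above; the function name, URL, secret, and message wording are illustrative), a synchronous tool that hits your server might look like:

{
  "type": "function",
  "async": false,
  "function": {
    "name": "lookup_order_status",
    "description": "Look up the current status of a customer's order by order number.",
    "parameters": {
      "type": "object",
      "properties": {
        "orderNumber": {
          "type": "string",
          "description": "The customer's order number."
        }
      },
      "required": [
        "orderNumber"
      ]
    }
  },
  "messages": [
    {
      "type": "request-start",
      "content": "Give me a moment while I look that order up.",
      "conditions": [
        {
          "param": "orderNumber",
          "operator": "neq",
          "value": ""
        }
      ]
    }
  ],
  "server": {
    "url": "https://example.com/vapi/tool-calls",
    "timeoutSeconds": 10,
    "secret": "<your-secret>"
  }
}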
These are the tools that the assistant can use during the call. To use transient tools, use tools.
Both tools and toolIds can be used together.
anyscale
This is the name of the model. Ex. cognitivecomputations/dolphin-mixtral-8x7b
This is the temperature that will be used for calls. Default is 0 to leverage caching for lower latency.
These are the options for the knowledge base.
canonical
This is the max number of tokens that the assistant will be allowed to generate in each turn of the conversation. Default is 250.
This determines whether we detect the user's emotion while they speak and send it as additional info to the model.
Default false because the model is usually good at understanding the user's emotion from text.
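For example, a model block using an Anyscale-hosted model with a small knowledge base might look like the sketch below (the system prompt and file ID are placeholders):

{
  "model": {
    "provider": "anyscale",
    "model": "cognitivecomputations/dolphin-mixtral-8x7b",
    "temperature": 0.3,
    "maxTokens": 250,
    "emotionRecognitionEnabled": false,
    "knowledgeBase": {
      "provider": "canonical",
      "topK": 5,
      "fileIds": [
        "<file-id>"
      ]
    },
    "messages": [
      {
        "role": "system",
        "content": "You are a helpful phone assistant for Acme."
      }
    ]
  }
}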
These are the options for the assistant's voice.
This determines whether the model output is preprocessed into chunks before being sent to the voice provider.
Default true because voice generation sounds better with chunking (and reformatting them).
To send every token from the model output directly to the voice provider and rely on the voice provider's audio generation logic, set this to false.
If disabled, vapi-provided audio control tokens like <flush /> will not work.
This determines whether the chunk is reformatted before being sent to the voice provider. Many things are reformatted including phone numbers, emails and addresses to improve their enunciation.
Default true because voice generation sounds better with reformatting.
To disable chunk reformatting, set this to false.
To disable chunking completely, set inputPreprocessingEnabled to false.
This is the minimum number of characters before a chunk is created. Chunks are sent to the voice provider for voice generation as the model tokens stream in. Defaults to 30.
Increasing this value might add latency as it waits for the model to output a full chunk before sending it to the voice provider. On the other hand, increasing it might be a good idea if you want to give the voice provider bigger chunks so it can pronounce them better.
Decreasing this value might decrease latency but might also decrease quality if the voice provider struggles to pronounce the text correctly.
These are the punctuations that are considered valid boundaries before a chunk is created. Chunks are sent to the voice provider for voice generation as the model tokens stream in. Defaults are chosen differently for each provider.
Constraining the delimiters might add latency as it waits for the model to output a full chunk before sending it to the voice provider. On the other hand, constraining might be a good idea if you want to give the voice provider longer chunks so it can sound less disjointed across chunks, e.g. ['.'].
。
, ,
, .
, !
, ?
, ;
, )
, ،
, ۔
, ।
, ॥
, |
, ||
, ,
, :
This determines whether fillers are injected into the model output before inputting it into the voice provider.
Default false because you can achieve better results by prompting the model.
This is the voice provider that will be used.
azure
This is the provider-specific ID that will be used.
andrew, brian, emma
This is the speed multiplier that will be used.
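For example, an Azure voice that keeps the default chunking behavior but waits for slightly larger chunks and only splits on sentence-ending punctuation might look like the sketch below (values are illustrative):

{
  "voice": {
    "provider": "azure",
    "voiceId": "brian",
    "speed": 1.0,
    "inputPreprocessingEnabled": true,
    "inputReformattingEnabled": true,
    "inputMinCharacters": 50,
    "inputPunctuationBoundaries": [
      ".",
      "!",
      "?"
    ],
    "fillerInjectionEnabled": false
  }
}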
This is the mode for the first message. Default is 'assistant-speaks-first'.
Use:
- 'assistant-speaks-first' to have the assistant speak first.
- 'assistant-waits-for-user' to have the assistant wait for the user to speak first.
- 'assistant-speaks-first-with-model-generated-message' to have the assistant speak first with a message generated by the model based on the conversation state (assistant.model.messages at call start, call.messages at squad transfer points).
@default 'assistant-speaks-first'
assistant-speaks-first, assistant-speaks-first-with-model-generated-message, assistant-waits-for-user
This sets whether the assistant's calls are recorded. Defaults to true.
When this is enabled, no logs, recordings, or transcriptions will be stored. At the end of the call, you will still receive an end-of-call-report message to store on your server. Defaults to false.
These are the messages that will be sent to your Client SDKs. Default is conversation-update,function-call,hang,model-output,speech-update,status-update,transcript,tool-calls,user-interrupted,voice-input. You can check the shape of the messages in ClientMessage schema.
conversation-update, function-call, function-call-result, hang, metadata, model-output, speech-update, status-update, transcript, tool-calls, tool-calls-result, user-interrupted, voice-input
These are the messages that will be sent to your Server URL. Default is conversation-update,end-of-call-report,function-call,hang,speech-update,status-update,tool-calls,transfer-destination-request,user-interrupted. You can check the shape of the messages in ServerMessage schema.
conversation-update, end-of-call-report, function-call, hang, model-output, phone-call-control, speech-update, status-update, transcript, tool-calls, transfer-destination-request, user-interrupted, voice-input
How many seconds of silence to wait before ending the call. Defaults to 30.
@default 30
The minimum number of seconds after user speech to wait before the assistant starts speaking. Defaults to 0.4.
@default 0.4
The minimum number of seconds to wait after transcription (with punctuation) before sending a request to the LLM. Defaults to 0.1.
@default 0.1
The minimum number of seconds to wait after transcription (without punctuation) before sending a request to the LLM. Defaults to 1.5.
@default 1.5
The number of words to wait for before interrupting the assistant.
Words like "stop", "actually", "no", etc. will always interrupt immediately regardless of this value.
Words like "okay", "yeah", "right" will never interrupt.
When set to 0, it will rely solely on the VAD (Voice Activity Detector) and will not wait for any transcription. Defaults to 0.
@default 0
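Taken together, a fairly patient configuration of these timing fields might look like the sketch below (values are illustrative, not recommendations):

{
  "silenceTimeoutSeconds": 45,
  "responseDelaySeconds": 0.4,
  "llmRequestDelaySeconds": 0.1,
  "llmRequestNonPunctuatedDelaySeconds": 1.5,
  "numWordsToInterruptAssistant": 2
}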
This is the maximum number of seconds that the call will last. When the call reaches this duration, it will be ended.
@default 1800 (~30 minutes)
This is the background sound in the call. Default for phone calls is 'office' and default for web calls is 'off'.
off, office
This determines whether the model says 'mhmm', 'ahem' etc. while user is speaking.
Default false while in beta.
@default false
This enables filtering of noise and background speech while the user is talking.
Default false while in beta.
@default false
This determines whether the model's output is used in conversation history rather than the transcription of assistant's speech.
Default false while in beta.
@default false
These are values that will be used to replace the template variables in the assistant messages and other text-based fields.
This is the name of the assistant.
This is required when you want to transfer between assistants in a call.
This is the first message that the assistant will say. This can also be a URL to a containerized audio file (mp3, wav, etc.).
If unspecified, assistant will wait for user to speak and use the model to respond once they speak.
These are the settings to configure or disable voicemail detection. Alternatively, voicemail detection can be configured using the model.tools=[VoicemailTool]. This uses Twilio's built-in detection while the VoicemailTool relies on the model to detect if a voicemail was reached. You can use neither of them, one of them, or both of them. By default, Twilio built-in detection is enabled while VoicemailTool is not.
This is the provider to use for voicemail detection.
twilio
These are the AMD messages from Twilio that are considered as voicemail. Default is ['machine_end_beep', 'machine_end_silence'].
@default {Array} ['machine_end_beep', 'machine_end_silence']
machine_start
, human
, fax
, unknown
, machine_end_beep
, machine_end_silence
, machine_end_other
This sets whether the assistant should detect voicemail. Defaults to true.
@default true
The number of seconds that Twilio should attempt to perform answering machine detection before timing out and returning AnsweredBy as unknown. Default is 30 seconds.
Increasing this value will provide the engine more time to make a determination. This can be useful when DetectMessageEnd is provided in the MachineDetection parameter and there is an expectation of long answering machine greetings that can exceed 30 seconds.
Decreasing this value will reduce the amount of time the engine has to make a determination. This can be particularly useful when the Enable option is provided in the MachineDetection parameter and you want to limit the time for initial detection.
Check the Twilio docs for more info.
@default 30
The number of milliseconds that is used as the measuring stick for the length of the speech activity. Durations lower than this value will be interpreted as a human, longer as a machine. Default is 2400 milliseconds.
Increasing this value will reduce the chance of a False Machine (detected machine, actually human) for a long human greeting (e.g., a business greeting) but increase the time it takes to detect a machine.
Decreasing this value will reduce the chances of a False Human (detected human, actually machine) for short voicemail greetings. The value of this parameter may need to be reduced by more than 1000ms to detect very short voicemail greetings. A reduction of that significance can result in increased False Machine detections. Adjusting the MachineDetectionSpeechEndThreshold is likely the better approach for short voicemails. Decreasing MachineDetectionSpeechThreshold will also reduce the time it takes to detect a machine.
Check the Twilio docs for more info.
@default 2400
The number of milliseconds of silence after speech activity at which point the speech activity is considered complete. Default is 1200 milliseconds.
Increasing this value will typically be used to better address the short voicemail greeting scenarios. For short voicemails, there is typically 1000-2000ms of audio followed by 1200-2400ms of silence and then additional audio before the beep. Increasing the MachineDetectionSpeechEndThreshold to ~2500ms will treat the 1200-2400ms of silence as a gap in the greeting but not the end of the greeting and will result in a machine detection. The downsides of such a change include:
- Increasing the delay for human detection by the amount you increase this parameter, e.g., a change of 1200ms to 2500ms increases human detection delay by 1300ms.
- Cases where a human has two utterances separated by a period of silence (e.g. a "Hello", then 2000ms of silence, and another "Hello") may be interpreted as a machine.
Decreasing this value will result in faster human detection. The consequence is that it can lead to increased False Human (detected human, actually machine) detections because a silence gap in a voicemail greeting (not necessarily just in short voicemail scenarios) can be incorrectly interpreted as the end of speech.
Check the Twilio docs for more info.
@default 1200
The number of milliseconds of initial silence after which an unknown AnsweredBy result will be returned. Default is 5000 milliseconds.
Increasing this value will result in waiting for a longer period of initial silence before returning an 'unknown' AMD result.
Decreasing this value will result in waiting for a shorter period of initial silence before returning an 'unknown' AMD result.
Check the Twilio docs for more info.
@default 5000
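For example, Twilio-based detection tuned for short voicemail greetings, along the lines discussed above, might look like the sketch below (values are illustrative):

{
  "voicemailDetection": {
    "provider": "twilio",
    "enabled": true,
    "voicemailDetectionTypes": [
      "machine_end_beep",
      "machine_end_silence"
    ],
    "machineDetectionTimeout": 30,
    "machineDetectionSpeechThreshold": 2400,
    "machineDetectionSpeechEndThreshold": 2500,
    "machineDetectionSilenceTimeout": 5000
  }
}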
This is the message that the assistant will say if the call is forwarded to voicemail.
If unspecified, it will hang up.
This is the message that the assistant will say if it ends the call.
If unspecified, it will hang up without saying anything.
This list contains phrases that, if spoken by the assistant, will trigger the call to be hung up. Case insensitive.
This is for metadata you want to store on the assistant.
This is the URL Vapi will communicate with via HTTP GET and POST Requests. This is used for retrieving context, function calling, and end-of-call reports.
All requests will be sent with the call object among other things relevant to that message. You can find more details in the Server URL documentation.
This overrides the serverUrl set on the org and the phoneNumber. Order of precedence: tool.server.url > assistant.serverUrl > phoneNumber.serverUrl > org.serverUrl
This is the secret you can set that Vapi will send with every request to your server. Will be sent as a header called x-vapi-secret.
Same precedence logic as serverUrl.
This is the plan for analysis of assistant's calls. Stored in call.analysis.
This is the prompt that's used to summarize the call. The output is stored in call.analysis.summary.
Default is "You are an expert note-taker. You will be given a transcript of a call. Summarize the call in 2-3 sentences, if applicable.".
Set to '' or 'off' to disable.
This is how long the request is tried before giving up. When the request times out, call.analysis.summary will be empty. Increasing this timeout will delay the end of call report. Default is 5 seconds.
This is how long the request is tried before giving up. When the request times out, call.analysis.structuredData will be empty. Increasing this timeout will delay the end of call report. Default is 5 seconds.
This is the prompt that's used to evaluate if the call was successful. The output is stored in call.analysis.successEvaluation.
Default is "You are an expert call evaluator. You will be given a transcript of a call and the system prompt of the AI participant. Determine if the call was successful based on the objectives inferred from the system prompt.".
Set to '' or 'off' to disable.
You can use this standalone or in combination with successEvaluationRubric. If both are provided, they are concatenated into appropriate instructions.
This enforces the rubric of the evaluation. The output is stored in call.analysis.successEvaluation.
Options include:
- 'NumericScale': A scale of 1 to 10.
- 'DescriptiveScale': A scale of Excellent, Good, Fair, Poor.
- 'Checklist': A checklist of criteria and their status.
- 'Matrix': A grid that evaluates multiple criteria across different performance levels.
- 'PercentageScale': A scale of 0% to 100%.
- 'LikertScale': A scale of Strongly Agree, Agree, Neutral, Disagree, Strongly Disagree.
- 'AutomaticRubric': Automatically break down evaluation into several criteria, each with its own score.
- 'PassFail': A simple 'true' if call passed, 'false' if not.
For 'Checklist' and 'Matrix', provide the criteria in successEvaluationPrompt.
Default is 'PassFail' if successEvaluationPrompt is not provided, and null if successEvaluationPrompt is provided.
You can use this standalone or in combination with successEvaluationPrompt. If both are provided, they are concatenated into appropriate instructions.
NumericScale, DescriptiveScale, Checklist, Matrix, PercentageScale, LikertScale, AutomaticRubric, PassFail
This is how long the request is tried before giving up. When the request times out, call.analysis.successEvaluation will be empty. Increasing this timeout will delay the end of call report. Default is 5 seconds.
This is the prompt that's used to extract structured data from the call. The output is stored in call.analysis.structuredData.
Disabled by default.
You can use this standalone or in combination with structuredDataSchema. If both are provided, they are concatenated into appropriate instructions.
This enforces the schema of the structured data. This output is stored in call.analysis.structuredData.
Complete guide on JSON Schema can be found here.
Disabled by default.
You can use this standalone or in combination with structuredDataPrompt. If both are provided, they are concatenated into appropriate instructions.
This is the type of output you'd like.
string, number, integer, boolean are the primitive types and should be obvious.
array and object are more interesting and quite powerful. They allow you to define nested structures.
For array, you can define the schema of the items in the array using the items property.
For object, you can define the properties of the object using the properties property.
string, number, integer, boolean, array, object
This is required if the type is "array". This is the schema of the items in the array.
This is of type JsonSchema. However, Swagger doesn't support circular references.
This is required if the type is "object". This specifies the properties of the object.
This is a map of string to JsonSchema. However, Swagger doesn't support circular references.
This is the description to help the model understand what it needs to output.
This is a list of properties that are required.
This only makes sense if the type is "object".
This is the plan for artifacts generated during assistant's calls. Stored in call.artifact.
This determines whether the video is recorded during the call. Default is false. Only relevant for webCall type.
This is the plan for static messages that can be spoken by the assistant during the call, like idleMessages.
Note: firstMessage, voicemailMessage, and endCallMessage are currently at the root level. They will be moved to messagePlan in the future, but will remain backwards compatible.
These are the messages that the assistant will speak when the user hasn't responded for idleTimeoutSeconds. Each time the timeout is triggered, a random message will be chosen from this array.
@default null (no idle message is spoken)
This determines the maximum number of times idleMessages can be spoken during the call.
@default 3
This is the timeout in seconds before a message from idleMessages is spoken. The clock starts when the assistant finishes speaking and remains active until the user speaks.
@default 7.5
This is the unique identifier for the squad.
This is the unique identifier for the org that this squad belongs to.
This is the ISO 8601 date-time string of when the squad was created.
This is the ISO 8601 date-time string of when the squad was last updated.
curl --request GET \
--url https://api.vapi.ai/squad \
--header 'Authorization: Bearer <token>'
[
{
"name": "<string>",
"members": [
{
"assistantId": "<string>",
"assistant": {
"transcriber": {
"provider": "deepgram",
"model": "nova-2",
"language": "bg",
"smartFormat": true,
"keywords": [
"<string>"
]
},
"model": {
"messages": [
{
"content": "<string>",
"role": "assistant"
}
],
"tools": [
{
"async": true,
"messages": [
{
"type": "request-start",
"content": "<string>",
"conditions": [
{
"param": "<string>",
"value": "<string>",
"operator": "eq"
}
]
}
],
"type": "dtmf",
"function": {
"name": "<string>",
"description": "<string>",
"parameters": {
"type": "object",
"properties": {},
"required": [
"<string>"
]
}
},
"server": {
"timeoutSeconds": 20,
"url": "<string>",
"secret": "<string>"
}
}
],
"toolIds": [
"<string>"
],
"provider": "anyscale",
"model": "<string>",
"temperature": 1,
"knowledgeBase": {
"provider": "canonical",
"topK": 5.5,
"fileIds": [
"<string>"
]
},
"maxTokens": 525,
"emotionRecognitionEnabled": true
},
"voice": {
"inputPreprocessingEnabled": true,
"inputReformattingEnabled": true,
"inputMinCharacters": 30,
"inputPunctuationBoundaries": [
"。",
",",
".",
"!",
"?",
";",
")",
"،",
"۔",
"।",
"॥",
"|",
"||",
",",
":"
],
"fillerInjectionEnabled": true,
"provider": "azure",
"voiceId": "andrew",
"speed": 1.25
},
"firstMessageMode": "assistant-speaks-first",
"recordingEnabled": true,
"hipaaEnabled": true,
"clientMessages": [
"conversation-update",
"function-call",
"hang",
"model-output",
"speech-update",
"status-update",
"transcript",
"tool-calls",
"user-interrupted",
"voice-input"
],
"serverMessages": [
"conversation-update",
"end-of-call-report",
"function-call",
"hang",
"speech-update",
"status-update",
"tool-calls",
"transfer-destination-request",
"user-interrupted"
],
"silenceTimeoutSeconds": 30,
"responseDelaySeconds": 0.4,
"llmRequestDelaySeconds": 0.1,
"llmRequestNonPunctuatedDelaySeconds": 1.5,
"numWordsToInterruptAssistant": 5,
"maxDurationSeconds": 1800,
"backgroundSound": "office",
"backchannelingEnabled": true,
"backgroundDenoisingEnabled": true,
"modelOutputInMessagesEnabled": true,
"name": "<string>",
"firstMessage": "<string>",
"voicemailDetection": {
"provider": "twilio",
"voicemailDetectionTypes": [
"machine_end_beep",
"machine_end_silence"
],
"enabled": true,
"machineDetectionTimeout": 31,
"machineDetectionSpeechThreshold": 3500,
"machineDetectionSpeechEndThreshold": 2750,
"machineDetectionSilenceTimeout": 6000
},
"voicemailMessage": "<string>",
"endCallMessage": "<string>",
"endCallPhrases": [
"<string>"
],
"metadata": {},
"serverUrl": "<string>",
"serverUrlSecret": "<string>",
"analysisPlan": {
"summaryPrompt": "<string>",
"summaryRequestTimeoutSeconds": 10.5,
"structuredDataRequestTimeoutSeconds": 10.5,
"successEvaluationPrompt": "<string>",
"successEvaluationRubric": "NumericScale",
"successEvaluationRequestTimeoutSeconds": 10.5,
"structuredDataPrompt": "<string>",
"structuredDataSchema": {
"type": "string",
"items": {},
"properties": {},
"description": "<string>",
"required": [
"<string>"
]
}
},
"artifactPlan": {
"videoRecordingEnabled": true
},
"messagePlan": {
"idleMessages": [
"<string>"
],
"idleMessageMaxSpokenCount": 5.5,
"idleTimeoutSeconds": 7.5
}
},
"assistantOverrides": {
"transcriber": {
"provider": "deepgram",
"model": "nova-2",
"language": "bg",
"smartFormat": true,
"keywords": [
"<string>"
]
},
"model": {
"messages": [
{
"content": "<string>",
"role": "assistant"
}
],
"tools": [
{
"async": true,
"messages": [
{
"type": "request-start",
"content": "<string>",
"conditions": [
{
"param": "<string>",
"value": "<string>",
"operator": "eq"
}
]
}
],
"type": "dtmf",
"function": {
"name": "<string>",
"description": "<string>",
"parameters": {
"type": "object",
"properties": {},
"required": [
"<string>"
]
}
},
"server": {
"timeoutSeconds": 20,
"url": "<string>",
"secret": "<string>"
}
}
],
"toolIds": [
"<string>"
],
"provider": "anyscale",
"model": "<string>",
"temperature": 1,
"knowledgeBase": {
"provider": "canonical",
"topK": 5.5,
"fileIds": [
"<string>"
]
},
"maxTokens": 525,
"emotionRecognitionEnabled": true
},
"voice": {
"inputPreprocessingEnabled": true,
"inputReformattingEnabled": true,
"inputMinCharacters": 30,
"inputPunctuationBoundaries": [
"。",
",",
".",
"!",
"?",
";",
")",
"،",
"۔",
"।",
"॥",
"|",
"||",
",",
":"
],
"fillerInjectionEnabled": true,
"provider": "azure",
"voiceId": "andrew",
"speed": 1.25
},
"firstMessageMode": "assistant-speaks-first",
"recordingEnabled": true,
"hipaaEnabled": true,
"clientMessages": [
"conversation-update",
"function-call",
"hang",
"model-output",
"speech-update",
"status-update",
"transcript",
"tool-calls",
"user-interrupted",
"voice-input"
],
"serverMessages": [
"conversation-update",
"end-of-call-report",
"function-call",
"hang",
"speech-update",
"status-update",
"tool-calls",
"transfer-destination-request",
"user-interrupted"
],
"silenceTimeoutSeconds": 30,
"responseDelaySeconds": 0.4,
"llmRequestDelaySeconds": 0.1,
"llmRequestNonPunctuatedDelaySeconds": 1.5,
"numWordsToInterruptAssistant": 5,
"maxDurationSeconds": 1800,
"backgroundSound": "office",
"backchannelingEnabled": true,
"backgroundDenoisingEnabled": true,
"modelOutputInMessagesEnabled": true,
"variableValues": {},
"name": "<string>",
"firstMessage": "<string>",
"voicemailDetection": {
"provider": "twilio",
"voicemailDetectionTypes": [
"machine_end_beep",
"machine_end_silence"
],
"enabled": true,
"machineDetectionTimeout": 31,
"machineDetectionSpeechThreshold": 3500,
"machineDetectionSpeechEndThreshold": 2750,
"machineDetectionSilenceTimeout": 6000
},
"voicemailMessage": "<string>",
"endCallMessage": "<string>",
"endCallPhrases": [
"<string>"
],
"metadata": {},
"serverUrl": "<string>",
"serverUrlSecret": "<string>",
"analysisPlan": {
"summaryPrompt": "<string>",
"summaryRequestTimeoutSeconds": 10.5,
"structuredDataRequestTimeoutSeconds": 10.5,
"successEvaluationPrompt": "<string>",
"successEvaluationRubric": "NumericScale",
"successEvaluationRequestTimeoutSeconds": 10.5,
"structuredDataPrompt": "<string>",
"structuredDataSchema": {
"type": "string",
"items": {},
"properties": {},
"description": "<string>",
"required": [
"<string>"
]
}
},
"artifactPlan": {
"videoRecordingEnabled": true
},
"messagePlan": {
"idleMessages": [
"<string>"
],
"idleMessageMaxSpokenCount": 5.5,
"idleTimeoutSeconds": 7.5
}
},
"assistantDestinations": [
{
"type": "assistant",
"assistantName": "<string>",
"message": "<string>",
"description": "<string>"
}
]
}
],
"membersOverrides": {
"transcriber": {
"provider": "deepgram",
"model": "nova-2",
"language": "bg",
"smartFormat": true,
"keywords": [
"<string>"
]
},
"model": {
"messages": [
{
"content": "<string>",
"role": "assistant"
}
],
"tools": [
{
"async": true,
"messages": [
{
"type": "request-start",
"content": "<string>",
"conditions": [
{
"param": "<string>",
"value": "<string>",
"operator": "eq"
}
]
}
],
"type": "dtmf",
"function": {
"name": "<string>",
"description": "<string>",
"parameters": {
"type": "object",
"properties": {},
"required": [
"<string>"
]
}
},
"server": {
"timeoutSeconds": 20,
"url": "<string>",
"secret": "<string>"
}
}
],
"toolIds": [
"<string>"
],
"provider": "anyscale",
"model": "<string>",
"temperature": 1,
"knowledgeBase": {
"provider": "canonical",
"topK": 5.5,
"fileIds": [
"<string>"
]
},
"maxTokens": 525,
"emotionRecognitionEnabled": true
},
"voice": {
"inputPreprocessingEnabled": true,
"inputReformattingEnabled": true,
"inputMinCharacters": 30,
"inputPunctuationBoundaries": [
"。",
",",
".",
"!",
"?",
";",
")",
"،",
"۔",
"।",
"॥",
"|",
"||",
",",
":"
],
"fillerInjectionEnabled": true,
"provider": "azure",
"voiceId": "andrew",
"speed": 1.25
},
"firstMessageMode": "assistant-speaks-first",
"recordingEnabled": true,
"hipaaEnabled": true,
"clientMessages": [
"conversation-update",
"function-call",
"hang",
"model-output",
"speech-update",
"status-update",
"transcript",
"tool-calls",
"user-interrupted",
"voice-input"
],
"serverMessages": [
"conversation-update",
"end-of-call-report",
"function-call",
"hang",
"speech-update",
"status-update",
"tool-calls",
"transfer-destination-request",
"user-interrupted"
],
"silenceTimeoutSeconds": 30,
"responseDelaySeconds": 0.4,
"llmRequestDelaySeconds": 0.1,
"llmRequestNonPunctuatedDelaySeconds": 1.5,
"numWordsToInterruptAssistant": 5,
"maxDurationSeconds": 1800,
"backgroundSound": "office",
"backchannelingEnabled": true,
"backgroundDenoisingEnabled": true,
"modelOutputInMessagesEnabled": true,
"variableValues": {},
"name": "<string>",
"firstMessage": "<string>",
"voicemailDetection": {
"provider": "twilio",
"voicemailDetectionTypes": [
"machine_end_beep",
"machine_end_silence"
],
"enabled": true,
"machineDetectionTimeout": 31,
"machineDetectionSpeechThreshold": 3500,
"machineDetectionSpeechEndThreshold": 2750,
"machineDetectionSilenceTimeout": 6000
},
"voicemailMessage": "<string>",
"endCallMessage": "<string>",
"endCallPhrases": [
"<string>"
],
"metadata": {},
"serverUrl": "<string>",
"serverUrlSecret": "<string>",
"analysisPlan": {
"summaryPrompt": "<string>",
"summaryRequestTimeoutSeconds": 10.5,
"structuredDataRequestTimeoutSeconds": 10.5,
"successEvaluationPrompt": "<string>",
"successEvaluationRubric": "NumericScale",
"successEvaluationRequestTimeoutSeconds": 10.5,
"structuredDataPrompt": "<string>",
"structuredDataSchema": {
"type": "string",
"items": {},
"properties": {},
"description": "<string>",
"required": [
"<string>"
]
}
},
"artifactPlan": {
"videoRecordingEnabled": true
},
"messagePlan": {
"idleMessages": [
"<string>"
],
"idleMessageMaxSpokenCount": 5.5,
"idleTimeoutSeconds": 7.5
}
},
"id": "<string>",
"orgId": "<string>",
"createdAt": "2023-11-07T05:31:56Z",
"updatedAt": "2023-11-07T05:31:56Z"
}
]