POST /assistant

Example request:
curl --request POST \
  --url https://api.vapi.ai/assistant \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '{
  "transcriber": {
    "provider": "deepgram",
    "model": "nova-2",
    "language": "bg",
    "smartFormat": true,
    "keywords": [
      "<string>"
    ]
  },
  "model": {
    "messages": [
      {
        "content": "<string>",
        "role": "assistant"
      }
    ],
    "tools": [
      {
        "async": true,
        "messages": [
          {
            "type": "request-start",
            "content": "<string>",
            "conditions": [
              {
                "param": "<string>",
                "value": "<string>",
                "operator": "eq"
              }
            ]
          }
        ],
        "type": "dtmf",
        "function": {
          "name": "<string>",
          "description": "<string>",
          "parameters": {
            "type": "object",
            "properties": {},
            "required": [
              "<string>"
            ]
          }
        },
        "server": {
          "timeoutSeconds": 20,
          "url": "<string>",
          "secret": "<string>"
        }
      }
    ],
    "toolIds": [
      "<string>"
    ],
    "provider": "anyscale",
    "model": "<string>",
    "temperature": 1,
    "knowledgeBase": {
      "provider": "canonical",
      "topK": 5.5,
      "fileIds": [
        "<string>"
      ]
    },
    "maxTokens": 525,
    "emotionRecognitionEnabled": true
  },
  "voice": {
    "inputPreprocessingEnabled": true,
    "inputReformattingEnabled": true,
    "inputMinCharacters": 30,
    "inputPunctuationBoundaries": [
      "。",
      ",",
      ".",
      "!",
      "?",
      ";",
      ")",
      "،",
      "۔",
      "।",
      "॥",
      "|",
      "||",
      ",",
      ":"
    ],
    "fillerInjectionEnabled": true,
    "provider": "azure",
    "voiceId": "andrew",
    "speed": 1.25
  },
  "firstMessageMode": "assistant-speaks-first",
  "recordingEnabled": true,
  "hipaaEnabled": true,
  "clientMessages": [
    "conversation-update",
    "function-call",
    "hang",
    "model-output",
    "speech-update",
    "status-update",
    "transcript",
    "tool-calls",
    "user-interrupted",
    "voice-input"
  ],
  "serverMessages": [
    "conversation-update",
    "end-of-call-report",
    "function-call",
    "hang",
    "speech-update",
    "status-update",
    "tool-calls",
    "transfer-destination-request",
    "user-interrupted"
  ],
  "silenceTimeoutSeconds": 30,
  "responseDelaySeconds": 0.4,
  "llmRequestDelaySeconds": 0.1,
  "numWordsToInterruptAssistant": 5,
  "maxDurationSeconds": 1800,
  "backgroundSound": "office",
  "backchannelingEnabled": true,
  "backgroundDenoisingEnabled": true,
  "modelOutputInMessagesEnabled": true,
  "name": "<string>",
  "firstMessage": "<string>",
  "voicemailDetection": {
    "provider": "twilio",
    "voicemailDetectionTypes": [
      "machine_end_beep",
      "machine_end_silence"
    ],
    "enabled": true,
    "machineDetectionTimeout": 31,
    "machineDetectionSpeechThreshold": 3500,
    "machineDetectionSpeechEndThreshold": 2750,
    "machineDetectionSilenceTimeout": 6000
  },
  "voicemailMessage": "<string>",
  "endCallMessage": "<string>",
  "endCallPhrases": [
    "<string>"
  ],
  "metadata": {},
  "serverUrl": "<string>",
  "serverUrlSecret": "<string>",
  "analysisPlan": {
    "summaryPrompt": "<string>",
    "summaryRequestTimeoutSeconds": 10.5,
    "structuredDataRequestTimeoutSeconds": 10.5,
    "successEvaluationPrompt": "<string>",
    "successEvaluationRubric": "NumericScale",
    "successEvaluationRequestTimeoutSeconds": 10.5,
    "structuredDataPrompt": "<string>",
    "structuredDataSchema": {
      "type": "string",
      "items": {},
      "properties": {},
      "description": "<string>",
      "required": [
        "<string>"
      ]
    }
  },
  "artifactPlan": {
    "videoRecordingEnabled": true
  },
  "messagePlan": {
    "idleMessages": [
      "<string>"
    ],
    "idleMessageMaxSpokenCount": 5.5,
    "idleTimeoutSeconds": 7.5
  }
}'

Example response (201):

{
  "transcriber": {
    "provider": "deepgram",
    "model": "nova-2",
    "language": "bg",
    "smartFormat": true,
    "keywords": [
      "<string>"
    ]
  },
  "model": {
    "messages": [
      {
        "content": "<string>",
        "role": "assistant"
      }
    ],
    "tools": [
      {
        "async": true,
        "messages": [
          {
            "type": "request-start",
            "content": "<string>",
            "conditions": [
              {
                "param": "<string>",
                "value": "<string>",
                "operator": "eq"
              }
            ]
          }
        ],
        "type": "dtmf",
        "function": {
          "name": "<string>",
          "description": "<string>",
          "parameters": {
            "type": "object",
            "properties": {},
            "required": [
              "<string>"
            ]
          }
        },
        "server": {
          "timeoutSeconds": 20,
          "url": "<string>",
          "secret": "<string>"
        }
      }
    ],
    "toolIds": [
      "<string>"
    ],
    "provider": "anyscale",
    "model": "<string>",
    "temperature": 1,
    "knowledgeBase": {
      "provider": "canonical",
      "topK": 5.5,
      "fileIds": [
        "<string>"
      ]
    },
    "maxTokens": 525,
    "emotionRecognitionEnabled": true
  },
  "voice": {
    "inputPreprocessingEnabled": true,
    "inputReformattingEnabled": true,
    "inputMinCharacters": 30,
    "inputPunctuationBoundaries": [
      "。",
      ",",
      ".",
      "!",
      "?",
      ";",
      ")",
      "،",
      "۔",
      "।",
      "॥",
      "|",
      "||",
      ",",
      ":"
    ],
    "fillerInjectionEnabled": true,
    "provider": "azure",
    "voiceId": "andrew",
    "speed": 1.25
  },
  "firstMessageMode": "assistant-speaks-first",
  "recordingEnabled": true,
  "hipaaEnabled": true,
  "clientMessages": [
    "conversation-update",
    "function-call",
    "hang",
    "model-output",
    "speech-update",
    "status-update",
    "transcript",
    "tool-calls",
    "user-interrupted",
    "voice-input"
  ],
  "serverMessages": [
    "conversation-update",
    "end-of-call-report",
    "function-call",
    "hang",
    "speech-update",
    "status-update",
    "tool-calls",
    "transfer-destination-request",
    "user-interrupted"
  ],
  "silenceTimeoutSeconds": 30,
  "responseDelaySeconds": 0.4,
  "llmRequestDelaySeconds": 0.1,
  "numWordsToInterruptAssistant": 5,
  "maxDurationSeconds": 1800,
  "backgroundSound": "office",
  "backchannelingEnabled": true,
  "backgroundDenoisingEnabled": true,
  "modelOutputInMessagesEnabled": true,
  "isServerUrlSecretSet": {},
  "name": "<string>",
  "firstMessage": "<string>",
  "voicemailDetection": {
    "provider": "twilio",
    "voicemailDetectionTypes": [
      "machine_end_beep",
      "machine_end_silence"
    ],
    "enabled": true,
    "machineDetectionTimeout": 31,
    "machineDetectionSpeechThreshold": 3500,
    "machineDetectionSpeechEndThreshold": 2750,
    "machineDetectionSilenceTimeout": 6000
  },
  "voicemailMessage": "<string>",
  "endCallMessage": "<string>",
  "endCallPhrases": [
    "<string>"
  ],
  "metadata": {},
  "serverUrl": "<string>",
  "serverUrlSecret": "<string>",
  "analysisPlan": {
    "summaryPrompt": "<string>",
    "summaryRequestTimeoutSeconds": 10.5,
    "structuredDataRequestTimeoutSeconds": 10.5,
    "successEvaluationPrompt": "<string>",
    "successEvaluationRubric": "NumericScale",
    "successEvaluationRequestTimeoutSeconds": 10.5,
    "structuredDataPrompt": "<string>",
    "structuredDataSchema": {
      "type": "string",
      "items": {},
      "properties": {},
      "description": "<string>",
      "required": [
        "<string>"
      ]
    }
  },
  "artifactPlan": {
    "videoRecordingEnabled": true
  },
  "messagePlan": {
    "idleMessages": [
      "<string>"
    ],
    "idleMessageMaxSpokenCount": 5.5,
    "idleTimeoutSeconds": 7.5
  },
  "id": "<string>",
  "orgId": "<string>",
  "createdAt": "2023-11-07T05:31:56Z",
  "updatedAt": "2023-11-07T05:31:56Z"
}

Authorizations

Authorization
string
header, required

Retrieve your API Key from the Dashboard.
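
For reference, the only required header is Authorization; a minimal create call might look like this (assuming, as the field list below suggests, that every body field is optional — the "name" value here is just illustrative):

curl --request POST \
  --url https://api.vapi.ai/assistant \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '{ "name": "Support Assistant" }'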

Body

application/json
transcriber
object

These are the options for the assistant's transcriber.

model
object

These are the options for the assistant's LLM.

voice
object

These are the options for the assistant's voice.
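
These three objects sit side by side at the top level of the request body; a trimmed-down fragment using the same provider values as the example request above:

{
  "transcriber": { "provider": "deepgram", "model": "nova-2", "language": "bg" },
  "model": { "provider": "anyscale", "model": "<string>", "temperature": 1, "maxTokens": 525 },
  "voice": { "provider": "azure", "voiceId": "andrew", "speed": 1.25 }
}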

firstMessageMode
enum<string>

This is the mode for the first message. Default is 'assistant-speaks-first'.

Use:

  • 'assistant-speaks-first' to have the assistant speak first.
  • 'assistant-waits-for-user' to have the assistant wait for the user to speak first.
  • 'assistant-speaks-first-with-model-generated-message' to have the assistant speak first with a message generated by the model based on the conversation state. (assistant.model.messages at call start, call.messages at squad transfer points).

@default 'assistant-speaks-first'

Available options:
assistant-speaks-first,
assistant-speaks-first-with-model-generated-message,
assistant-waits-for-user
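
For example, to have the assistant open with a fixed greeting, combine this with the firstMessage field documented further down (the greeting text is illustrative):

{
  "firstMessageMode": "assistant-speaks-first",
  "firstMessage": "Hi, thanks for calling. How can I help you today?"
}
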
recordingEnabled
boolean

This sets whether the assistant's calls are recorded. Defaults to true.

hipaaEnabled
boolean

When this is enabled, no logs, recordings, or transcriptions will be stored. At the end of the call, you will still receive an end-of-call-report message to store on your server. Defaults to false.

clientMessages
enum<string>[]

These are the messages that will be sent to your Client SDKs. Default is conversation-update, function-call, hang, model-output, speech-update, status-update, transcript, tool-calls, user-interrupted, voice-input. You can check the shape of the messages in the ClientMessage schema.

Available options:
conversation-update,
function-call,
function-call-result,
hang,
metadata,
model-output,
speech-update,
status-update,
transcript,
tool-calls,
tool-calls-result,
user-interrupted,
voice-input

serverMessages
enum<string>[]

These are the messages that will be sent to your Server URL. Default is conversation-update, end-of-call-report, function-call, hang, speech-update, status-update, tool-calls, transfer-destination-request, user-interrupted. You can check the shape of the messages in the ServerMessage schema.

Available options:
conversation-update,
end-of-call-report,
function-call,
hang,
model-output,
phone-call-control,
speech-update,
status-update,
transcript,
tool-calls,
transfer-destination-request,
user-interrupted,
voice-input
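
As a sketch, a request body that trims both lists down to a few message types (assuming that supplying an array replaces the corresponding default list above):

{
  "clientMessages": ["transcript", "status-update"],
  "serverMessages": ["end-of-call-report", "status-update", "tool-calls"]
}
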
silenceTimeoutSeconds
number

How many seconds of silence to wait before ending the call. Defaults to 30.

@default 30

responseDelaySeconds
number

The minimum number of seconds after user speech to wait before the assistant starts speaking. Defaults to 0.4.

@default 0.4

llmRequestDelaySeconds
number

The minimum number of seconds to wait after punctuation before sending a request to the LLM. Defaults to 0.1.

@default 0.1

numWordsToInterruptAssistant
number

The number of words to wait for before interrupting the assistant.

Words like "stop", "actually", "no", etc. will always interrupt immediately regardless of this value.

Words like "okay", "yeah", "right" will never interrupt.

When set to 0, it will rely solely on the VAD (Voice Activity Detector) and will not wait for any transcription. Defaults to this (0).

@default 0

maxDurationSeconds
number

This is the maximum number of seconds that the call will last. When the call reaches this duration, it will be ended.

@default 1800 (~30 minutes)
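
The timing and interruption knobs above can be combined in one request; this fragment reuses the values from the example request:

{
  "silenceTimeoutSeconds": 30,
  "responseDelaySeconds": 0.4,
  "llmRequestDelaySeconds": 0.1,
  "numWordsToInterruptAssistant": 5,
  "maxDurationSeconds": 1800
}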

backgroundSound
enum<string>

This is the background sound in the call. Default for phone calls is 'office' and default for web calls is 'off'.

Available options:
off,
office

backchannelingEnabled
boolean

This determines whether the model says 'mhmm', 'ahem', etc. while the user is speaking.

Default false while in beta.

@default false

backgroundDenoisingEnabled
boolean

This enables filtering of noise and background speech while the user is talking.

Default false while in beta.

@default false

modelOutputInMessagesEnabled
boolean

This determines whether the model's output is used in the conversation history rather than the transcription of the assistant's speech.

Default false while in beta.

@default false
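
A fragment enabling the beta behaviors above alongside the office background sound, mirroring the example request:

{
  "backgroundSound": "office",
  "backchannelingEnabled": true,
  "backgroundDenoisingEnabled": true,
  "modelOutputInMessagesEnabled": true
}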

name
string

This is the name of the assistant.

This is required when you want to transfer between assistants in a call.

firstMessage
string

This is the first message that the assistant will say. This can also be a URL to a containerized audio file (mp3, wav, etc.).

If unspecified, the assistant will wait for the user to speak and will use the model to respond once they do.

voicemailDetection
object

These are the settings to configure or disable voicemail detection. Alternatively, voicemail detection can be configured using the model.tools=[VoicemailTool]. This uses Twilio's built-in detection while the VoicemailTool relies on the model to detect if a voicemail was reached. You can use neither of them, one of them, or both of them. By default, Twilio built-in detection is enabled while VoicemailTool is not.

voicemailMessage
string

This is the message that the assistant will say if the call is forwarded to voicemail.

If unspecified, it will hang up.

endCallMessage
string

This is the message that the assistant will say if it ends the call.

If unspecified, it will hang up without saying anything.

endCallPhrases
string[]

This list contains phrases that, if spoken by the assistant, will trigger the call to be hung up. Case insensitive.
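
Putting the voicemail and call-ending fields together (the message strings and phrases are illustrative; the voicemailDetection values come from the example request):

{
  "voicemailDetection": {
    "provider": "twilio",
    "enabled": true,
    "voicemailDetectionTypes": ["machine_end_beep", "machine_end_silence"]
  },
  "voicemailMessage": "Sorry we missed you. Please call back during business hours.",
  "endCallMessage": "Thanks for calling. Goodbye!",
  "endCallPhrases": ["goodbye", "have a nice day"]
}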

metadata
object

This is for metadata you want to store on the assistant.

serverUrl
string

This is the URL Vapi will communicate with via HTTP GET and POST Requests. This is used for retrieving context, function calling, and end-of-call reports.

All requests will be sent with the call object among other things relevant to that message. You can find more details in the Server URL documentation.

This overrides the serverUrl set on the org and the phoneNumber. Order of precedence: tool.server.url > assistant.serverUrl > phoneNumber.serverUrl > org.serverUrl

serverUrlSecret
string

This is the secret you can set that Vapi will send with every request to your server. It will be sent as a header called x-vapi-secret.

Same precedence logic as serverUrl.
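
A fragment pointing the assistant at your own endpoint (the URL is a placeholder); Vapi will then include the secret in an x-vapi-secret header on every request it makes to that URL, so your server can verify the sender:

{
  "serverUrl": "https://example.com/vapi/webhook",
  "serverUrlSecret": "<a secret you generate>"
}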

analysisPlan
object

This is the plan for analysis of assistant's calls. Stored in call.analysis.

artifactPlan
object

This is the plan for artifacts generated during assistant's calls. Stored in call.artifact.

messagePlan
object

This is the plan for static messages that can be spoken by the assistant during the call, like idleMessages.

Note: firstMessage, voicemailMessage, and endCallMessage are currently at the root level. They will be moved to messagePlan in the future, but will remain backwards compatible.
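
A fragment combining the three plan objects, using fields from the example request (the prompt and message text are illustrative):

{
  "analysisPlan": {
    "summaryPrompt": "Summarize the call in two sentences.",
    "successEvaluationRubric": "NumericScale"
  },
  "artifactPlan": {
    "videoRecordingEnabled": true
  },
  "messagePlan": {
    "idleMessages": ["Are you still there?"],
    "idleTimeoutSeconds": 7.5
  }
}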

Response

201 - application/json
transcriber
object

These are the options for the assistant's transcriber.

model
object

These are the options for the assistant's LLM.

voice
object

These are the options for the assistant's voice.

firstMessageMode
enum<string>

This is the mode for the first message. Default is 'assistant-speaks-first'.

Use:

  • 'assistant-speaks-first' to have the assistant speak first.
  • 'assistant-waits-for-user' to have the assistant wait for the user to speak first.
  • 'assistant-speaks-first-with-model-generated-message' to have the assistant speak first with a message generated by the model based on the conversation state. (assistant.model.messages at call start, call.messages at squad transfer points).

@default 'assistant-speaks-first'

Available options:
assistant-speaks-first,
assistant-speaks-first-with-model-generated-message,
assistant-waits-for-user

recordingEnabled
boolean

This sets whether the assistant's calls are recorded. Defaults to true.

hipaaEnabled
boolean

When this is enabled, no logs, recordings, or transcriptions will be stored. At the end of the call, you will still receive an end-of-call-report message to store on your server. Defaults to false.

clientMessages
enum<string>[]

These are the messages that will be sent to your Client SDKs. Default is conversation-update, function-call, hang, model-output, speech-update, status-update, transcript, tool-calls, user-interrupted, voice-input. You can check the shape of the messages in the ClientMessage schema.

Available options:
conversation-update,
function-call,
function-call-result,
hang,
metadata,
model-output,
speech-update,
status-update,
transcript,
tool-calls,
tool-calls-result,
user-interrupted,
voice-input

serverMessages
enum<string>[]

These are the messages that will be sent to your Server URL. Default is conversation-update, end-of-call-report, function-call, hang, speech-update, status-update, tool-calls, transfer-destination-request, user-interrupted. You can check the shape of the messages in the ServerMessage schema.

Available options:
conversation-update,
end-of-call-report,
function-call,
hang,
model-output,
phone-call-control,
speech-update,
status-update,
transcript,
tool-calls,
transfer-destination-request,
user-interrupted,
voice-input

silenceTimeoutSeconds
number

How many seconds of silence to wait before ending the call. Defaults to 30.

@default 30

responseDelaySeconds
number

The minimum number of seconds after user speech to wait before the assistant starts speaking. Defaults to 0.4.

@default 0.4

llmRequestDelaySeconds
number

The minimum number of seconds to wait after punctuation before sending a request to the LLM. Defaults to 0.1.

@default 0.1

numWordsToInterruptAssistant
number

The number of words to wait for before interrupting the assistant.

Words like "stop", "actually", "no", etc. will always interrupt immediately regardless of this value.

Words like "okay", "yeah", "right" will never interrupt.

When set to 0, it will rely solely on the VAD (Voice Activity Detector) and will not wait for any transcription. Defaults to this (0).

@default 0

maxDurationSeconds
number

This is the maximum number of seconds that the call will last. When the call reaches this duration, it will be ended.

@default 1800 (~30 minutes)

backgroundSound
enum<string>

This is the background sound in the call. Default for phone calls is 'office' and default for web calls is 'off'.

Available options:
off,
office

backchannelingEnabled
boolean

This determines whether the model says 'mhmm', 'ahem', etc. while the user is speaking.

Default false while in beta.

@default false

backgroundDenoisingEnabled
boolean

This enables filtering of noise and background speech while the user is talking.

Default false while in beta.

@default false

modelOutputInMessagesEnabled
boolean

This determines whether the model's output is used in the conversation history rather than the transcription of the assistant's speech.

Default false while in beta.

@default false

isServerUrlSecretSet
object
required

name
string

This is the name of the assistant.

This is required when you want to transfer between assistants in a call.

firstMessage
string

This is the first message that the assistant will say. This can also be a URL to a containerized audio file (mp3, wav, etc.).

If unspecified, the assistant will wait for the user to speak and will use the model to respond once they do.

voicemailDetection
object

These are the settings to configure or disable voicemail detection. Alternatively, voicemail detection can be configured using the model.tools=[VoicemailTool]. This uses Twilio's built-in detection while the VoicemailTool relies on the model to detect if a voicemail was reached. You can use neither of them, one of them, or both of them. By default, Twilio built-in detection is enabled while VoicemailTool is not.

voicemailMessage
string

This is the message that the assistant will say if the call is forwarded to voicemail.

If unspecified, it will hang up.

endCallMessage
string

This is the message that the assistant will say if it ends the call.

If unspecified, it will hang up without saying anything.

endCallPhrases
string[]

This list contains phrases that, if spoken by the assistant, will trigger the call to be hung up. Case insensitive.

metadata
object

This is for metadata you want to store on the assistant.

serverUrl
string

This is the URL Vapi will communicate with via HTTP GET and POST Requests. This is used for retrieving context, function calling, and end-of-call reports.

All requests will be sent with the call object among other things relevant to that message. You can find more details in the Server URL documentation.

This overrides the serverUrl set on the org and the phoneNumber. Order of precedence: tool.server.url > assistant.serverUrl > phoneNumber.serverUrl > org.serverUrl

serverUrlSecret
string

This is the secret you can set that Vapi will send with every request to your server. It will be sent as a header called x-vapi-secret.

Same precedence logic as serverUrl.

analysisPlan
object

This is the plan for analysis of assistant's calls. Stored in call.analysis.

artifactPlan
object

This is the plan for artifacts generated during assistant's calls. Stored in call.artifact.

messagePlan
object

This is the plan for static messages that can be spoken by the assistant during the call, like idleMessages.

Note: firstMessage, voicemailMessage, and endCallMessage are currently at the root level. They will be moved to messagePlan in the future, but will remain backwards compatible.

id
string
required

This is the unique identifier for the assistant.

orgId
string
required

This is the unique identifier for the org that this assistant belongs to.

createdAt
string
required

This is the ISO 8601 date-time string of when the assistant was created.

updatedAt
string
required

This is the ISO 8601 date-time string of when the assistant was last updated.
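
The id field from the 201 response identifies the new assistant; a sketch that captures it on the command line (assumes jq is installed, and the "name" value is illustrative):

ASSISTANT_ID=$(curl --silent --request POST \
  --url https://api.vapi.ai/assistant \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '{ "name": "Support Assistant" }' | jq -r '.id')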