> For clean Markdown of any page, append .md to the page URL.
> For a complete documentation index, see https://docs.vapi.ai/llms.txt.
> For full documentation content, see https://docs.vapi.ai/llms-full.txt.
> For AI client integration (Claude Code, Cursor, etc.), connect to the MCP server at https://docs.vapi.ai/_mcp/server.

# Update Eval

PATCH https://api.vapi.ai/eval/{id}
Content-Type: application/json

Reference: https://docs.vapi.ai/api-reference/eval/eval-controller-update

## OpenAPI Specification

```yaml
# OpenAPI document header: spec version plus the API's title and version metadata.
openapi: 3.1.0
info:
  title: api
  version: 1.0.0
paths:
  /eval/{id}:
    # PATCH /eval/{id} ("Update Eval"): the JSON request body follows
    # UpdateEvalDTO and the 200 response body follows Eval.
    patch:
      operationId: eval-controller-update
      summary: Update Eval
      tags:
        - subpackage_eval
      parameters:
        # Path segment `{id}` of the eval being updated.
        - name: id
          in: path
          required: true
          schema:
            type: string
        # NOTE(review): OpenAPI ignores header parameters named `Authorization`;
        # a `components.securitySchemes` entry is the conventional way to
        # declare this. Left as a parameter because the rest of the document
        # is not visible from here.
        - name: Authorization
          in: header
          # The link target carries an explicit scheme so renderers do not
          # resolve it as a path relative to the docs site.
          description: Retrieve your API Key from [Dashboard](https://dashboard.vapi.ai).
          required: true
          schema:
            type: string
      responses:
        '200':
          description: ''
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Eval'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/UpdateEvalDTO'
# Base server URL for the paths declared in this document.
servers:
  - url: https://api.vapi.ai
components:
  schemas:
    # Constant role tag for mock assistant messages; referenced by
    # ChatEvalAssistantMessageMock.role.
    ChatEvalAssistantMessageMockRole:
      title: ChatEvalAssistantMessageMockRole
      description: |-
        This is the role of the message author.
        For a mock assistant message, the role is always 'assistant'
        @default 'assistant'
      type: string
      enum: [assistant]
      default: assistant
    # Object schema with no declared properties, used for a mock tool call's
    # `arguments`.
    ChatEvalAssistantMessageMockToolCallArguments:
      title: ChatEvalAssistantMessageMockToolCallArguments
      description: This is the arguments that will be passed to the tool call.
      type: object
      properties: {}
    # One tool call attached to an assistant message: `name` is mandatory,
    # `arguments` is optional.
    ChatEvalAssistantMessageMockToolCall:
      title: ChatEvalAssistantMessageMockToolCall
      type: object
      required: [name]
      properties:
        name:
          description: |-
            This is the name of the tool that will be called.
            It should be one of the tools created in the organization.
          type: string
        arguments:
          description: This is the arguments that will be passed to the tool call.
          $ref: '#/components/schemas/ChatEvalAssistantMessageMockToolCallArguments'
    # Mock assistant turn: only `role` is mandatory; `content` and `toolCalls`
    # are optional.
    ChatEvalAssistantMessageMock:
      title: ChatEvalAssistantMessageMock
      type: object
      required: [role]
      properties:
        role:
          description: |-
            This is the role of the message author.
            For a mock assistant message, the role is always 'assistant'
            @default 'assistant'
          $ref: '#/components/schemas/ChatEvalAssistantMessageMockRole'
        content:
          description: |-
            This is the content of the assistant message.
            This is the message that the assistant would have sent.
          type: string
        toolCalls:
          description: This is the tool calls that will be made by the assistant.
          type: array
          items:
            $ref: '#/components/schemas/ChatEvalAssistantMessageMockToolCall'
    # Constant role tag for mock system messages; referenced by
    # ChatEvalSystemMessageMock.role.
    ChatEvalSystemMessageMockRole:
      title: ChatEvalSystemMessageMockRole
      description: |-
        This is the role of the message author.
        For a mock system message, the role is always 'system'
        @default 'system'
      type: string
      enum: [system]
      default: system
    # Mock system turn; both `role` and `content` are mandatory.
    ChatEvalSystemMessageMock:
      title: ChatEvalSystemMessageMock
      type: object
      required: [role, content]
      properties:
        role:
          description: |-
            This is the role of the message author.
            For a mock system message, the role is always 'system'
            @default 'system'
          $ref: '#/components/schemas/ChatEvalSystemMessageMockRole'
        content:
          description: >-
            This is the content of the system message that would have been added
            in the middle of the conversation.

            Do not include the assistant prompt as a part of this message. It
            will automatically be fetched during runtime.
          type: string
    # Constant role tag for mock tool-response messages; referenced by
    # ChatEvalToolResponseMessageMock.role.
    ChatEvalToolResponseMessageMockRole:
      title: ChatEvalToolResponseMessageMockRole
      description: |-
        This is the role of the message author.
        For a mock tool response message, the role is always 'tool'
        @default 'tool'
      type: string
      enum: [tool]
      default: tool
    # Mock tool-response turn; both `role` and `content` are mandatory.
    ChatEvalToolResponseMessageMock:
      title: ChatEvalToolResponseMessageMock
      type: object
      required: [role, content]
      properties:
        role:
          description: |-
            This is the role of the message author.
            For a mock tool response message, the role is always 'tool'
            @default 'tool'
          $ref: '#/components/schemas/ChatEvalToolResponseMessageMockRole'
        content:
          description: >-
            This is the content of the tool response message. JSON Objects
            should be stringified.
          type: string
    # Constant role tag for tool-response evaluations; referenced by
    # ChatEvalToolResponseMessageEvaluation.role.
    ChatEvalToolResponseMessageEvaluationRole:
      title: ChatEvalToolResponseMessageEvaluationRole
      description: |-
        This is the role of the message author.
        For a tool response message evaluation, the role is always 'tool'
        @default 'tool'
      type: string
      enum: [tool]
      default: tool
    # Provider tag for EvalOpenAIModel; the only allowed value is `openai`.
    EvalOpenAiModelProvider:
      title: EvalOpenAiModelProvider
      description: This is the provider of the model (`openai`).
      type: string
      enum: [openai]
    # Allowed OpenAI model identifiers for an eval judge model.
    # Entries of the form `<model>:<region>` select a specific region for that
    # model, as explained in the description below.
    EvalOpenAiModelModel:
      type: string
      enum:
        - gpt-5.4
        - gpt-5.4-mini
        - gpt-5.4-nano
        - gpt-5.2
        - gpt-5.2-chat-latest
        - gpt-5.1
        - gpt-5.1-chat-latest
        - gpt-5
        - gpt-5-chat-latest
        - gpt-5-mini
        - gpt-5-nano
        - gpt-4.1-2025-04-14
        - gpt-4.1-mini-2025-04-14
        - gpt-4.1-nano-2025-04-14
        - gpt-4.1
        - gpt-4.1-mini
        - gpt-4.1-nano
        - chatgpt-4o-latest
        - o3
        - o3-mini
        - o4-mini
        - o1-mini
        - o1-mini-2024-09-12
        - gpt-4o-mini-2024-07-18
        - gpt-4o-mini
        - gpt-4o
        - gpt-4o-2024-05-13
        - gpt-4o-2024-08-06
        - gpt-4o-2024-11-20
        - gpt-4-turbo
        - gpt-4-turbo-2024-04-09
        - gpt-4-turbo-preview
        - gpt-4-0125-preview
        - gpt-4-1106-preview
        - gpt-4
        - gpt-4-0613
        - gpt-3.5-turbo
        - gpt-3.5-turbo-0125
        - gpt-3.5-turbo-1106
        - gpt-3.5-turbo-16k
        - gpt-3.5-turbo-0613
        - gpt-4.1-2025-04-14:westus
        - gpt-4.1-2025-04-14:eastus2
        - gpt-4.1-2025-04-14:eastus
        - gpt-4.1-2025-04-14:westus3
        - gpt-4.1-2025-04-14:northcentralus
        - gpt-4.1-2025-04-14:southcentralus
        - gpt-4.1-2025-04-14:westeurope
        - gpt-4.1-2025-04-14:germanywestcentral
        - gpt-4.1-2025-04-14:polandcentral
        - gpt-4.1-2025-04-14:spaincentral
        - gpt-4.1-mini-2025-04-14:westus
        - gpt-4.1-mini-2025-04-14:eastus2
        - gpt-4.1-mini-2025-04-14:eastus
        - gpt-4.1-mini-2025-04-14:westus3
        - gpt-4.1-mini-2025-04-14:northcentralus
        - gpt-4.1-mini-2025-04-14:southcentralus
        - gpt-4.1-mini-2025-04-14:westeurope
        - gpt-4.1-mini-2025-04-14:germanywestcentral
        - gpt-4.1-mini-2025-04-14:polandcentral
        - gpt-4.1-mini-2025-04-14:spaincentral
        - gpt-4.1-nano-2025-04-14:westus
        - gpt-4.1-nano-2025-04-14:eastus2
        - gpt-4.1-nano-2025-04-14:westus3
        - gpt-4.1-nano-2025-04-14:northcentralus
        - gpt-4.1-nano-2025-04-14:southcentralus
        - gpt-4o-2024-11-20:swedencentral
        - gpt-4o-2024-11-20:westus
        - gpt-4o-2024-11-20:eastus2
        - gpt-4o-2024-11-20:eastus
        - gpt-4o-2024-11-20:westus3
        - gpt-4o-2024-11-20:southcentralus
        - gpt-4o-2024-11-20:westeurope
        - gpt-4o-2024-11-20:germanywestcentral
        - gpt-4o-2024-11-20:polandcentral
        - gpt-4o-2024-11-20:spaincentral
        - gpt-4o-2024-08-06:westus
        - gpt-4o-2024-08-06:westus3
        - gpt-4o-2024-08-06:eastus
        - gpt-4o-2024-08-06:eastus2
        - gpt-4o-2024-08-06:northcentralus
        - gpt-4o-2024-08-06:southcentralus
        - gpt-4o-mini-2024-07-18:westus
        - gpt-4o-mini-2024-07-18:westus3
        - gpt-4o-mini-2024-07-18:eastus
        - gpt-4o-mini-2024-07-18:eastus2
        - gpt-4o-mini-2024-07-18:northcentralus
        - gpt-4o-mini-2024-07-18:southcentralus
        - gpt-4o-2024-05-13:eastus2
        - gpt-4o-2024-05-13:eastus
        - gpt-4o-2024-05-13:northcentralus
        - gpt-4o-2024-05-13:southcentralus
        - gpt-4o-2024-05-13:westus3
        - gpt-4o-2024-05-13:westus
        - gpt-4-turbo-2024-04-09:eastus2
        - gpt-4-0125-preview:eastus
        - gpt-4-0125-preview:northcentralus
        - gpt-4-0125-preview:southcentralus
        - gpt-4-1106-preview:australiaeast
        - gpt-4-1106-preview:canadaeast
        - gpt-4-1106-preview:france
        - gpt-4-1106-preview:india
        - gpt-4-1106-preview:norway
        - gpt-4-1106-preview:swedencentral
        - gpt-4-1106-preview:uk
        - gpt-4-1106-preview:westus
        - gpt-4-1106-preview:westus3
        - gpt-4-0613:canadaeast
        - gpt-3.5-turbo-0125:canadaeast
        - gpt-3.5-turbo-0125:northcentralus
        - gpt-3.5-turbo-0125:southcentralus
        - gpt-3.5-turbo-1106:canadaeast
        - gpt-3.5-turbo-1106:westus
      description: >-
        This is the OpenAI model that will be used.


        When using Vapi OpenAI or your own Azure Credentials, you have the
        option to specify the region for the selected model. This shouldn't be
        specified unless you have a specific reason to do so. Vapi will
        automatically find the fastest region that makes sense.

        This is helpful when you are required to comply with Data Residency
        rules. Learn more about Azure regions here
        https://azure.microsoft.com/en-us/explore/global-infrastructure/data-residency/.
      title: EvalOpenAiModelModel
    # Item schema for EvalOpenAIModel.messages: an object with no declared
    # properties.
    EvalOpenAiModelMessagesItems:
      title: EvalOpenAiModelMessagesItems
      type: object
      properties: {}
    # Judge-model configuration for the `openai` provider.
    # `provider`, `model` and `messages` are mandatory; `temperature` and
    # `maxTokens` are optional tuning knobs.
    EvalOpenAIModel:
      type: object
      properties:
        provider:
          $ref: '#/components/schemas/EvalOpenAiModelProvider'
          description: This is the provider of the model (`openai`).
        model:
          $ref: '#/components/schemas/EvalOpenAiModelModel'
          description: >-
            This is the OpenAI model that will be used.


            When using Vapi OpenAI or your own Azure Credentials, you have the
            option to specify the region for the selected model. This shouldn't
            be specified unless you have a specific reason to do so. Vapi will
            automatically find the fastest region that makes sense.

            This is helpful when you are required to comply with Data Residency
            rules. Learn more about Azure regions here
            https://azure.microsoft.com/en-us/explore/global-infrastructure/data-residency/.
        temperature:
          type: number
          format: double
          description: >-
            This is the temperature of the model. For LLM-as-a-judge, it's
            recommended to set it between 0 - 0.3 to avoid hallucinations and
            ensure the model judges the output correctly based on the
            instructions.
        maxTokens:
          type: number
          format: double
          description: >-
            This is the max tokens of the model.

            If your Judge instructions make the model return `true` or `false`,
            the response takes only 1 token (as per the OpenAI Tokenizer), so it
            is recommended to set this to a low number to force the model to
            return a short response.
        messages:
          type: array
          items:
            $ref: '#/components/schemas/EvalOpenAiModelMessagesItems'
          description: >-
            These are the messages which will instruct the AI Judge on how to
            evaluate the assistant message.

            The LLM-Judge must respond with "pass" or "fail" to indicate if the
            assistant message passes the eval.


            To access the messages in the mock conversation, use the LiquidJS
            variable `{{messages}}`.

            The assistant message to be evaluated will be passed as the last
            message in the `messages` array and can be accessed using
            `{{messages[-1]}}`.


            It is recommended to use the system message to instruct the LLM how
            to evaluate the assistant message, and then use the first user
            message to pass the assistant message to be evaluated.
      required:
        - provider
        - model
        - messages
      title: EvalOpenAIModel
    # Provider tag for EvalAnthropicModel; the only allowed value is `anthropic`.
    EvalAnthropicModelProvider:
      title: EvalAnthropicModelProvider
      description: This is the provider of the model (`anthropic`).
      type: string
      enum: [anthropic]
    # Allowed Anthropic model identifiers; referenced by EvalAnthropicModel.model.
    EvalAnthropicModelModel:
      title: EvalAnthropicModelModel
      description: This is the specific model that will be used.
      type: string
      enum:
        - claude-3-opus-20240229
        - claude-3-sonnet-20240229
        - claude-3-haiku-20240307
        - claude-3-5-sonnet-20240620
        - claude-3-5-sonnet-20241022
        - claude-3-5-haiku-20241022
        - claude-3-7-sonnet-20250219
        - claude-opus-4-20250514
        - claude-opus-4-5-20251101
        - claude-opus-4-6
        - claude-sonnet-4-20250514
        - claude-sonnet-4-5-20250929
        - claude-sonnet-4-6
        - claude-haiku-4-5-20251001
    # Type tag for AnthropicThinkingConfig; the only allowed value is `enabled`.
    AnthropicThinkingConfigType:
      title: AnthropicThinkingConfigType
      type: string
      enum: [enabled]
    # Configuration for Anthropic's thinking feature; both `type` and
    # `budgetTokens` are mandatory.
    AnthropicThinkingConfig:
      type: object
      properties:
        type:
          $ref: '#/components/schemas/AnthropicThinkingConfigType'
        budgetTokens:
          type: number
          format: double
          # Bounds mirror the range stated in the description so that schema
          # validators enforce it too.
          # NOTE(review): inclusive bounds are assumed — confirm with the backend.
          minimum: 1024
          maximum: 100000
          description: |-
            The maximum number of tokens to allocate for thinking.
            Must be between 1024 and 100000 tokens.
      required:
        - type
        - budgetTokens
      title: AnthropicThinkingConfig
    # Item schema for EvalAnthropicModel.messages: an object with no declared
    # properties.
    EvalAnthropicModelMessagesItems:
      title: EvalAnthropicModelMessagesItems
      type: object
      properties: {}
    # Judge-model configuration for the `anthropic` provider.
    # `provider`, `model` and `messages` are mandatory; `thinking`,
    # `temperature` and `maxTokens` are optional.
    EvalAnthropicModel:
      type: object
      properties:
        provider:
          $ref: '#/components/schemas/EvalAnthropicModelProvider'
          description: This is the provider of the model (`anthropic`).
        model:
          $ref: '#/components/schemas/EvalAnthropicModelModel'
          description: This is the specific model that will be used.
        thinking:
          $ref: '#/components/schemas/AnthropicThinkingConfig'
          description: >-
            This is the optional configuration for Anthropic's thinking feature.


            - If provided, `maxTokens` must be greater than
            `thinking.budgetTokens`.
        temperature:
          type: number
          format: double
          description: >-
            This is the temperature of the model. For LLM-as-a-judge, it's
            recommended to set it between 0 - 0.3 to avoid hallucinations and
            ensure the model judges the output correctly based on the
            instructions.
        maxTokens:
          type: number
          format: double
          description: >-
            This is the max tokens of the model.

            If your Judge instructions make the model return `true` or `false`,
            the response takes only 1 token (as per the OpenAI Tokenizer), so it
            is recommended to set this to a low number to force the model to
            return a short response.
        messages:
          type: array
          items:
            $ref: '#/components/schemas/EvalAnthropicModelMessagesItems'
          description: >-
            These are the messages which will instruct the AI Judge on how to
            evaluate the assistant message.

            The LLM-Judge must respond with "pass" or "fail" to indicate if the
            assistant message passes the eval.


            To access the messages in the mock conversation, use the LiquidJS
            variable `{{messages}}`.

            The assistant message to be evaluated will be passed as the last
            message in the `messages` array and can be accessed using
            `{{messages[-1]}}`.


            It is recommended to use the system message to instruct the LLM how
            to evaluate the assistant message, and then use the first user
            message to pass the assistant message to be evaluated.
      required:
        - provider
        - model
        - messages
      title: EvalAnthropicModel
    # Provider tag for EvalGoogleModel; the only allowed value is `google`.
    EvalGoogleModelProvider:
      title: EvalGoogleModelProvider
      description: This is the provider of the model (`google`).
      type: string
      enum: [google]
    # Allowed Google model identifiers; referenced by EvalGoogleModel.model.
    EvalGoogleModelModel:
      type: string
      enum:
        - gemini-3-flash-preview
        - gemini-2.5-pro
        - gemini-2.5-flash
        - gemini-2.5-flash-lite
        - gemini-2.0-flash-thinking-exp
        - gemini-2.0-pro-exp-02-05
        - gemini-2.0-flash
        - gemini-2.0-flash-lite
        - gemini-2.0-flash-exp
        - gemini-2.0-flash-realtime-exp
        - gemini-1.5-flash
        - gemini-1.5-flash-002
        - gemini-1.5-pro
        - gemini-1.5-pro-002
        - gemini-1.0-pro
      # The example names a value from the enum above; the previous example
      # (`gpt-4o`) is not an allowed value for this schema.
      description: This is the name of the model. Ex. gemini-2.5-flash
      title: EvalGoogleModelModel
    # Item schema for EvalGoogleModel.messages: an object with no declared
    # properties.
    EvalGoogleModelMessagesItems:
      title: EvalGoogleModelMessagesItems
      type: object
      properties: {}
    # Judge-model configuration for the `google` provider.
    # `provider`, `model` and `messages` are mandatory; `temperature` and
    # `maxTokens` are optional.
    EvalGoogleModel:
      type: object
      properties:
        provider:
          $ref: '#/components/schemas/EvalGoogleModelProvider'
          description: This is the provider of the model (`google`).
        model:
          $ref: '#/components/schemas/EvalGoogleModelModel'
          # The example uses a value allowed by EvalGoogleModelModel; the
          # previous example (`gpt-4o`) is not in that enum.
          description: This is the name of the model. Ex. gemini-2.5-flash
        temperature:
          type: number
          format: double
          description: >-
            This is the temperature of the model. For LLM-as-a-judge, it's
            recommended to set it between 0 - 0.3 to avoid hallucinations and
            ensure the model judges the output correctly based on the
            instructions.
        maxTokens:
          type: number
          format: double
          description: >-
            This is the max tokens of the model.

            If your Judge instructions make the model return `true` or `false`,
            the response takes only 1 token (as per the OpenAI Tokenizer), so it
            is recommended to set this to a low number to force the model to
            return a short response.
        messages:
          type: array
          items:
            $ref: '#/components/schemas/EvalGoogleModelMessagesItems'
          description: >-
            These are the messages which will instruct the AI Judge on how to
            evaluate the assistant message.

            The LLM-Judge must respond with "pass" or "fail" to indicate if the
            assistant message passes the eval.


            To access the messages in the mock conversation, use the LiquidJS
            variable `{{messages}}`.

            The assistant message to be evaluated will be passed as the last
            message in the `messages` array and can be accessed using
            `{{messages[-1]}}`.


            It is recommended to use the system message to instruct the LLM how
            to evaluate the assistant message, and then use the first user
            message to pass the assistant message to be evaluated.
      required:
        - provider
        - model
        - messages
      title: EvalGoogleModel
    # Provider tag for EvalCustomModel; the only allowed value is `custom-llm`.
    EvalCustomModelProvider:
      title: EvalCustomModelProvider
      description: This is the provider of the model (`custom-llm`).
      type: string
      enum: [custom-llm]
    # Object schema with no declared properties, used for
    # EvalCustomModel.headers.
    EvalCustomModelHeaders:
      title: EvalCustomModelHeaders
      description: These are the headers we'll use for the OpenAI client's `headers`.
      type: object
      properties: {}
    # Item schema for EvalCustomModel.messages: an object with no declared
    # properties.
    EvalCustomModelMessagesItems:
      title: EvalCustomModelMessagesItems
      type: object
      properties: {}
    # Judge-model configuration for the `custom-llm` provider.
    # `provider`, `url`, `model` and `messages` are mandatory; `headers`,
    # `timeoutSeconds`, `temperature` and `maxTokens` are optional.
    EvalCustomModel:
      type: object
      properties:
        provider:
          $ref: '#/components/schemas/EvalCustomModelProvider'
          description: This is the provider of the model (`custom-llm`).
        url:
          type: string
          description: >-
            This is the URL we'll use for the OpenAI client's `baseURL`. Ex.
            https://openrouter.ai/api/v1
        headers:
          $ref: '#/components/schemas/EvalCustomModelHeaders'
          description: These are the headers we'll use for the OpenAI client's `headers`.
        timeoutSeconds:
          type: number
          format: double
          description: >-
            This sets the timeout for the connection to the custom provider
            without needing to stream any tokens back. Default is 20 seconds.
        model:
          type: string
          description: This is the name of the model. Ex. gpt-4o
        temperature:
          type: number
          format: double
          description: >-
            This is the temperature of the model. For LLM-as-a-judge, it's
            recommended to set it between 0 - 0.3 to avoid hallucinations and
            ensure the model judges the output correctly based on the
            instructions.
        maxTokens:
          type: number
          format: double
          description: >-
            This is the max tokens of the model.

            If your Judge instructions make the model return `true` or `false`,
            the response takes only 1 token (as per the OpenAI Tokenizer), so it
            is recommended to set this to a low number to force the model to
            return a short response.
        messages:
          type: array
          items:
            $ref: '#/components/schemas/EvalCustomModelMessagesItems'
          description: >-
            These are the messages which will instruct the AI Judge on how to
            evaluate the assistant message.

            The LLM-Judge must respond with "pass" or "fail" to indicate if the
            assistant message passes the eval.


            To access the messages in the mock conversation, use the LiquidJS
            variable `{{messages}}`.

            The assistant message to be evaluated will be passed as the last
            message in the `messages` array and can be accessed using
            `{{messages[-1]}}`.


            It is recommended to use the system message to instruct the LLM how
            to evaluate the assistant message, and then use the first user
            message to pass the assistant message to be evaluated.
      required:
        - provider
        - url
        - model
        - messages
      title: EvalCustomModel
    # Union of the four judge-model configurations (OpenAI, Anthropic, Google,
    # custom LLM); referenced by AssistantMessageJudgePlanAI.model.
    AssistantMessageJudgePlanAiModel:
      title: AssistantMessageJudgePlanAiModel
      description: >-
        This is the model to use for the LLM-as-a-judge.

        If not provided, will default to the assistant's model.


        The instructions on how to evaluate the model output with this LLM-Judge
        must be passed as a system message in the messages array of the model.


        The Mock conversation can be passed to the LLM-Judge to evaluate using
        the prompt {{messages}} and will be evaluated as a LiquidJS Variable. To
        access and judge only the last message, use {{messages[-1]}}


        The LLM-Judge must respond with "pass" or "fail" and only those two
        responses are allowed.
      oneOf:
        - $ref: '#/components/schemas/EvalOpenAIModel'
        - $ref: '#/components/schemas/EvalAnthropicModel'
        - $ref: '#/components/schemas/EvalGoogleModel'
        - $ref: '#/components/schemas/EvalCustomModel'
    # Type tag for the LLM-as-a-judge plan; the only allowed value is `ai`.
    AssistantMessageJudgePlanAiType:
      title: AssistantMessageJudgePlanAiType
      description: |-
        This is the type of the judge plan.
        Use 'ai' to evaluate the assistant message content using LLM-as-a-judge.
        @default 'ai'
      type: string
      enum: [ai]
    # LLM-as-a-judge plan: `model` and `type` are mandatory,
    # `autoIncludeMessageHistory` is optional.
    # NOTE(review): `model` is listed under `required`, yet its description says
    # "If not provided, will default to the assistant's model" — confirm which
    # of the two is intended.
    AssistantMessageJudgePlanAI:
      title: AssistantMessageJudgePlanAI
      type: object
      required: [model, type]
      properties:
        model:
          description: >-
            This is the model to use for the LLM-as-a-judge.

            If not provided, will default to the assistant's model.


            The instructions on how to evaluate the model output with this
            LLM-Judge must be passed as a system message in the messages array
            of the model.


            The Mock conversation can be passed to the LLM-Judge to evaluate
            using the prompt {{messages}} and will be evaluated as a LiquidJS
            Variable. To access and judge only the last message, use
            {{messages[-1]}}


            The LLM-Judge must respond with "pass" or "fail" and only those two
            responses are allowed.
          $ref: '#/components/schemas/AssistantMessageJudgePlanAiModel'
        type:
          description: >-
            This is the type of the judge plan.

            Use 'ai' to evaluate the assistant message content using
            LLM-as-a-judge.

            @default 'ai'
          $ref: '#/components/schemas/AssistantMessageJudgePlanAiType'
        autoIncludeMessageHistory:
          description: >-
            This is the flag to enable automatically adding the liquid variable
            {{messages}} to the model's messages array

            This is only applicable if the user has not provided any messages in
            the model's messages array

            @default true
          type: boolean
    # Evaluation checkpoint for a tool-response message: a `tool` role tag plus
    # the AI judge plan that grades it. Both fields are mandatory.
    ChatEvalToolResponseMessageEvaluation:
      title: ChatEvalToolResponseMessageEvaluation
      type: object
      required: [role, judgePlan]
      properties:
        role:
          description: |-
            This is the role of the message author.
            For a tool response message evaluation, the role is always 'tool'
            @default 'tool'
          $ref: '#/components/schemas/ChatEvalToolResponseMessageEvaluationRole'
        judgePlan:
          description: >-
            This is the judge plan that instructs how to evaluate the tool
            response message.

            The tool response message can be evaluated with an LLM-as-judge by
            defining the evaluation criteria in a prompt.
          $ref: '#/components/schemas/AssistantMessageJudgePlanAI'
    # Constant role tag for mock user messages; referenced by
    # ChatEvalUserMessageMock.role.
    ChatEvalUserMessageMockRole:
      title: ChatEvalUserMessageMockRole
      description: |-
        This is the role of the message author.
        For a mock user message, the role is always 'user'
        @default 'user'
      type: string
      enum: [user]
      default: user
    # Mock user turn; both `role` and `content` are mandatory.
    ChatEvalUserMessageMock:
      title: ChatEvalUserMessageMock
      type: object
      required: [role, content]
      properties:
        role:
          description: |-
            This is the role of the message author.
            For a mock user message, the role is always 'user'
            @default 'user'
          $ref: '#/components/schemas/ChatEvalUserMessageMockRole'
        content:
          description: |-
            This is the content of the user message.
            This is the message that the user would have sent.
          type: string
    # Constant role tag for assistant-message evaluations; the only allowed
    # value is `assistant`.
    ChatEvalAssistantMessageEvaluationRole:
      title: ChatEvalAssistantMessageEvaluationRole
      description: |-
        This is the role of the message author.
        For an assistant message evaluation, the role is always 'assistant'
        @default 'assistant'
      type: string
      enum: [assistant]
      default: assistant
    # Type tag for the exact-match judge plan; the only allowed value is `exact`.
    AssistantMessageJudgePlanExactType:
      title: AssistantMessageJudgePlanExactType
      description: >-
        This is the type of the judge plan.

        Use 'exact' for an exact match on the content and tool calls - without
        using LLM-as-a-judge.

        @default 'exact'
      type: string
      enum: [exact]
    # Exact-match judge plan: compares the assistant message's content (and,
    # optionally, its tool calls) against fixed expectations without an LLM.
    # `type` and `content` are mandatory; `toolCalls` is optional.
    AssistantMessageJudgePlanExact:
      type: object
      properties:
        type:
          $ref: '#/components/schemas/AssistantMessageJudgePlanExactType'
          description: >-
            This is the type of the judge plan.

            Use 'exact' for an exact match on the content and tool calls -
            without using LLM-as-a-judge.

            @default 'exact'
        content:
          type: string
          description: >-
            This is what will be used to evaluate the model's message content.

            If you provide a string, the assistant message content will be
            evaluated against it as an exact match, case-insensitive.
        toolCalls:
          type: array
          items:
            $ref: '#/components/schemas/ChatEvalAssistantMessageMockToolCall'
          description: >-
            This is the tool calls that will be used to evaluate the model's
            message content.

            The tool name must be a valid tool that the assistant is allowed to
            call.


            For the Query tool, the arguments for the tool call are in the
            format - {knowledgeBaseNames: ['kb_name', 'kb_name_2']}


            For the DTMF tool, the arguments for the tool call are in the format
            - {dtmf: "1234*"}


            For the Handoff tool, the arguments for the tool call are in the
            format - {destination: "assistant_id"}


            For the Transfer Call tool, the arguments for the tool call are in
            the format - {destination: "phone_number_or_assistant_id"}


            For all other tools, they are called without arguments or with
            user-defined arguments
      required:
        - type
        - content
      title: AssistantMessageJudgePlanExact
    # Type tag for the regex judge plan; the only allowed value is `regex`.
    AssistantMessageJudgePlanRegexType:
      title: AssistantMessageJudgePlanRegexType
      description: >-
        This is the type of the judge plan.

        Use 'regex' for a regex match on the content and tool calls - without
        using LLM-as-a-judge.

        @default 'regex'
      type: string
      enum: [regex]
    # Regex judge plan: matches the assistant message's content (and,
    # optionally, its tool-call arguments) against regular expressions without
    # an LLM. `type` and `content` are mandatory; `toolCalls` is optional.
    AssistantMessageJudgePlanRegex:
      type: object
      properties:
        type:
          $ref: '#/components/schemas/AssistantMessageJudgePlanRegexType'
          description: >-
            This is the type of the judge plan.

            Use 'regex' for a regex match on the content and tool calls -
            without using LLM-as-a-judge.

            @default 'regex'
        content:
          type: string
          description: >-
            This is what will be used to evaluate the model's message content.

            The content will be evaluated against the regex pattern provided in
            the Judge Plan content field.

            Evaluation is considered successful if the regex pattern matches any
            part of the assistant message content.
        toolCalls:
          type: array
          items:
            $ref: '#/components/schemas/ChatEvalAssistantMessageMockToolCall'
          description: >-
            This is the tool calls that will be used to evaluate the model's
            message content.

            The tool name must be a valid tool that the assistant is allowed to
            call.

            The values to the arguments for the tool call should be a Regular
            Expression.

            Evaluation is considered successful if the regex pattern matches any
            part of each tool call argument.


            For the Query tool, the arguments for the tool call are in the
            format - {knowledgeBaseNames: ['kb_name', 'kb_name_2']}


            For the DTMF tool, the arguments for the tool call are in the format
            - {dtmf: "1234*"}


            For the Handoff tool, the arguments for the tool call are in the
            format - {destination: "assistant_id"}


            For the Transfer Call tool, the arguments for the tool call are in
            the format - {destination: "phone_number_or_assistant_id"}


            For all other tools, they are called without arguments or with
            user-defined arguments
      required:
        - type
        - content
      title: AssistantMessageJudgePlanRegex
    # Union (`oneOf`) of the three judge plan schemas: Exact, Regex and AI.
    # A value must match exactly one of the referenced schemas.
    ChatEvalAssistantMessageEvaluationJudgePlan:
      oneOf:
        - $ref: '#/components/schemas/AssistantMessageJudgePlanExact'
        - $ref: '#/components/schemas/AssistantMessageJudgePlanRegex'
        - $ref: '#/components/schemas/AssistantMessageJudgePlanAI'
      description: >-
        This is the judge plan that instructs how to evaluate the assistant
        message.

        The assistant message can be evaluated against fixed content (exact
        match or RegEx) or with an LLM-as-judge by defining the evaluation
        criteria in a prompt.
      title: ChatEvalAssistantMessageEvaluationJudgePlan
    # Controls how an eval proceeds after an assistant message has been
    # evaluated. No `required` list is declared, so every property is optional.
    AssistantMessageEvaluationContinuePlan:
      type: object
      properties:
        # Stop-on-failure switch; the description documents a default of false.
        exitOnFailureEnabled:
          type: boolean
          description: >-
            This is whether the evaluation should exit if the assistant message
            evaluates to false.

            By default, it is false and the evaluation will continue.

            @default false
        # Replacement message content for later turns of the conversation.
        contentOverride:
          type: string
          description: >-
            This is the content that will be used in the conversation for this
            assistant turn moving forward if provided.

            It will override the content received from the model.
        # Replacement tool calls for later turns of the conversation.
        toolCallsOverride:
          type: array
          items:
            $ref: '#/components/schemas/ChatEvalAssistantMessageMockToolCall'
          description: >-
            This is the tool calls that will be used in the conversation for
            this assistant turn moving forward if provided.

            It will override the tool calls received from the model.
      title: AssistantMessageEvaluationContinuePlan
    # Evaluation checkpoint for an assistant message: pairs a judge plan with
    # an optional continue plan. `role` and `judgePlan` are required;
    # `continuePlan` is optional.
    ChatEvalAssistantMessageEvaluation:
      type: object
      properties:
        role:
          $ref: '#/components/schemas/ChatEvalAssistantMessageEvaluationRole'
          description: |-
            This is the role of the message author.
            For an assistant message evaluation, the role is always 'assistant'
            @default 'assistant'
        # One of the Exact / Regex / AI judge plans (see the referenced union).
        judgePlan:
          $ref: '#/components/schemas/ChatEvalAssistantMessageEvaluationJudgePlan'
          description: >-
            This is the judge plan that instructs how to evaluate the assistant
            message.

            The assistant message can be evaluated against fixed content (exact
            match or RegEx) or with an LLM-as-judge by defining the evaluation
            criteria in a prompt.
        # Optional follow-up behaviour: exit on failure and/or override content.
        continuePlan:
          $ref: '#/components/schemas/AssistantMessageEvaluationContinuePlan'
          description: >-
            This is the plan for how the overall evaluation will proceed after
            the assistant message is evaluated.

            This lets you configure whether to stop the evaluation if this
            message fails, and whether to override any content for future turns
      required:
        - role
        - judgePlan
      title: ChatEvalAssistantMessageEvaluation
    # Item schema for UpdateEvalDTO.messages: a `oneOf` over four mock message
    # schemas (assistant, system, tool response, user) and two evaluation
    # schemas (tool response, assistant). Lists the same six references as
    # EvalMessagesItems.
    UpdateEvalDtoMessagesItems:
      oneOf:
        - $ref: '#/components/schemas/ChatEvalAssistantMessageMock'
        - $ref: '#/components/schemas/ChatEvalSystemMessageMock'
        - $ref: '#/components/schemas/ChatEvalToolResponseMessageMock'
        - $ref: '#/components/schemas/ChatEvalToolResponseMessageEvaluation'
        - $ref: '#/components/schemas/ChatEvalUserMessageMock'
        - $ref: '#/components/schemas/ChatEvalAssistantMessageEvaluation'
      title: UpdateEvalDtoMessagesItems
    # String enum for UpdateEvalDTO.type; the only permitted value is
    # `chat.mockConversation`.
    UpdateEvalDtoType:
      type: string
      enum:
        - chat.mockConversation
      description: |-
        This is the type of the eval.
        Currently it is fixed to `chat.mockConversation`.
      title: UpdateEvalDtoType
    # Request body schema of the Update Eval operation (PATCH /eval/{id}).
    # No `required` list is declared, so every property may be omitted.
    UpdateEvalDTO:
      type: object
      properties:
        # Ordered mock conversation made of mock and evaluation messages.
        messages:
          type: array
          items:
            $ref: '#/components/schemas/UpdateEvalDtoMessagesItems'
          description: >-
            This is the mock conversation that will be used to evaluate the flow
            of the conversation.


            Mock Messages are used to simulate the flow of the conversation


            Evaluation Messages are used as checkpoints in the flow where the
            model's response to previous conversation needs to be evaluated to
            check the content and tool calls
        name:
          type: string
          description: |-
            This is the name of the eval.
            It helps identify what the eval is checking for.
        description:
          type: string
          description: >-
            This is the description of the eval.

            This helps describe the eval and its purpose in detail. It will not
            be used to evaluate the flow of the conversation.
        # Single-value enum; currently only `chat.mockConversation`.
        type:
          $ref: '#/components/schemas/UpdateEvalDtoType'
          description: |-
            This is the type of the eval.
            Currently it is fixed to `chat.mockConversation`.
      title: UpdateEvalDTO
    # Item schema for Eval.messages: a `oneOf` over four mock message schemas
    # (assistant, system, tool response, user) and two evaluation schemas
    # (tool response, assistant). Lists the same six references as
    # UpdateEvalDtoMessagesItems.
    EvalMessagesItems:
      oneOf:
        - $ref: '#/components/schemas/ChatEvalAssistantMessageMock'
        - $ref: '#/components/schemas/ChatEvalSystemMessageMock'
        - $ref: '#/components/schemas/ChatEvalToolResponseMessageMock'
        - $ref: '#/components/schemas/ChatEvalToolResponseMessageEvaluation'
        - $ref: '#/components/schemas/ChatEvalUserMessageMock'
        - $ref: '#/components/schemas/ChatEvalAssistantMessageEvaluation'
      title: EvalMessagesItems
    # String enum for Eval.type; the only permitted value is
    # `chat.mockConversation`.
    EvalType:
      type: string
      enum:
        - chat.mockConversation
      description: |-
        This is the type of the eval.
        Currently it is fixed to `chat.mockConversation`.
      title: EvalType
    # Response schema of the Update Eval operation (HTTP 200). Every property
    # except `name` and `description` is listed under `required`.
    Eval:
      type: object
      properties:
        # Ordered mock conversation made of mock and evaluation messages.
        messages:
          type: array
          items:
            $ref: '#/components/schemas/EvalMessagesItems'
          description: >-
            This is the mock conversation that will be used to evaluate the flow
            of the conversation.


            Mock Messages are used to simulate the flow of the conversation


            Evaluation Messages are used as checkpoints in the flow where the
            model's response to previous conversation needs to be evaluated to
            check the content and tool calls
        # NOTE(review): `id`, `orgId`, `createdAt` and `updatedAt` carry no
        # description in this spec; presumably the eval's identifier, owning
        # organization and timestamps - confirm before documenting further.
        id:
          type: string
        orgId:
          type: string
        createdAt:
          type: string
          format: date-time
        updatedAt:
          type: string
          format: date-time
        name:
          type: string
          description: |-
            This is the name of the eval.
            It helps identify what the eval is checking for.
        description:
          type: string
          description: >-
            This is the description of the eval.

            This helps describe the eval and its purpose in detail. It will not
            be used to evaluate the flow of the conversation.
        # Single-value enum; currently only `chat.mockConversation`.
        type:
          $ref: '#/components/schemas/EvalType'
          description: |-
            This is the type of the eval.
            Currently it is fixed to `chat.mockConversation`.
      required:
        - messages
        - id
        - orgId
        - createdAt
        - updatedAt
        - type
      title: Eval
  # Declares a single HTTP authentication scheme named `bearer` that uses the
  # `bearer` scheme.
  # The description is rendered as CommonMark, so the dashboard link needs an
  # explicit `https://` scheme; without it renderers treat the destination as
  # a path relative to the current page.
  securitySchemes:
    bearer:
      type: http
      scheme: bearer
      description: Retrieve your API Key from [Dashboard](https://dashboard.vapi.ai).

```

## SDK Code Examples

```python
from vapi import Vapi

# Build the SDK client; replace the placeholder with your Vapi API token.
client = Vapi(token="YOUR_TOKEN_HERE")

# Call the Update Eval endpoint for the eval whose identifier is "id".
# Only the path identifier is passed here; no fields to update are supplied.
client.eval.eval_controller_update(id="id")

```

```go
package example

import (
    context "context"

    serversdkgo "github.com/VapiAI/server-sdk-go"
    client "github.com/VapiAI/server-sdk-go/client"
    option "github.com/VapiAI/server-sdk-go/option"
)

func do() {
    client := client.NewClient(
        option.WithToken(
            "YOUR_TOKEN_HERE",
        ),
    )
    request := &serversdkgo.UpdateEvalDto{
        Id: "id",
    }
    client.Eval.EvalControllerUpdate(
        context.TODO(),
        request,
    )
}

```