> ## Documentation Index
> Fetch the complete documentation index at: https://developers.telnyx.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Generate speech from text

> Generate synthesized speech audio from text input. Returns audio in the requested format (binary audio stream, base64-encoded JSON, or an audio URL for later retrieval).

Authentication is provided via the standard `Authorization: Bearer <API_KEY>` header.

The `voice` parameter provides a convenient shorthand to specify provider, model, and voice in a single string (e.g. `telnyx.NaturalHD.Alloy` or `Telnyx.Ultra.<voice_id>`). Alternatively, specify `provider` explicitly along with provider-specific parameters.

Supported providers: `aws`, `telnyx`, `azure`, `elevenlabs`, `minimax`, `rime`, `resemble`, `xai`.

The Telnyx `Ultra` model supports 44 languages with emotion control, speed adjustment, and volume control. Use the `telnyx` provider-specific parameters to configure these features.


## OpenAPI

````yaml https://telnyx-openapi-ng.s3.us-east-1.amazonaws.com/text-to-speech/text-to-speech.yml post /text-to-speech
# OpenAPI 3.1 description of the Telnyx Text to Speech REST endpoint.
openapi: 3.1.0
info:
  title: Text to Speech API
  version: 2.0.0
  description: API for managing Text to Speech.
  contact:
    email: support@telnyx.com
servers:
  - url: https://api.telnyx.com/v2
# Default requirement for every operation that does not override `security`.
security:
  - bearerAuth: []
# Tag names are matched against each operation's `tags` list as exact,
# case-sensitive strings. The `/text-to-speech` operation is tagged
# `Text To Speech Commands`, so that is the name declared here.
tags:
  - name: Text To Speech Commands
    description: Text to Speech operations
paths:
  # Single operation: POST a JSON GenerateSpeechRequest and receive audio.
  /text-to-speech:
    post:
      tags:
        - Text To Speech Commands
      summary: Generate speech from text
      # NOTE(review): this description mentions an "audio URL for later
      # retrieval" result, but the `output_type` enum in GenerateSpeechRequest
      # and the 200 response below only cover `binary_output` and
      # `base64_output` — confirm which is current.
      description: >-
        Generate synthesized speech audio from text input. Returns audio in the
        requested format (binary audio stream, base64-encoded JSON, or an audio
        URL for later retrieval).


        Authentication is provided via the standard `Authorization: Bearer
        <API_KEY>` header.


        The `voice` parameter provides a convenient shorthand to specify
        provider, model, and voice in a single string (e.g.
        `telnyx.NaturalHD.Alloy` or `Telnyx.Ultra.<voice_id>`). Alternatively,
        specify `provider` explicitly along with provider-specific parameters.


        Supported providers: `aws`, `telnyx`, `azure`, `elevenlabs`, `minimax`,
        `rime`, `resemble`, `xai`.


        The Telnyx `Ultra` model supports 44 languages with emotion control,
        speed adjustment, and volume control. Use the `telnyx` provider-specific
        parameters to configure these features.
      operationId: generateSpeech
      # The body is mandatory and only JSON is declared as an accepted type.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GenerateSpeechRequest'
      responses:
        # Two representations share the 200 status; per the description, the
        # request's `output_type` decides which one is returned.
        '200':
          description: >-
            Speech generated successfully. The response format depends on the
            `output_type` parameter:

            - `binary_output` (default): Returns raw audio bytes with the
            appropriate `Content-Type` header (e.g. `audio/mpeg`).

            - `base64_output`: Returns a JSON object with `base64_audio` field.
          content:
            audio/mpeg:
              schema:
                type: string
                format: binary
                description: >-
                  Raw audio bytes. Returned when `output_type` is
                  `binary_output` (default).
            application/json:
              schema:
                $ref: '#/components/schemas/Base64AudioResponse'
        '400':
          description: Bad request — invalid parameters or provider error.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/text-to-speech_ErrorResponse'
        # No body schema is declared for 401, unlike 400 and 422.
        '401':
          description: Authentication failed — missing or invalid API key.
        '422':
          description: Validation failed — invalid or missing required fields.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ValidationErrorResponse'
      # Usage snippets for this operation, one list entry per language. The
      # `source` values are embedded program text, so their whitespace and
      # scalar style are significant.
      x-codeSamples:
        # NOTE(review): the JavaScript, Python and Go samples call generate
        # with no arguments and then read `base64_audio`, while the request
        # schema defaults `output_type` to `binary_output` — confirm these
        # snippets reflect a working call.
        - lang: JavaScript
          source: |-
            import Telnyx from 'telnyx';

            const client = new Telnyx({
              apiKey: process.env['TELNYX_API_KEY'], // This is the default and can be omitted
            });

            const response = await client.textToSpeech.generate();

            console.log(response.base64_audio);
        - lang: Python
          source: |-
            import os
            from telnyx import Telnyx

            client = Telnyx(
                api_key=os.environ.get("TELNYX_API_KEY"),  # This is the default and can be omitted
            )
            response = client.text_to_speech.generate()
            print(response.base64_audio)
        # Double-quoted scalar: the `\t` and `\n` escapes carry the Go
        # source's tabs and newlines.
        - lang: Go
          source: "package main\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\n\t\"github.com/team-telnyx/telnyx-go\"\n\t\"github.com/team-telnyx/telnyx-go/option\"\n)\n\nfunc main() {\n\tclient := telnyx.NewClient(\n\t\toption.WithAPIKey(\"My API Key\"),\n\t)\n\tresponse, err := client.TextToSpeech.Generate(context.TODO(), telnyx.TextToSpeechGenerateParams{})\n\tif err != nil {\n\t\tpanic(err.Error())\n\t}\n\tfmt.Printf(\"%+v\\n\", response.Base64Audio)\n}\n"
        # Folded scalar: blank lines separate the top-level statements, and
        # the more-indented class body keeps its own line breaks.
        - lang: Java
          source: >-
            package com.telnyx.sdk.example;


            import com.telnyx.sdk.client.TelnyxClient;

            import com.telnyx.sdk.client.okhttp.TelnyxOkHttpClient;

            import
            com.telnyx.sdk.models.texttospeech.TextToSpeechGenerateParams;

            import
            com.telnyx.sdk.models.texttospeech.TextToSpeechGenerateResponse;


            public final class Main {
                private Main() {}

                public static void main(String[] args) {
                    TelnyxClient client = TelnyxOkHttpClient.fromEnv();

                    TextToSpeechGenerateResponse response = client.textToSpeech().generate();
                }
            }
        - lang: Ruby
          source: |-
            require "telnyx"

            telnyx = Telnyx::Client.new(api_key: "My API Key")

            response = telnyx.text_to_speech.generate

            puts(response)
        # The PHP sample is the only one that passes arguments: it supplies
        # every request parameter with a placeholder value.
        - lang: PHP
          source: >-
            <?php


            require_once dirname(__DIR__) . '/vendor/autoload.php';


            use Telnyx\Client;

            use Telnyx\Core\Exceptions\APIException;


            $client = new Client(apiKey: getenv('TELNYX_API_KEY') ?: 'My API
            Key');


            try {
              $response = $client->textToSpeech->generate(
                aws: [
                  'languageCode' => 'language_code',
                  'lexiconNames' => ['string'],
                  'outputFormat' => 'output_format',
                  'sampleRate' => 'sample_rate',
                  'textType' => 'text',
                ],
                azure: [
                  'apiKey' => 'api_key',
                  'deploymentID' => 'deployment_id',
                  'effect' => 'effect',
                  'gender' => 'gender',
                  'languageCode' => 'language_code',
                  'outputFormat' => 'output_format',
                  'region' => 'region',
                  'textType' => 'text',
                ],
                disableCache: true,
                elevenlabs: [
                  'apiKey' => 'api_key',
                  'languageCode' => 'language_code',
                  'voiceSettings' => ['foo' => 'bar'],
                ],
                language: 'language',
                minimax: [
                  'languageBoost' => 'language_boost',
                  'pitch' => 0,
                  'responseFormat' => 'response_format',
                  'speed' => 0,
                  'vol' => 0,
                ],
                outputType: 'binary_output',
                provider: 'aws',
                resemble: [
                  'apiKey' => 'api_key',
                  'format' => 'format',
                  'precision' => 'precision',
                  'sampleRate' => 'sample_rate',
                ],
                rime: [
                  'responseFormat' => 'response_format',
                  'samplingRate' => 0,
                  'voiceSpeed' => 0,
                ],
                telnyx: [
                  'emotion' => 'neutral',
                  'responseFormat' => 'response_format',
                  'samplingRate' => 0,
                  'temperature' => 0,
                  'voiceSpeed' => 0.5,
                  'volume' => 0,
                ],
                text: 'text',
                textType: 'text',
                voice: 'voice',
                voiceSettings: ['foo' => 'bar'],
                xai: [
                  'voiceID' => 'eve',
                  'language' => 'language',
                  'outputFormat' => 'mp3',
                  'sampleRate' => 8000,
                ],
              );

              var_dump($response);
            } catch (APIException $e) {
              echo $e->getMessage();
            }
        - lang: CLI
          source: |-
            telnyx text-to-speech generate \
              --api-key 'My API Key'
components:
  schemas:
    # JSON payload accepted by POST /text-to-speech. Shared options sit at the
    # top level; each provider additionally has its own nested options object.
    # NOTE(review): no `required` list is declared even though `provider` is
    # described as required when `voice` is absent — confirm what the API
    # enforces.
    GenerateSpeechRequest:
      description: Request body for generating speech from text.
      type: object
      properties:
        voice:
          description: >-
            Voice identifier in the format `provider.model_id.voice_id` or
            `provider.voice_id`. Examples:
            `telnyx.NaturalHD.Alloy`, `Telnyx.Ultra.<voice_id>`,
            `Telnyx.Bayan.Ahmed`, `Telnyx.Sukhan.urdu-professor`,
            `azure.en-US-AvaMultilingualNeural`, `aws.Polly.Generative.Lucia`.
            When provided, `provider`, `model_id`, and `voice_id` are extracted
            automatically and take precedence over individual parameters.
          type: string
        text:
          description: The text to convert to speech.
          type: string
        provider:
          description: TTS provider. Required unless `voice` is provided.
          type: string
          enum: [aws, telnyx, azure, elevenlabs, minimax, rime, resemble, xai]
        language:
          description: Language code (e.g. `en-US`). Usage varies by provider.
          type: string
        text_type:
          description: >-
            Text type. Use `ssml` for SSML-formatted input (supported by
            AWS and Azure).
          type: string
          enum: [text, ssml]
        output_type:
          description: >-
            Determines the response format. `binary_output` returns raw
            audio bytes, `base64_output` returns base64-encoded audio in
            JSON.
          type: string
          enum: [binary_output, base64_output]
          default: binary_output
        disable_cache:
          description: When `true`, bypass the audio cache and generate fresh audio.
          type: boolean
          default: false
        voice_settings:
          description: >-
            Provider-specific voice settings. Contents vary by provider —
            see provider-specific parameter objects below.
          type: object
          additionalProperties: true
        # Per-provider option objects, each a `$ref` to its own schema.
        aws:
          $ref: '#/components/schemas/AwsProviderParams'
        telnyx:
          $ref: '#/components/schemas/TelnyxProviderParams'
        azure:
          $ref: '#/components/schemas/AzureProviderParams'
        elevenlabs:
          $ref: '#/components/schemas/ElevenLabsProviderParams'
        minimax:
          $ref: '#/components/schemas/MinimaxProviderParams'
        rime:
          $ref: '#/components/schemas/RimeProviderParams'
        resemble:
          $ref: '#/components/schemas/ResembleProviderParams'
        xai:
          $ref: '#/components/schemas/XAIProviderParams'
    # JSON wrapper referenced by the 200 response's `application/json`
    # representation, used when the caller asks for `base64_output`.
    Base64AudioResponse:
      type: object
      description: Response when `output_type` is `base64_output`.
      properties:
        base64_audio:
          type: string
          # OpenAPI 3.1 / JSON Schema 2020-12 annotation declaring that the
          # string carries base64-encoded binary data (the 3.1 replacement
          # for 3.0's `format: byte`).
          contentEncoding: base64
          description: Base64-encoded audio data.
    # Error envelope referenced by the 400 response: `errors` is a list of
    # records with string `code`, `title` and `detail` fields.
    text-to-speech_ErrorResponse:
      type: object
      properties:
        errors:
          type: array
          items:
            type: object
            properties:
              code:
                description: Error code.
                type: string
              title:
                description: Error title.
                type: string
              detail:
                description: Detailed error description.
                type: string
    # Payload referenced by the 422 response: `errors` maps each key to a list
    # of strings.
    ValidationErrorResponse:
      type: object
      properties:
        errors:
          description: Validation error details keyed by field name.
          type: object
          additionalProperties:
            type: array
            items: {type: string}
    # Options nested under the request's `aws` key; none are marked required.
    AwsProviderParams:
      description: AWS Polly provider-specific parameters.
      type: object
      properties:
        language_code:
          description: Language code (e.g. `en-US`, `es-ES`).
          type: string
        text_type:
          description: Input text type.
          type: string
          enum: [text, ssml]
        lexicon_names:
          description: List of lexicon names to apply.
          type: array
          items: {type: string}
        output_format:
          description: Audio output format.
          type: string
        # Typed as a string in this schema, not an integer.
        sample_rate:
          description: Audio sample rate.
          type: string
    # Options nested under the request's `telnyx` key. Which fields apply
    # depends on the model, as spelled out in the descriptions.
    TelnyxProviderParams:
      description: >-
        Telnyx provider-specific parameters. Use `voice_speed` and
        `temperature` for `Natural` and `NaturalHD` models. For the `Ultra`
        model, use `voice_speed`, `volume`, and `emotion`. `Bayan` and
        `Sukhan` don't use `temperature`, `volume`, or `emotion`, and don't
        support `voice_speed`. `Sukhan`'s `response_format` is restricted to
        `mp3` or `pcm` (no `wav`).
      type: object
      properties:
        voice_speed:
          description: >-
            Voice speed multiplier. Applies to all models except `Bayan`
            and `Sukhan`, which don't support it. Range: 0.5 to 2.0.
          type: number
          format: float
          minimum: 0.5
          maximum: 2
          default: 1
        response_format:
          description: Audio response format.
          type: string
          default: mp3
        sampling_rate:
          description: Audio sampling rate in Hz.
          type: integer
          default: 24000
        temperature:
          description: >-
            Sampling temperature. Applies to `Natural` and `NaturalHD`
            models only.
          type: number
          format: float
          default: 0.5
        volume:
          description: >-
            Volume level for the Ultra model. Range: 0.0 to 2.0.
          type: number
          format: float
          minimum: 0
          maximum: 2
          default: 1
        # No default is declared for `emotion`.
        emotion:
          description: >-
            Emotion control for the Ultra model. Adjusts the emotional
            tone of the synthesized speech.
          type: string
          enum: [neutral, happy, sad, angry, fearful, disgusted, surprised]
    # Options nested under the request's `azure` key; none are marked
    # required, and three of them declare defaults.
    AzureProviderParams:
      description: Azure Cognitive Services provider-specific parameters.
      type: object
      properties:
        language_code:
          description: Language code (e.g. `en-US`).
          type: string
          default: en-US
        output_format:
          description: Azure audio output format.
          type: string
          default: audio-24khz-160kbitrate-mono-mp3
        text_type:
          description: Input text type. Use `ssml` for SSML-formatted input.
          type: string
          enum: [text, ssml]
          default: text
        api_key:
          description: >-
            Custom Azure API key. If not provided, the default Telnyx key
            is used.
          type: string
        region:
          description: Azure region (e.g. `eastus`, `westeurope`).
          type: string
        deployment_id:
          description: Custom Azure deployment ID.
          type: string
        effect:
          description: Azure audio effect to apply.
          type: string
        gender:
          description: Voice gender preference.
          type: string
    # Options nested under the request's `elevenlabs` key.
    ElevenLabsProviderParams:
      description: ElevenLabs provider-specific parameters.
      type: object
      properties:
        language_code:
          description: Language code.
          type: string
        api_key:
          description: >-
            Custom ElevenLabs API key. If not provided, the default Telnyx
            key is used.
          type: string
        # Free-form object: any keys are accepted.
        voice_settings:
          description: ElevenLabs voice settings (stability, similarity_boost, etc.).
          type: object
          additionalProperties: true
    # Options nested under the request's `minimax` key. No ranges or defaults
    # are declared for any of them.
    MinimaxProviderParams:
      description: Minimax provider-specific parameters.
      type: object
      properties:
        speed:
          description: Speech speed multiplier.
          type: number
          format: float
        vol:
          description: Volume level.
          type: number
          format: float
        # Unlike `speed` and `vol`, pitch is an integer.
        pitch:
          description: Pitch adjustment.
          type: integer
        response_format:
          description: Audio output format.
          type: string
        language_boost:
          description: Language code to boost pronunciation for.
          type: string
    # Options nested under the request's `rime` key; none are marked required.
    RimeProviderParams:
      description: Rime provider-specific parameters.
      type: object
      properties:
        voice_speed:
          description: Voice speed multiplier.
          type: number
          format: float
        response_format:
          description: Audio output format.
          type: string
        sampling_rate:
          description: Audio sampling rate in Hz.
          type: integer
    # Options nested under the request's `resemble` key. Every field,
    # including `sample_rate`, is typed as a string.
    ResembleProviderParams:
      description: Resemble AI provider-specific parameters.
      type: object
      properties:
        api_key:
          description: Custom Resemble API key.
          type: string
        precision:
          description: Synthesis precision.
          type: string
        sample_rate:
          description: Audio sample rate.
          type: string
        format:
          description: Audio output format.
          type: string
    # Options nested under the request's `xai` key. This is the only provider
    # object with a required field (`voice_id`) and closed value sets.
    XAIProviderParams:
      description: xAI provider-specific parameters.
      type: object
      required: [voice_id]
      properties:
        voice_id:
          description: xAI voice identifier.
          type: string
          enum: [eve, ara, rex, sal, leo]
        language:
          description: Language code, or `auto` to detect.
          type: string
          default: auto
        output_format:
          description: Audio output format.
          type: string
          enum: [mp3, wav, pcm, mulaw, alaw]
          default: mp3
        # Integer enum: only these rates are accepted.
        sample_rate:
          description: Audio sample rate in Hz.
          type: integer
          enum: [8000, 16000, 22050, 24000, 44100, 48000]
          default: 24000
  securitySchemes:
    # HTTP bearer authentication; this is the scheme named by the top-level
    # `security` requirement.
    bearerAuth:
      type: http
      scheme: bearer

````