Skip to main content

Basic Streaming

import asyncio
import json
import base64
import websockets

async def tts_stream():
    """Synthesize one sentence over the TTS WebSocket and save it to disk.

    Sends the handshake frame, the text, and the empty-text end-of-input
    frame, then collects base64-encoded audio chunks until the server
    reports ``isFinal`` or an error, or the connection closes.  The decoded
    audio is written to ``output.mp3`` only if at least one chunk arrived,
    so a failed request never leaves an empty file behind (or truncates a
    previous result).
    """
    url = "wss://api.telnyx.com/v2/text-to-speech/speech?voice=Telnyx.NaturalHD.astra"
    headers = {"Authorization": "Bearer YOUR_API_KEY"}

    audio_chunks = []

    # NOTE(review): recent `websockets` releases appear to name this keyword
    # `additional_headers` -- confirm against the installed version.
    async with websockets.connect(url, extra_headers=headers) as ws:
        # 1. Handshake
        await ws.send(json.dumps({"text": " "}))

        # 2. Send text
        await ws.send(json.dumps({"text": "Hello from Telnyx text-to-speech."}))

        # 3. Signal end of input
        await ws.send(json.dumps({"text": ""}))

        # 4. Collect audio
        async for message in ws:
            data = json.loads(message)

            if data.get("error"):
                print(f"Error: {data['error']}")
                break

            if data.get("audio"):
                audio_chunks.append(base64.b64decode(data["audio"]))

            if data.get("isFinal"):
                break

    # Nothing was synthesized (error or early close): do not touch the file.
    if not audio_chunks:
        return

    # Save audio in a single write.
    with open("output.mp3", "wb") as f:
        f.write(b"".join(audio_chunks))


if __name__ == "__main__":
    asyncio.run(tts_stream())

Conversational (Barge-In)

Send multiple text segments and interrupt mid-synthesis:
import asyncio
import json
import base64
import websockets

async def conversational_tts():
    """Demonstrate barge-in: send a sentence, interrupt it, and send new text.

    Opens the TTS WebSocket, performs the handshake with voice settings,
    sends a first sentence, then sends a ``force`` frame followed by
    replacement text and the empty-text end-of-input frame.  Received
    messages are only inspected for ``error`` and ``isFinal``; audio
    payloads are not stored by this example.
    """
    url = "wss://api.telnyx.com/v2/text-to-speech/speech?voice=Telnyx.NaturalHD.astra"
    headers = {"Authorization": "Bearer YOUR_API_KEY"}

    async with websockets.connect(url, extra_headers=headers) as ws:

        async def wait_for_final():
            """Read messages until ``isFinal``; return False on a server error."""
            async for message in ws:
                data = json.loads(message)
                if data.get("error"):
                    print(f"Error: {data['error']}")
                    return False
                if data.get("isFinal"):
                    break
            return True

        # Handshake with voice settings
        await ws.send(json.dumps({
            "text": " ",
            "voice_settings": {"voice_speed": 1.1}
        }))

        # Send first sentence
        await ws.send(json.dumps({"text": "Welcome to the demo."}))

        # Wait for the first segment, then interrupt.
        # NOTE(review): this waits for `isFinal`, not for the first audio
        # frame -- confirm which one the server sends before an interrupt.
        if not await wait_for_final():
            return

        # Interrupt and send new text.  The flag must be Python's `True`;
        # json.dumps serializes it as JSON `true`.
        await ws.send(json.dumps({"force": True}))
        await ws.send(json.dumps({"text": "Actually, let me start over."}))

        # Signal end of input, then read the remaining messages.
        await ws.send(json.dumps({"text": ""}))

        await wait_for_final()


if __name__ == "__main__":
    asyncio.run(conversational_tts())

LLM Token Streaming

Stream tokens from an LLM directly to TTS. The server buffers text and synthesizes at sentence boundaries:
import asyncio
import json
import websockets

async def llm_to_tts(llm_token_stream):
    """Forward LLM tokens to the TTS WebSocket as they are produced.

    Args:
        llm_token_stream: An iterable or async iterable of text tokens.
            Empty or ``None`` tokens are skipped, because an empty
            ``text`` frame is the end-of-input signal and would flush the
            stream prematurely.

    The function sends the handshake frame, one ``text`` frame per token,
    and finally the empty-text frame that flushes remaining input.
    NOTE(review): this example never reads from the socket, so any audio
    the server sends is not consumed here -- confirm that is intended.
    """
    url = "wss://api.telnyx.com/v2/text-to-speech/speech?voice=Telnyx.NaturalHD.astra"
    headers = {"Authorization": "Bearer YOUR_API_KEY"}

    async with websockets.connect(url, extra_headers=headers) as ws:
        await ws.send(json.dumps({"text": " "}))

        async def forward(token):
            """Send one token, ignoring empty ones (reserved for the flush)."""
            if token:
                await ws.send(json.dumps({"text": token}))

        # Stream LLM tokens directly — TTS handles sentence buffering.
        # Async token streams (common for LLM clients) are supported too.
        if hasattr(llm_token_stream, "__aiter__"):
            async for token in llm_token_stream:
                await forward(token)
        else:
            for token in llm_token_stream:
                await forward(token)

        # Done — flush remaining
        await ws.send(json.dumps({"text": ""}))
Markdown in LLM output is automatically stripped before synthesis — headers, bold, italics, code blocks, and links are converted to plain text.