Voice Synthesis
This example retrieves the available voices and selects the first one for synthesis. It then sends a POST request to the synthesize endpoint with the voice_id to use and the text to synthesize. The response contains a token, which is used to connect to the WebSocket. Through this connection, the example continuously receives messages, primarily containing synthesized audio data, which are appended to a result audio file; it also handles any events or errors that may arise.
C# Example
C#
using System.Text.Json;
using System.Net.WebSockets;
using System.Text;
// Path of the file that receives the audio bytes decoded from the WebSocket messages.
const string resultRawAudioFile = @"C:\Users\vivoka\Music\coffee.raw";
// Endpoint queried with GET to list the available voices.
const string voicesUri = "http://localhost:39806/v1/voice-synthesis/voices";
// Endpoint that receives the POST synthesis request (text + voice_id).
const string synthesisUri = "http://localhost:39806/v1/voice-synthesis/synthesize";
// Prefix marking a "data" payload as base64-encoded audio; it is stripped before decoding.
const string audioDataHeader = "data:audio/pcm;base64,";
// Entry point of the example: picks the first available voice, requests a synthesis,
// then receives the result over a WebSocket until the connection is closed.
using (var client = new HttpClient())
{
    // Create (or truncate) the result audio file so that audio chunks can be appended to it later
    using (File.Create(resultRawAudioFile)) { }

    // Get information about the available voices
    using var voicesResponse = await client.GetAsync(voicesUri);
    voicesResponse.EnsureSuccessStatusCode();
    var voicesResponseBody = await voicesResponse.Content.ReadAsStringAsync();

    string voiceId;
    // JsonDocument is IDisposable: release it as soon as the voice id has been copied out
    using (var voicesDocument = JsonDocument.Parse(voicesResponseBody))
    {
        Console.WriteLine($"Voices : {voicesResponseBody}");

        // The voices are the properties of the root object; advancing the enumerator once
        // tells whether there is at least one, without counting them all
        var voices = voicesDocument.RootElement.EnumerateObject();
        if (!voices.MoveNext())
        {
            Console.WriteLine("No available voices");
            return;
        }

        // Use first available voice
        voiceId = voices.Current.Name;
    }

    // Send a POST request for the synthesis; the response carries the WebSocket token
    var requestData = new { text = "I want a coffee", voice_id = voiceId };
    var json = JsonSerializer.Serialize(requestData);
    using var content = new StringContent(json, Encoding.UTF8, "application/json");
    using var response = await client.PostAsync(synthesisUri, content);
    response.EnsureSuccessStatusCode();
    var responseBody = await response.Content.ReadAsStringAsync();

    // Extract the token from the response and build the WebSocket URL from it
    string token;
    using (var synthesisDocument = JsonDocument.Parse(responseBody))
    {
        var synthesis = synthesisDocument.RootElement;

        // The result file only contains the decoded audio bytes (no header), so print the
        // sample rate reported by the service for whoever plays the file back
        var sampleRate = synthesis.GetProperty("sample_rate").ToString();
        Console.WriteLine($"Sample rate : {sampleRate}");

        token = synthesis.GetProperty("token").ToString();
    }
    var webSocketUrl = $"ws://localhost:39806/v1/ws/{token}";

    // Connect to the WebSocket and receive messages until the connection is closed
    using (var webSocket = new ClientWebSocket())
    {
        await webSocket.ConnectAsync(new Uri(webSocketUrl), CancellationToken.None);

        // Receiving is already asynchronous I/O: await it directly instead of wrapping it in Task.Run
        await ReceiveMessages(webSocket);
    }
}
// Receives WebSocket messages until the connection leaves the Open state or a Close frame
// arrives, and passes every complete message, decoded as UTF-8 text, to HandleMessage.
// A message larger than the receive buffer arrives in several fragments; they are
// accumulated until the fragment flagged EndOfMessage.
async Task ReceiveMessages(ClientWebSocket webSocket)
{
    var buffer = new byte[1024];

    // A stateful decoder keeps the leading bytes of a multi-byte UTF-8 character that is split
    // across two fragments; decoding each fragment on its own would corrupt such a character.
    var decoder = Encoding.UTF8.GetDecoder();
    var chars = new char[Encoding.UTF8.GetMaxCharCount(buffer.Length)];

    // StringBuilder avoids re-copying the whole message for every 1024-byte fragment
    var message = new StringBuilder();

    while (webSocket.State == WebSocketState.Open)
    {
        var packet = await webSocket.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
        if (packet.MessageType == WebSocketMessageType.Close)
        {
            // Answer the Close frame to complete the closing handshake
            await webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, "Closing", CancellationToken.None);
            break;
        }

        // Flush the decoder on the last fragment so no state leaks into the next message
        var charCount = decoder.GetChars(buffer, 0, packet.Count, chars, 0, packet.EndOfMessage);
        message.Append(chars, 0, charCount);

        if (packet.EndOfMessage)
        {
            HandleMessage(message.ToString());
            message.Clear();
        }
    }
}
// Handles one complete WebSocket message, expected to be a JSON object:
//  - "data"  : when the value starts with audioDataHeader, the rest is base64-decoded and
//              appended to the result audio file; any other "data" value is ignored;
//  - "event" : printed to the console;
//  - "error" : printed to the console.
// Messages that are not valid JSON, or whose audio payload is not valid base64, are
// reported on the console so that reception can continue with the next message.
void HandleMessage(string message)
{
    try
    {
        using (var document = JsonDocument.Parse(message))
        {
            JsonElement root = document.RootElement;
            if (root.TryGetProperty("data", out JsonElement dataElement))
            {
                var data = dataElement.ToString();

                // The header is a protocol marker, not linguistic text: compare it ordinally
                if (data.StartsWith(audioDataHeader, StringComparison.Ordinal))
                {
                    var audio = Convert.FromBase64String(data.Substring(audioDataHeader.Length));

                    // Append to the result audio file
                    using (var fileStream = new FileStream(resultRawAudioFile, FileMode.Append, FileAccess.Write, FileShare.None))
                    {
                        fileStream.Write(audio, 0, audio.Length);
                    }
                }
            }
            else if (root.TryGetProperty("event", out JsonElement eventElement))
            {
                Console.WriteLine($"Event received: {JsonSerializer.Serialize(eventElement)}");
            }
            else if (root.TryGetProperty("error", out JsonElement errorElement))
            {
                Console.WriteLine($"Error received: {JsonSerializer.Serialize(errorElement)}");
            }
            else
            {
                Console.WriteLine("Unknown message type received.");
            }
        }
    }
    catch (JsonException ex)
    {
        Console.WriteLine($"Failed to parse message: {ex.Message}");
    }
    catch (FormatException ex)
    {
        // Raised by Convert.FromBase64String when the audio payload is malformed
        Console.WriteLine($"Failed to decode audio data: {ex.Message}");
    }
}
Python Example
PY
import asyncio
import base64
import json
import requests
import websockets
# Path of the file that receives the audio bytes decoded from the WebSocket messages.
result_raw_audio_file = r"C:\Users\vivoka\Music\tts.raw"
# Endpoint queried with GET to list the available voices.
voices_uri = "http://localhost:39806/v1/voice-synthesis/voices"
# Endpoint that receives the POST synthesis request (text + voice_id).
synthesis_uri = "http://localhost:39806/v1/voice-synthesis/synthesize"
# Prefix marking a "data" payload as base64-encoded audio; it is stripped before decoding.
audio_data_header = "data:audio/pcm;base64,"
async def main():
    """Synthesize a sentence with the first available voice and save the audio.

    Lists the voices, posts a synthesis request for the first one, then connects
    to the WebSocket identified by the returned token and processes its messages
    until the connection is closed.

    Raises:
        requests.HTTPError: if one of the HTTP requests returns an error status.
        requests.Timeout: if the service does not answer within the timeout.
    """
    # requests waits forever by default; bound each HTTP call so that an
    # unresponsive service makes the example fail instead of hanging.
    request_timeout_seconds = 10

    create_result_audio_file()

    # Get available voices
    voices_response = requests.get(voices_uri, timeout=request_timeout_seconds)
    voices_response.raise_for_status()
    voices_response_body = voices_response.json()
    if not voices_response_body:
        print("No available voices")
        return
    print(f"Voices : {voices_response_body}")

    # Use the first available voice for synthesis (the voice ids are the keys
    # of the response object)
    request_data = {
        "text": "I want a coffee",
        "voice_id": next(iter(voices_response_body)),
    }

    # Send synthesis request and get the token
    synthesis_response = requests.post(
        synthesis_uri, json=request_data, timeout=request_timeout_seconds
    )
    synthesis_response.raise_for_status()
    token = synthesis_response.json()["token"]

    # Connect to websocket and start receiving messages
    web_socket_url = f"ws://localhost:39806/v1/ws/{token}"
    async with websockets.connect(web_socket_url) as websocket:
        await handle_message(websocket)
def create_result_audio_file():
    """Create the result audio file, emptying it if it already exists."""
    # Opening in "wb" mode creates or truncates the file; nothing is written here.
    with open(result_raw_audio_file, "wb"):
        pass
async def handle_message(websocket):
    """Process every message received on ``websocket`` until iteration ends.

    Each message is expected to be a JSON object:
      - "data":  when the value starts with ``audio_data_header``, the rest is
                 base64-decoded and appended to the result audio file; any
                 other "data" value is ignored;
      - "event": printed to the console;
      - "error": printed to the console.

    A message that is not valid JSON is reported and skipped, so that the
    following messages are still processed.

    Args:
        websocket: asynchronous iterable yielding the received messages.
    """
    async for message in websocket:
        # Parse inside the loop: a malformed message must not stop the reception
        try:
            body = json.loads(message)
        except json.JSONDecodeError as ex:
            print(f"Failed to parse message: {ex}")
            continue

        if "data" in body:
            data = body["data"]
            if data.startswith(audio_data_header):
                # Append result audio file
                with open(result_raw_audio_file, "ab") as file:
                    file.write(base64.b64decode(data[len(audio_data_header):]))
        elif "event" in body:
            print(f"Event received: {json.dumps(body['event'])}")
        elif "error" in body:
            print(f"Error received: {json.dumps(body['error'])}")
        else:
            print("Unknown message type received.")
# Run the example only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    # asyncio.run starts an event loop and runs main() until it completes.
    asyncio.run(main())