Voice Recognition
This example sends a POST request to the VDK service to start a voice recognition session. The response includes a token, which is used to connect to the WebSocket and stream audio data from a file. While the audio is being sent, the example also receives packets from the WebSocket and prints incoming events, results, or errors to the console.
C# Example
using System.Text.Json;
using System.Net.WebSockets;
using System.Text;
// Audio file streamed to the service and the endpoints used by this example.
const string rawAudioFile = @"C:\Users\vivoka\Music\coffee.raw";
const string requestUri = "http://localhost:39806/v1/voice-recognition/recognize";
// Prefix prepended to every base64-encoded audio chunk sent over the WebSocket.
const string audioDataHeader = "data:audio/pcm;base64,";

// Step 1: Describe the recognition request and POST it to the VDK service
var requestData = new
{
    models = new
    {
        VendingMachine = new
        {
            slots = new
            {
                drink = new { values = new[] { "Coffee", "Cola", "Mojito", "Cup of tea" } }
            }
        }
    }
};

using (var client = new HttpClient())
{
    var json = JsonSerializer.Serialize(requestData);
    var content = new StringContent(json, Encoding.UTF8, "application/json");
    var response = await client.PostAsync(requestUri, content);

    // Fail fast with a clear HTTP error instead of a confusing "token" lookup failure below.
    response.EnsureSuccessStatusCode();
    var responseBody = await response.Content.ReadAsStringAsync();

    // Step 2: Extract the WebSocket URL from the response
    using var jsonResponse = JsonDocument.Parse(responseBody);
    var token = jsonResponse.RootElement.GetProperty("token").ToString();
    var webSocketUrl = $"ws://localhost:39806/v1/ws/{token}";

    // Step 3: Connect to the WebSocket and start sending audio data
    using (var webSocket = new ClientWebSocket())
    {
        await webSocket.ConnectAsync(new Uri(webSocketUrl), CancellationToken.None);

        // Both operations are asynchronous I/O, so they run concurrently
        // without needing to be wrapped in Task.Run.
        var sending = SendAudioData(webSocket, rawAudioFile);
        var receiving = ReceivePackets(webSocket);
        await Task.WhenAll(sending, receiving);
    }
}
// Streams the file at audioFilePath over the WebSocket as a sequence of JSON
// text messages of the form { "data": "<header><base64 audio>", "last": <bool> }.
// "last" is true for the chunk that reaches the end of the file.
// NOTE(review): an empty file produces no message at all, so no "last" chunk
// is ever sent in that case — confirm how the service should be told.
async Task SendAudioData(ClientWebSocket webSocket, string audioFilePath, CancellationToken cancellationToken = default)
{
    // Read audio data from a file and send it through the WebSocket
    using (var fs = File.OpenRead(audioFilePath))
    {
        var buffer = new byte[1024];
        int bytesRead;
        while ((bytesRead = await fs.ReadAsync(buffer, 0, buffer.Length, cancellationToken)) > 0)
        {
            // Only the bytes actually read are encoded; the final read is usually shorter.
            var base64 = Convert.ToBase64String(buffer, 0, bytesRead);
            var audioChunk = new
            {
                data = $"{audioDataHeader}{base64}",
                last = fs.Position == fs.Length
            };

            // Serialize straight to UTF-8 to skip the intermediate string.
            var bytes = JsonSerializer.SerializeToUtf8Bytes(audioChunk);
            await webSocket.SendAsync(new ArraySegment<byte>(bytes), WebSocketMessageType.Text, true, cancellationToken);
        }
    }
}
// Receives frames from the WebSocket until it is closed, reassembles
// fragmented messages, and passes each complete message to HandleMessage.
async Task ReceivePackets(ClientWebSocket webSocket, CancellationToken cancellationToken = default)
{
    var buffer = new byte[1024];

    // Fragments are accumulated as raw bytes and decoded once per message:
    // decoding each fragment separately would corrupt a multi-byte UTF-8
    // character that happens to be split across two frames.
    using (var pending = new MemoryStream())
    {
        while (webSocket.State == WebSocketState.Open)
        {
            var packet = await webSocket.ReceiveAsync(new ArraySegment<byte>(buffer), cancellationToken);
            if (packet.MessageType == WebSocketMessageType.Close)
            {
                // Complete the close handshake and stop; a close frame carries no message payload.
                await webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, "Closing", cancellationToken);
                break;
            }

            pending.Write(buffer, 0, packet.Count);
            if (packet.EndOfMessage)
            {
                var message = Encoding.UTF8.GetString(pending.GetBuffer(), 0, (int)pending.Length);
                HandleMessage(message);

                // Reset the accumulator for the next message.
                pending.SetLength(0);
            }
        }
    }
}
// Parses one JSON message received from the WebSocket and prints it to the
// console as an event, a result, or an error, depending on which top-level
// property it carries. Malformed JSON is reported instead of thrown.
void HandleMessage(string message)
{
    try
    {
        using (var document = JsonDocument.Parse(message))
        {
            var root = document.RootElement;
            if (root.ValueKind != JsonValueKind.Object)
            {
                // TryGetProperty throws on non-object roots (arrays, numbers, ...).
                Console.WriteLine("Unknown message type received.");
            }
            else if (root.TryGetProperty("event", out JsonElement eventElement))
            {
                Console.WriteLine($"Event received: {JsonSerializer.Serialize(eventElement)}");
            }
            else if (root.TryGetProperty("result", out JsonElement resultElement))
            {
                Console.WriteLine($"Result received: {JsonSerializer.Serialize(resultElement)}");
            }
            else if (root.TryGetProperty("error", out JsonElement errorElement))
            {
                Console.WriteLine($"Error received: {JsonSerializer.Serialize(errorElement)}");
            }
            else
            {
                Console.WriteLine("Unknown message type received.");
            }
        }
    }
    catch (JsonException ex)
    {
        Console.WriteLine($"Failed to parse message: {ex.Message}");
    }
}
Python Example
import asyncio
import json
import base64
import os
import websocket
import requests
async def main():
    """Request a recognition session, then stream an audio file over the WebSocket.

    Posts the recognition model to the VDK service, reads the session token
    from the JSON response, opens the matching WebSocket, and streams the raw
    audio file while incoming messages are printed by ``handle_message``.

    Raises:
        requests.HTTPError: if the service answers the POST with an error status.
    """
    import threading

    raw_audio_file = r"C:\Users\vivoka\Music\coffee.raw"
    request_uri = "http://localhost:39806/v1/voice-recognition/recognize"
    request_data = {
        "models": {
            "VendingMachine": {
                "slots": {
                    "drink": {
                        "values": ["Coffee", "Cola", "Mojito", "Cup of tea"]
                    }
                }
            }
        }
    }

    # requests and websocket-client are blocking libraries; run them in the
    # default executor so the event loop is not stalled.
    loop = asyncio.get_event_loop()
    response = await loop.run_in_executor(
        None, lambda: requests.post(request_uri, json=request_data)
    )
    # Surface HTTP errors here rather than as a KeyError on "token" below.
    response.raise_for_status()
    response_data = response.json()
    token = response_data["token"]
    web_socket_url = f"ws://localhost:39806/v1/ws/{token}"

    def start_sending(app):
        # Send from a separate thread: on_open runs on the thread that also
        # dispatches incoming messages, so sending inline would delay every
        # event/result until the whole file had been streamed.
        threading.Thread(
            target=send_audio_data, args=(app, raw_audio_file), daemon=True
        ).start()

    ws = websocket.WebSocketApp(
        web_socket_url, on_open=start_sending, on_message=handle_message
    )
    # run_forever() performs the connection and dispatches callbacks until the
    # socket is closed; without it nothing is ever sent or received.
    await loop.run_in_executor(None, ws.run_forever)
def send_audio_data(ws, audio_file_path, chunk_size=1024):
    """Stream a file over ``ws`` as base64-encoded JSON chunks.

    Each message is a JSON object ``{"data": "data:audio/pcm;base64,<payload>",
    "last": <bool>}``; ``last`` is true for the chunk that reaches the end of
    the file. An empty file produces no message.

    Args:
        ws: Object exposing ``send(str)``, e.g. a ``websocket.WebSocketApp``.
        audio_file_path: Path of the audio file to stream.
        chunk_size: Number of raw bytes read per message (before encoding).
    """
    # Read audio data from a file and send it through websocket
    file_size = os.path.getsize(audio_file_path)
    with open(audio_file_path, "rb") as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                # End of file: nothing left to send.
                break
            base64_chunk = base64.b64encode(chunk).decode("utf-8")
            audio_chunk = {
                "data": f"data:audio/pcm;base64,{base64_chunk}",
                "last": f.tell() == file_size
            }
            json_chunk = json.dumps(audio_chunk)
            ws.send(json_chunk)
def handle_message(ws, message):
    """Print one message received from the WebSocket.

    The message is parsed as JSON and reported as an event, a result, or an
    error depending on which top-level key it carries. Anything else is
    reported as unknown, and malformed JSON is reported instead of raised.

    Args:
        ws: The WebSocket the message arrived on (unused).
        message: The raw JSON text of the message.
    """
    try:
        data = json.loads(message)
        if not isinstance(data, dict):
            # Key lookups only make sense on JSON objects; lists, strings and
            # numbers would raise or give misleading substring matches.
            print("Unknown message type received.")
        elif "event" in data:
            print(f"Event received: {json.dumps(data['event'])}")
        elif "result" in data:
            print(f"Result received: {json.dumps(data['result'])}")
        elif "error" in data:
            print(f"Error received: {json.dumps(data['error'])}")
        else:
            print("Unknown message type received.")
    except json.JSONDecodeError as ex:
        print(f"Failed to parse message: {ex}")
if __name__ == "__main__":