Voice Recognition

This example sends a POST request to the VDK service to start voice recognition. The response includes a token, which is used to connect to the WebSocket and stream audio data from a file. While streaming, it simultaneously receives packets from the WebSocket and prints incoming events, results, or errors to the console.

C# Example

C#

using System.Text.Json;
using System.Net.WebSockets;
using System.Text;

// Audio file to stream, the recognition endpoint, and the data-URI prefix put in front of every base64 audio chunk.
const string rawAudioFile = @"C:\Users\vivoka\Music\coffee.raw";
const string requestUri = "http://localhost:39806/v1/voice-recognition/recognize";
const string audioDataHeader = "data:audio/pcm;base64,";

// Recognition request body, built inside-out:
// models -> VendingMachine -> slots -> drink -> values.
string[] drinkValues = { "Coffee", "Cola", "Mojito", "Cup of tea" };
var drinkSlot = new { values = drinkValues };
var vendingMachineModel = new { slots = new { drink = drinkSlot } };
var requestData = new { models = new { VendingMachine = vendingMachineModel } };

// Step 1: Ask the VDK service to start a recognition session for the requested model
using (var client = new HttpClient())
{
    var json = JsonSerializer.Serialize(requestData);
    using var content = new StringContent(json, Encoding.UTF8, "application/json");
    using var response = await client.PostAsync(requestUri, content);
    response.EnsureSuccessStatusCode();
    var responseBody = await response.Content.ReadAsStringAsync();

    // Step 2: Extract the session token from the response and build the WebSocket URL.
    // JsonDocument is backed by pooled memory, so it is disposed as soon as the token
    // has been copied out into a plain string.
    string token;
    using (var responseDocument = JsonDocument.Parse(responseBody))
    {
        token = responseDocument.RootElement.GetProperty("token").ToString();
    }
    var webSocketUrl = $"ws://localhost:39806/v1/ws/{token}";

    // Step 3: Connect to the WebSocket and start sending audio data
    using (var webSocket = new ClientWebSocket())
    {
        await webSocket.ConnectAsync(new Uri(webSocketUrl), CancellationToken.None);

        // Both operations are naturally asynchronous I/O, so they are started directly
        // (no Task.Run) and awaited together: one send and one receive may be in flight
        // on the same ClientWebSocket at the same time.
        var sending = SendAudioData(webSocket, rawAudioFile);
        var receiving = ReceivePackets(webSocket);
        await Task.WhenAll(sending, receiving);
    }
}

// Streams the audio file through the WebSocket as a sequence of JSON text messages.
// Each message carries one base64-encoded chunk (prefixed with audioDataHeader) in "data"
// and a "last" flag that is true once the file has been read to its end.
async Task SendAudioData(ClientWebSocket webSocket, string audioFilePath)
{
    using var audioStream = File.OpenRead(audioFilePath);
    var chunk = new byte[1024];

    while (true)
    {
        var count = await audioStream.ReadAsync(chunk, 0, chunk.Length);
        if (count <= 0)
        {
            break;
        }

        // The stream position has already advanced past this chunk, so reaching
        // the stream length means there is nothing left to read after it.
        var isLastChunk = audioStream.Position == audioStream.Length;
        var payload = JsonSerializer.Serialize(new
        {
            data = audioDataHeader + Convert.ToBase64String(chunk, 0, count),
            last = isLastChunk
        });

        var frame = new ArraySegment<byte>(Encoding.UTF8.GetBytes(payload));
        await webSocket.SendAsync(frame, WebSocketMessageType.Text, true, CancellationToken.None);
    }
}

// Receives packets from the WebSocket until it is closed and passes every complete
// message to HandleMessage.
// A message may arrive in several fragments. The raw bytes of all fragments are collected
// and decoded as UTF-8 only once the message is complete; decoding fragment by fragment
// would corrupt any multi-byte character that straddles two fragments.
async Task ReceivePackets(ClientWebSocket webSocket)
{
    var buffer = new byte[1024];
    using var messageBytes = new MemoryStream();

    while (webSocket.State == WebSocketState.Open)
    {
        var packet = await webSocket.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
        if (packet.MessageType == WebSocketMessageType.Close)
        {
            // The server started the close handshake: acknowledge it and stop receiving.
            await webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, "Closing", CancellationToken.None);
            break;
        }

        messageBytes.Write(buffer, 0, packet.Count);
        if (packet.EndOfMessage)
        {
            var message = Encoding.UTF8.GetString(messageBytes.GetBuffer(), 0, (int)messageBytes.Length);
            HandleMessage(message);

            // Reuse the same stream for the next message.
            messageBytes.SetLength(0);
        }
    }
}

// Parses one JSON message received from the service and prints it to the console,
// labelled by the first of the "event", "result" or "error" properties that it contains.
// Text that is not valid JSON is reported as a parse failure; valid JSON that is not an
// object, or an object with none of those properties, is reported as an unknown message.
void HandleMessage(string message)
{
    try
    {
        using var document = JsonDocument.Parse(message);
        var root = document.RootElement;

        // TryGetProperty throws InvalidOperationException when the element is not a JSON
        // object (e.g. an array or a bare string). That exception is not a JsonException,
        // so without this guard it would escape and abort the caller.
        if (root.ValueKind != JsonValueKind.Object)
        {
            Console.WriteLine("Unknown message type received.");
            return;
        }

        if (root.TryGetProperty("event", out JsonElement eventElement))
        {
            Console.WriteLine($"Event received: {JsonSerializer.Serialize(eventElement)}");
        }
        else if (root.TryGetProperty("result", out JsonElement resultElement))
        {
            Console.WriteLine($"Result received: {JsonSerializer.Serialize(resultElement)}");
        }
        else if (root.TryGetProperty("error", out JsonElement errorElement))
        {
            Console.WriteLine($"Error received: {JsonSerializer.Serialize(errorElement)}");
        }
        else
        {
            Console.WriteLine("Unknown message type received.");
        }
    }
    catch (JsonException ex)
    {
        Console.WriteLine($"Failed to parse message: {ex.Message}");
    }
}

Python Example

PY

import asyncio
import json
import base64
import os
import websocket
import requests

async def main():
    """Request a recognition session, then stream an audio file over the WebSocket.

    Posts the model description to the recognition endpoint, reads the session
    token from the JSON response, and opens a WebSocket on the token-specific
    URL. Audio is sent from the ``on_open`` callback and every incoming message
    goes to ``handle_message``. ``run_forever`` keeps the connection running
    until it ends.

    Raises:
        requests.HTTPError: if the service answers the POST with an error status.
    """
    raw_audio_file = r"C:\Users\vivoka\Music\coffee.raw"
    request_uri = "http://localhost:39806/v1/voice-recognition/recognize"
    request_data = {
        "models": {
            "VendingMachine": {
                "slots": {
                    "drink": {
                        "values": ["Coffee", "Cola", "Mojito", "Cup of tea"]
                    }
                }
            }
        }
    }

    response = requests.post(request_uri, json=request_data)
    # Fail with the HTTP error itself instead of a confusing KeyError/JSON error
    # further down when the service rejects the request.
    response.raise_for_status()
    response_data = response.json()

    token = response_data["token"]
    web_socket_url = f"ws://localhost:39806/v1/ws/{token}"
    ws = websocket.WebSocketApp(web_socket_url, on_message=handle_message)
    # Start streaming the audio file as soon as the connection is established.
    ws.on_open = lambda _: send_audio_data(ws, raw_audio_file)
    ws.run_forever()

def send_audio_data(ws, audio_file_path):
    """Stream a file through ``ws`` as JSON messages of base64-encoded chunks.

    The file is read in 1024-byte chunks. Each chunk is sent with ``ws.send`` as
    a JSON object whose ``data`` field is the base64 payload prefixed with
    ``data:audio/pcm;base64,`` and whose ``last`` field is true once the read
    position has reached the size of the file.

    Args:
        ws: object exposing a ``send(str)`` method.
        audio_file_path: path of the file to stream.
    """
    total_bytes = os.path.getsize(audio_file_path)
    with open(audio_file_path, "rb") as audio_file:
        # iter() with a sentinel stops at the first empty read, i.e. at end of file.
        for raw_chunk in iter(lambda: audio_file.read(1024), b""):
            encoded = base64.b64encode(raw_chunk).decode("utf-8")
            reached_end = audio_file.tell() == total_bytes
            packet = {"data": "data:audio/pcm;base64," + encoded, "last": reached_end}
            ws.send(json.dumps(packet))

def handle_message(ws, message):
    """Print one message received from the WebSocket, labelled by its kind.

    The message is parsed as JSON. The first of the ``event``, ``result`` or
    ``error`` keys found in the resulting object decides the label. Valid JSON
    that is not an object, or an object with none of those keys, is reported as
    an unknown message; text that is not valid JSON is reported as a parse
    failure.

    Args:
        ws: the WebSocket the message arrived on (unused).
        message: the raw message text.
    """
    try:
        data = json.loads(message)
    except json.JSONDecodeError as ex:
        print(f"Failed to parse message: {ex}")
        return

    # Valid JSON is not necessarily an object: a bare number makes the ``in``
    # test raise TypeError, and a bare string makes it a substring test.
    if not isinstance(data, dict):
        print("Unknown message type received.")
    elif "event" in data:
        print(f"Event received: {json.dumps(data['event'])}")
    elif "result" in data:
        print(f"Result received: {json.dumps(data['result'])}")
    elif "error" in data:
        print(f"Error received: {json.dumps(data['error'])}")
    else:
        print("Unknown message type received.")

# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())