Voice Recognition
This example sends a POST request to the VDK service to start a voice recognition session. The response includes a token, which is used to connect to the WebSocket and stream audio data from a file. While the audio is being sent, the example also receives packets from the WebSocket and prints incoming events, results, or errors to the console.
C# Example
C#
using System.Text.Json;
using System.Net.WebSockets;
using System.Text;
// Path of the audio file that is streamed to the service.
const string rawAudioFile = @"C:\Users\vivoka\Music\coffee.raw";
// Endpoint that starts a recognition session and returns a WebSocket token.
const string requestUri = "http://localhost:39806/v1/voice-recognition/recognize";
// Prefix put in front of every base64-encoded audio chunk.
const string audioDataHeader = "data:audio/pcm;base64,";

// Request payload: the "VendingMachine" model with the values of its "drink" slot.
var requestData = new
{
    models = new
    {
        VendingMachine = new
        {
            slots = new
            {
                drink = new { values = new[] { "Coffee", "Cola", "Mojito", "Cup of tea" } }
            }
        }
    }
};

using (var client = new HttpClient())
{
    // Step 1: Send the recognition request
    var json = JsonSerializer.Serialize(requestData);
    using var content = new StringContent(json, Encoding.UTF8, "application/json");
    using var response = await client.PostAsync(requestUri, content);
    response.EnsureSuccessStatusCode();
    var responseBody = await response.Content.ReadAsStringAsync();

    // Step 2: Extract the WebSocket URL from the response
    string token;
    // JsonDocument rents pooled buffers, so dispose it as soon as the token is copied out.
    using (var document = JsonDocument.Parse(responseBody))
    {
        token = document.RootElement.GetProperty("token").ToString();
    }
    var webSocketUrl = $"ws://localhost:39806/v1/ws/{token}";

    // Step 3: Connect to the WebSocket and start sending audio data
    using (var webSocket = new ClientWebSocket())
    {
        await webSocket.ConnectAsync(new Uri(webSocketUrl), CancellationToken.None);

        // Both operations are asynchronous I/O, so they run concurrently without Task.Run.
        var sending = SendAudioData(webSocket, rawAudioFile);
        var receiving = ReceivePackets(webSocket);
        await Task.WhenAll(sending, receiving);
    }
}
// Streams the file at audioFilePath through the WebSocket in chunks of up to 1024 bytes.
// Each chunk is sent as one JSON text message with two fields:
//   data - the audio data header followed by the base64-encoded bytes
//   last - true when the read position has reached the end of the file
async Task SendAudioData(ClientWebSocket webSocket, string audioFilePath)
{
    using var audioStream = File.OpenRead(audioFilePath);
    var chunk = new byte[1024];

    while (true)
    {
        var count = await audioStream.ReadAsync(chunk, 0, chunk.Length);
        if (count <= 0)
        {
            break;
        }

        var encoded = Convert.ToBase64String(chunk, 0, count);
        var isLast = audioStream.Position == audioStream.Length;
        var payload = JsonSerializer.Serialize(new
        {
            data = audioDataHeader + encoded,
            last = isLast
        });

        var frame = new ArraySegment<byte>(Encoding.UTF8.GetBytes(payload));
        await webSocket.SendAsync(frame, WebSocketMessageType.Text, true, CancellationToken.None);
    }
}
// Reads from the WebSocket while it is open, reassembles fragmented messages and passes
// each complete message to HandleMessage. When a close frame arrives, the close handshake
// is completed and the loop ends.
async Task ReceivePackets(ClientWebSocket webSocket)
{
    var buffer = new byte[1024];

    // A message longer than the buffer arrives in several fragments, and a multi-byte
    // UTF-8 character may be split between two of them. The stateful decoder keeps the
    // incomplete trailing bytes and finishes the character on the next call, instead of
    // corrupting it as a per-fragment Encoding.UTF8.GetString would.
    var decoder = Encoding.UTF8.GetDecoder();
    var chars = new char[Encoding.UTF8.GetMaxCharCount(buffer.Length)];
    var message = new StringBuilder();

    while (webSocket.State == WebSocketState.Open)
    {
        var packet = await webSocket.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
        if (packet.MessageType == WebSocketMessageType.Close)
        {
            await webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, "Closing", CancellationToken.None);
            break;
        }

        // Flush on the final fragment so no decoder state leaks into the next message.
        var charCount = decoder.GetChars(buffer, 0, packet.Count, chars, 0, packet.EndOfMessage);
        message.Append(chars, 0, charCount);

        if (packet.EndOfMessage)
        {
            HandleMessage(message.ToString());
            message.Clear();
        }
    }
}
// Parses one JSON message received from the WebSocket and prints it to the console.
// The first of the "event", "result" or "error" properties found on the root object
// decides the label; anything else is reported as an unknown message type.
// Text that is not valid JSON is reported as a parse failure instead of throwing.
void HandleMessage(string message)
{
    try
    {
        using var document = JsonDocument.Parse(message);
        var root = document.RootElement;

        // TryGetProperty throws InvalidOperationException for any root that is not a JSON
        // object (array, string, number, ...), so treat those as unknown messages up front.
        if (root.ValueKind != JsonValueKind.Object)
        {
            Console.WriteLine("Unknown message type received.");
            return;
        }

        if (root.TryGetProperty("event", out JsonElement eventElement))
        {
            Console.WriteLine($"Event received: {JsonSerializer.Serialize(eventElement)}");
        }
        else if (root.TryGetProperty("result", out JsonElement resultElement))
        {
            Console.WriteLine($"Result received: {JsonSerializer.Serialize(resultElement)}");
        }
        else if (root.TryGetProperty("error", out JsonElement errorElement))
        {
            Console.WriteLine($"Error received: {JsonSerializer.Serialize(errorElement)}");
        }
        else
        {
            Console.WriteLine("Unknown message type received.");
        }
    }
    catch (JsonException ex)
    {
        Console.WriteLine($"Failed to parse message: {ex.Message}");
    }
}
Python Example
PY
import asyncio
import json
import base64
import os
import websocket
import requests
async def main():
    """Request a recognition session, then stream an audio file over the WebSocket.

    Posts the model description to the recognition endpoint, reads the ``token``
    field of the JSON response, opens the matching WebSocket and sends the audio
    file while incoming messages are passed to ``handle_message``.

    Raises:
        requests.HTTPError: if the recognition request returns an error status.
    """
    import threading  # local import: only needed for the background sender

    raw_audio_file = r"C:\Users\vivoka\Music\coffee.raw"
    request_uri = "http://localhost:39806/v1/voice-recognition/recognize"
    request_data = {
        "models": {
            "VendingMachine": {
                "slots": {
                    "drink": {
                        "values": ["Coffee", "Cola", "Mojito", "Cup of tea"]
                    }
                }
            }
        }
    }

    response = requests.post(request_uri, json=request_data)
    # Fail early on an HTTP error instead of hitting a KeyError on the missing token.
    response.raise_for_status()
    response_data = response.json()
    token = response_data["token"]
    web_socket_url = f"ws://localhost:39806/v1/ws/{token}"

    def on_open(ws):
        # Callbacks run on the thread that also reads from the socket, so sending the
        # whole file here would delay every incoming message until the upload is done.
        # Sending from a separate thread keeps receiving and sending concurrent.
        sender = threading.Thread(
            target=send_audio_data, args=(ws, raw_audio_file), daemon=True
        )
        sender.start()

    ws = websocket.WebSocketApp(
        web_socket_url, on_open=on_open, on_message=handle_message
    )
    ws.run_forever()
def send_audio_data(ws, audio_file_path):
    """Send the file at ``audio_file_path`` through ``ws`` as JSON audio chunks.

    The file is read in pieces of up to 1024 bytes. Every piece is base64-encoded,
    prefixed with the PCM data header and sent as a JSON object whose ``last``
    field is true once the read position equals the file size.
    """
    total_bytes = os.path.getsize(audio_file_path)
    with open(audio_file_path, "rb") as audio:
        # iter() with a sentinel stops as soon as read() returns an empty bytes object
        for raw in iter(lambda: audio.read(1024), b""):
            encoded = base64.b64encode(raw).decode("utf-8")
            payload = json.dumps({
                "data": f"data:audio/pcm;base64,{encoded}",
                "last": audio.tell() == total_bytes,
            })
            ws.send(payload)
def handle_message(ws, message):
    """Print one JSON message received from the WebSocket.

    The first of the ``event``, ``result`` or ``error`` keys present in the decoded
    object decides the label. Text that is not valid JSON is reported as a parse
    failure, and any other payload as an unknown message type.

    Args:
        ws: the WebSocket the message arrived on (unused).
        message: the raw text of the message.
    """
    try:
        data = json.loads(message)
    except json.JSONDecodeError as ex:
        print(f"Failed to parse message: {ex}")
        return

    # Valid JSON is not necessarily an object: a bare number makes `in` raise
    # TypeError and a list such as ["event"] makes the key lookup raise it.
    if not isinstance(data, dict):
        print("Unknown message type received.")
        return

    if "event" in data:
        print(f"Event received: {json.dumps(data['event'])}")
    elif "result" in data:
        print(f"Result received: {json.dumps(data['result'])}")
    elif "error" in data:
        print(f"Error received: {json.dumps(data['error'])}")
    else:
        print("Unknown message type received.")
# Run the example only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())