Voice Biometrics

Enroll

This example demonstrates voice biometrics for user enrollment. The VoiceBiometrics class handles configuration, enrollment, and streaming audio data to a server via WebSocket. This process involves sending user data, streaming encoded audio chunks, and handling server messages to indicate success or failure.

C# Example

C#

using System.Text.Json;
using System.Net.WebSockets;
using System.Text;

// Await the enrollment instead of blocking on Wait(): a failure then surfaces as the
// original exception rather than wrapped in an AggregateException.
await new VoiceBiometrics().Enroll();

/// <summary>
/// Enrollment example: posts the model, model type and user name to the enroll endpoint,
/// then streams a raw audio file over a WebSocket while printing every message received.
/// </summary>
class VoiceBiometrics
{
    const string ModelName = "Portal";
    const string ModelType = "text_independent";
    const string UserName = "Mike";
    const string RawAudioFile = @"C:\Users\vivoka\Music\record.raw";
    const string EnrollUri = "http://localhost:39806/v1/voice-biometrics/enroll";
    const string AudioDataHeader = "data:audio/pcm;base64,";

    /// <summary>Token returned by the enroll endpoint; it is used to build the WebSocket URL.</summary>
    public string Token { get; private set; } = "";

    /// <summary>Whether the most recent progress event reported a value of at least 100.</summary>
    public bool Success { get; private set; } = false;

    /// <summary>
    /// Requests a token over HTTP, opens the WebSocket for that token, then sends the audio
    /// file and reads incoming messages concurrently until both sides are done.
    /// </summary>
    /// <exception cref="HttpRequestException">The enroll request did not return a success status code.</exception>
    public async Task Enroll()
    {
        var request = new { model = ModelName, model_type = ModelType, user = UserName };

        using (var client = new HttpClient())
        using (var content = new StringContent(JsonSerializer.Serialize(request), Encoding.UTF8, "application/json"))
        using (var response = await client.PostAsync(EnrollUri, content))
        {
            response.EnsureSuccessStatusCode();
            var responseBody = await response.Content.ReadAsStringAsync();

            // JsonDocument rents pooled buffers, so it is disposed once the token is copied out.
            using (var document = JsonDocument.Parse(responseBody))
            {
                Token = document.RootElement.GetProperty("token").ToString();
            }
        }

        using (var webSocket = new ClientWebSocket())
        {
            await webSocket.ConnectAsync(new Uri($"ws://localhost:39806/v1/ws/{Token}"), CancellationToken.None);

            // Both methods are already asynchronous, so no Task.Run wrapper is needed.
            var sending = StreamAudioFile(webSocket);
            var receiving = ReceivePackets(webSocket);
            await Task.WhenAll(sending, receiving);
        }
    }

    /// <summary>
    /// Parses one complete text message and prints its "event", "result" or "error" payload.
    /// An event carrying a numeric "progress" also updates <see cref="Success"/>.
    /// </summary>
    /// <param name="message">The full text of one WebSocket message.</param>
    private void OnMsgReceived(string message)
    {
        try
        {
            using (var document = JsonDocument.Parse(message))
            {
                var root = document.RootElement;

                // TryGetProperty throws on anything that is not a JSON object.
                if (root.ValueKind != JsonValueKind.Object)
                {
                    Console.WriteLine("Unknown message type received.");
                    return;
                }

                if (root.TryGetProperty("event", out JsonElement eventElement))
                {
                    // Only read "progress" when the event is an object and the value is a number,
                    // so an unexpected payload shape cannot throw out of the receive loop.
                    if (eventElement.ValueKind == JsonValueKind.Object
                        && eventElement.TryGetProperty("progress", out JsonElement progressElement)
                        && progressElement.ValueKind == JsonValueKind.Number
                        && progressElement.TryGetDouble(out double progress))
                    {
                        Success = progress >= 100;
                    }

                    Console.WriteLine($"Event received: {JsonSerializer.Serialize(eventElement)}");
                }
                else if (root.TryGetProperty("result", out JsonElement resultElement))
                {
                    Console.WriteLine($"Result received: {JsonSerializer.Serialize(resultElement)}");
                }
                else if (root.TryGetProperty("error", out JsonElement errorElement))
                {
                    Console.WriteLine($"Error received: {JsonSerializer.Serialize(errorElement)}");
                }
                else
                {
                    Console.WriteLine("Unknown message type received.");
                }
            }
        }
        catch (JsonException ex)
        {
            Console.WriteLine($"Failed to parse message: {ex.Message}");
        }
    }

    /// <summary>
    /// Reads the audio file in 1024-byte chunks and sends each one as a JSON text message
    /// holding the base64 data and a "last" flag that is true on the chunk reaching end of file.
    /// </summary>
    /// <param name="webSocket">The connected socket to send on.</param>
    private async Task StreamAudioFile(ClientWebSocket webSocket)
    {
        // Read audio data from a file and send it through the websocket
        using (var fs = File.OpenRead(RawAudioFile))
        {
            int bytesRead;
            var buffer = new byte[1024];
            while ((bytesRead = await fs.ReadAsync(buffer, 0, buffer.Length)) > 0)
            {
                // Stop streaming once the socket is no longer open; sending would throw.
                if (webSocket.State != WebSocketState.Open)
                {
                    break;
                }

                var base64 = Convert.ToBase64String(buffer, 0, bytesRead);
                var audioChunk = new
                {
                    data = $"{AudioDataHeader}{base64}",
                    last = fs.Position == fs.Length
                };
                var json = JsonSerializer.Serialize(audioChunk);
                var bytes = Encoding.UTF8.GetBytes(json);
                await webSocket.SendAsync(new ArraySegment<byte>(bytes), WebSocketMessageType.Text, true, CancellationToken.None);
            }
        }
    }

    /// <summary>
    /// Receives frames while the socket is open, reassembles them into complete messages and
    /// hands each one to <see cref="OnMsgReceived"/>. A close frame is answered with a close.
    /// </summary>
    /// <param name="webSocket">The connected socket to read from.</param>
    private async Task ReceivePackets(ClientWebSocket webSocket)
    {
        var buffer = new byte[1024];

        // Fragments are accumulated as bytes and decoded once per message: decoding each
        // fragment separately corrupts a multi-byte UTF-8 character split across two frames.
        using (var pending = new MemoryStream())
        {
            while (webSocket.State == WebSocketState.Open)
            {
                var packet = await webSocket.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
                if (packet.MessageType == WebSocketMessageType.Close)
                {
                    await webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, "Closing", CancellationToken.None);
                    break;
                }

                pending.Write(buffer, 0, packet.Count);
                if (packet.EndOfMessage)
                {
                    OnMsgReceived(Encoding.UTF8.GetString(pending.GetBuffer(), 0, (int)pending.Length));
                    pending.SetLength(0);
                }
            }
        }
    }
}

Python Example

PY

import asyncio
import base64
import json
import os
import requests
import websockets

class VoiceBiometrics:
    """Enrollment example.

    Posts the model, model type and user name to the enroll endpoint to get a
    token, then streams a raw audio file over a WebSocket while printing every
    message received.
    """

    model_name = "Portal"
    model_type = "text_independent"
    username = "Mike"
    raw_audio_file = r"C:\Users\vivoka\Music\record.raw"
    enroll_uri = "http://localhost:39806/v1/voice-biometrics/enroll"
    audio_data_header = "data:audio/pcm;base64,"
    # Seconds to wait for the token request; without a timeout requests can block forever.
    request_timeout = 10

    def __init__(self):
        # Token returned by the enroll endpoint; used to build the WebSocket URL.
        self.token = ""
        # Whether the most recent progress event reported a value of at least 100.
        self.success = False

    async def enroll(self):
        """Request a token, then send the audio and read replies concurrently.

        Raises:
            requests.HTTPError: the enroll request returned an error status.
            requests.Timeout: the enroll request exceeded ``request_timeout``.
        """
        # Send HTTP POST request to enroll
        request_data = {
            "model": self.model_name,
            "model_type": self.model_type,
            "user": self.username
        }
        response = requests.post(self.enroll_uri, json=request_data, timeout=self.request_timeout)
        response.raise_for_status()
        json_response = response.json()
        self.token = json_response["token"]

        # Connect to WebSocket
        web_socket_url = f"ws://localhost:39806/v1/ws/{self.token}"
        async with websockets.connect(web_socket_url) as websocket:
            sending = asyncio.create_task(self._stream_audio_file(websocket))
            receiving = asyncio.create_task(self._on_msg_received(websocket))
            await asyncio.gather(sending, receiving)

    async def _on_msg_received(self, websocket):
        """Print each incoming message and track enrollment progress.

        A message that cannot be parsed is reported and skipped, so one bad
        message does not stop the remaining ones from being read.
        """
        async for message in websocket:
            try:
                message_json = json.loads(message)
            except (json.JSONDecodeError, UnicodeDecodeError) as ex:
                print(f"Failed to parse message: {ex}")
                continue

            # Valid JSON that is not an object cannot carry any of the expected keys.
            if not isinstance(message_json, dict):
                print("Unknown message type received.")
                continue

            if "event" in message_json:
                event = message_json["event"]
                # Only read "progress" from an object holding a number; any other
                # shape is still printed but must not raise.
                if isinstance(event, dict):
                    progress = event.get("progress")
                    if isinstance(progress, (int, float)):
                        self.success = progress >= 100
                print(f"Event received: {json.dumps(event)}")
            elif "result" in message_json:
                print(f"Result received: {json.dumps(message_json['result'])}")
            elif "error" in message_json:
                print(f"Error received: {json.dumps(message_json['error'])}")
            else:
                print("Unknown message type received.")

    async def _stream_audio_file(self, websocket):
        """Send the audio file in 1024-byte base64 chunks.

        Each chunk is a JSON text message with the data and a ``last`` flag
        that is true on the chunk reaching the end of the file.
        """
        # Read audio data from a file and send it through websocket
        file_size = os.path.getsize(self.raw_audio_file)
        with open(self.raw_audio_file, 'rb') as fs:
            while True:
                buffer = fs.read(1024)
                if not buffer:
                    break
                base64_data = base64.b64encode(buffer).decode('utf-8')
                audio_chunk = {
                    "data": f"{self.audio_data_header}{base64_data}",
                    "last": fs.tell() == file_size
                }
                try:
                    await websocket.send(json.dumps(audio_chunk))
                except websockets.ConnectionClosed:
                    # The connection was closed before the whole file was sent:
                    # stop streaming instead of failing the task.
                    break
                await asyncio.sleep(0.01)  # To prevent overwhelming the server

if __name__ == "__main__":
    # Script entry point: build the example object and drive its enrollment
    # coroutine to completion on a new event loop.
    asyncio.run(VoiceBiometrics().enroll())

Authentication

This example implements voice biometrics authentication using a text-independent model. It defines a VoiceBiometrics class with an authenticate method that initializes authentication, establishes a WebSocket connection, streams audio data, and handles server messages. The process includes sending voice data in base64 format and receiving authentication results or errors.

C# Example

C#

using System.Text.Json;
using System.Net.WebSockets;
using System.Text;

// Await the authentication instead of blocking on Wait(): a failure then surfaces as the
// original exception rather than wrapped in an AggregateException.
await new VoiceBiometrics().Authentificate();

/// <summary>
/// Authentication example: posts the model and user name to the authenticate endpoint,
/// then streams a raw audio file over a WebSocket while printing every message received.
/// </summary>
class VoiceBiometrics
{
    const string ModelName = "Portal";
    const string UserName = "Mike";
    const string RawAudioFile = @"C:\Users\vivoka\Music\record.raw";
    const string AuthenticateUri = "http://localhost:39806/v1/voice-biometrics/authenticate";
    const string AudioDataHeader = "data:audio/pcm;base64,";

    /// <summary>Token returned by the authenticate endpoint; it is used to build the WebSocket URL.</summary>
    public string Token { get; private set; } = "";

    /// <summary>
    /// Requests a token over HTTP, opens the WebSocket for that token, then sends the audio
    /// file and reads incoming messages concurrently until both sides are done.
    /// </summary>
    /// <exception cref="HttpRequestException">The authenticate request did not return a success status code.</exception>
    public async Task Authenticate()
    {
        var request = new { model = ModelName, user = UserName };

        using (var client = new HttpClient())
        using (var content = new StringContent(JsonSerializer.Serialize(request), Encoding.UTF8, "application/json"))
        using (var response = await client.PostAsync(AuthenticateUri, content))
        {
            response.EnsureSuccessStatusCode();
            var responseBody = await response.Content.ReadAsStringAsync();

            // JsonDocument rents pooled buffers, so it is disposed once the token is copied out.
            using (var document = JsonDocument.Parse(responseBody))
            {
                Token = document.RootElement.GetProperty("token").ToString();
            }
        }

        using (var webSocket = new ClientWebSocket())
        {
            await webSocket.ConnectAsync(new Uri($"ws://localhost:39806/v1/ws/{Token}"), CancellationToken.None);

            // Both methods are already asynchronous, so no Task.Run wrapper is needed.
            var sending = StreamAudioFile(webSocket);
            var receiving = ReceivePackets(webSocket);
            await Task.WhenAll(sending, receiving);
        }
    }

    /// <summary>
    /// Original, misspelled name of <see cref="Authenticate"/>; kept so existing callers keep working.
    /// </summary>
    public Task Authentificate() => Authenticate();

    /// <summary>
    /// Parses one complete text message and prints its "event", "result" or "error" payload.
    /// </summary>
    /// <param name="message">The full text of one WebSocket message.</param>
    private void OnMsgReceived(string message)
    {
        try
        {
            using (var document = JsonDocument.Parse(message))
            {
                var root = document.RootElement;

                // TryGetProperty throws on anything that is not a JSON object.
                if (root.ValueKind != JsonValueKind.Object)
                {
                    Console.WriteLine("Unknown message type received.");
                    return;
                }

                if (root.TryGetProperty("event", out JsonElement eventElement))
                {
                    Console.WriteLine($"Event received: {JsonSerializer.Serialize(eventElement)}");
                }
                else if (root.TryGetProperty("result", out JsonElement resultElement))
                {
                    Console.WriteLine($"Result received: {JsonSerializer.Serialize(resultElement)}");
                }
                else if (root.TryGetProperty("error", out JsonElement errorElement))
                {
                    Console.WriteLine($"Error received: {JsonSerializer.Serialize(errorElement)}");
                }
                else
                {
                    Console.WriteLine("Unknown message type received.");
                }
            }
        }
        catch (JsonException ex)
        {
            Console.WriteLine($"Failed to parse message: {ex.Message}");
        }
    }

    /// <summary>
    /// Reads the audio file in 1024-byte chunks and sends each one as a JSON text message
    /// holding the base64 data and a "last" flag that is true on the chunk reaching end of file.
    /// Streaming stops early if the socket is no longer open.
    /// </summary>
    /// <param name="webSocket">The connected socket to send on.</param>
    private async Task StreamAudioFile(ClientWebSocket webSocket)
    {
        // Read audio data from a file and send it through the websocket
        using (var fs = File.OpenRead(RawAudioFile))
        {
            int bytesRead;
            var buffer = new byte[1024];
            while ((bytesRead = await fs.ReadAsync(buffer, 0, buffer.Length)) > 0)
            {
                // Stop streaming once the socket is no longer open; sending would throw.
                if (webSocket.State != WebSocketState.Open)
                {
                    break;
                }

                var base64 = Convert.ToBase64String(buffer, 0, bytesRead);
                var audioChunk = new
                {
                    data = $"{AudioDataHeader}{base64}",
                    last = fs.Position == fs.Length
                };
                var json = JsonSerializer.Serialize(audioChunk);
                var bytes = Encoding.UTF8.GetBytes(json);
                await webSocket.SendAsync(new ArraySegment<byte>(bytes), WebSocketMessageType.Text, true, CancellationToken.None);
            }
        }
    }

    /// <summary>
    /// Receives frames while the socket is open, reassembles them into complete messages and
    /// hands each one to <see cref="OnMsgReceived"/>. A close frame is answered with a close.
    /// </summary>
    /// <param name="webSocket">The connected socket to read from.</param>
    private async Task ReceivePackets(ClientWebSocket webSocket)
    {
        var buffer = new byte[1024];

        // Fragments are accumulated as bytes and decoded once per message: decoding each
        // fragment separately corrupts a multi-byte UTF-8 character split across two frames.
        using (var pending = new MemoryStream())
        {
            while (webSocket.State == WebSocketState.Open)
            {
                var packet = await webSocket.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
                if (packet.MessageType == WebSocketMessageType.Close)
                {
                    await webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, "Closing", CancellationToken.None);
                    break;
                }

                pending.Write(buffer, 0, packet.Count);
                if (packet.EndOfMessage)
                {
                    OnMsgReceived(Encoding.UTF8.GetString(pending.GetBuffer(), 0, (int)pending.Length));
                    pending.SetLength(0);
                }
            }
        }
    }
}

Python Example

PY

import asyncio
import base64
import json
import os
import requests
import websockets

class VoiceBiometrics:
    """Authentication example.

    Posts the model and user name to the authenticate endpoint to get a token,
    then streams a raw audio file over a WebSocket while printing every
    message received.
    """

    model_name = "Portal"
    # Not sent by authenticate(); kept so existing references to it still work.
    model_type = "text_independent"
    username = "Mike"
    raw_audio_file = r"C:\Users\vivoka\Music\record.raw"
    authenticate_uri = "http://localhost:39806/v1/voice-biometrics/authenticate"
    audio_data_header = "data:audio/pcm;base64,"
    # Seconds to wait for the token request; without a timeout requests can block forever.
    request_timeout = 10

    def __init__(self):
        # Token returned by the authenticate endpoint; used to build the WebSocket URL.
        self.token = ""
        # Never updated by this example; kept so existing references to it still work.
        self.success = False

    async def authenticate(self):
        """Request a token, then send the audio and read replies concurrently.

        Raises:
            requests.HTTPError: the authenticate request returned an error status.
            requests.Timeout: the authenticate request exceeded ``request_timeout``.
        """
        # Send HTTP POST request to authenticate
        request_data = {
            "model": self.model_name,
            "user": self.username
        }
        response = requests.post(self.authenticate_uri, json=request_data, timeout=self.request_timeout)
        response.raise_for_status()
        json_response = response.json()
        self.token = json_response["token"]

        # Connect to WebSocket
        web_socket_url = f"ws://localhost:39806/v1/ws/{self.token}"
        async with websockets.connect(web_socket_url) as websocket:
            sending = asyncio.create_task(self._stream_audio_file(websocket))
            receiving = asyncio.create_task(self._on_msg_received(websocket))
            await asyncio.gather(sending, receiving)

    async def _on_msg_received(self, websocket):
        """Print the event, result or error carried by each incoming message.

        A message that cannot be parsed is reported and skipped, so one bad
        message does not stop the remaining ones from being read.
        """
        async for message in websocket:
            try:
                message_json = json.loads(message)
            except (json.JSONDecodeError, UnicodeDecodeError) as ex:
                print(f"Failed to parse message: {ex}")
                continue

            # Valid JSON that is not an object cannot carry any of the expected keys.
            if not isinstance(message_json, dict):
                print("Unknown message type received.")
                continue

            if "event" in message_json:
                print(f"Event received: {json.dumps(message_json['event'])}")
            elif "result" in message_json:
                print(f"Result received: {json.dumps(message_json['result'])}")
            elif "error" in message_json:
                print(f"Error received: {json.dumps(message_json['error'])}")
            else:
                print("Unknown message type received.")

    async def _stream_audio_file(self, websocket):
        """Send the audio file in 1024-byte base64 chunks.

        Each chunk is a JSON text message with the data and a ``last`` flag
        that is true on the chunk reaching the end of the file.
        """
        # Read audio data from a file and send it through websocket
        file_size = os.path.getsize(self.raw_audio_file)
        with open(self.raw_audio_file, 'rb') as fs:
            while True:
                buffer = fs.read(1024)
                if not buffer:
                    break
                base64_data = base64.b64encode(buffer).decode('utf-8')
                audio_chunk = {
                    "data": f"{self.audio_data_header}{base64_data}",
                    "last": fs.tell() == file_size
                }
                try:
                    await websocket.send(json.dumps(audio_chunk))
                except websockets.ConnectionClosed:
                    # The connection was closed before the whole file was sent:
                    # stop streaming instead of failing the task.
                    break
                await asyncio.sleep(0.01)  # To prevent overwhelming the server

if __name__ == "__main__":
    # Script entry point: build the example object and drive its authentication
    # coroutine to completion on a new event loop.
    asyncio.run(VoiceBiometrics().authenticate())

Identification

This example implements voice biometrics identification, allowing users to be identified by providing a raw audio file of their voice. The VoiceBiometrics class handles user data, server details, and performs identification by initializing an HTTP request for a token, establishing a WebSocket connection, streaming audio data, and processing server messages.

C# Example

C#

using System.Text.Json;
using System.Net.WebSockets;
using System.Text;

// Await the identification instead of blocking on Wait(): a failure then surfaces as the
// original exception rather than wrapped in an AggregateException.
await new VoiceBiometrics().Identify();

/// <summary>
/// Identification example: posts the model name to the identify endpoint, then streams a
/// raw audio file over a WebSocket while printing every message received.
/// </summary>
class VoiceBiometrics
{
    const string ModelName = "Portal";
    const string RawAudioFile = @"C:\Users\vivoka\Music\record.raw";
    const string IdentifyUri = "http://localhost:39806/v1/voice-biometrics/identify";
    const string AudioDataHeader = "data:audio/pcm;base64,";

    /// <summary>Token returned by the identify endpoint; it is used to build the WebSocket URL.</summary>
    public string Token { get; private set; } = "";

    /// <summary>
    /// Requests a token over HTTP, opens the WebSocket for that token, then sends the audio
    /// file and reads incoming messages concurrently until both sides are done.
    /// </summary>
    /// <exception cref="HttpRequestException">The identify request did not return a success status code.</exception>
    public async Task Identify()
    {
        var request = new { model = ModelName };

        using (var client = new HttpClient())
        using (var content = new StringContent(JsonSerializer.Serialize(request), Encoding.UTF8, "application/json"))
        using (var response = await client.PostAsync(IdentifyUri, content))
        {
            response.EnsureSuccessStatusCode();
            var responseBody = await response.Content.ReadAsStringAsync();

            // JsonDocument rents pooled buffers, so it is disposed once the token is copied out.
            using (var document = JsonDocument.Parse(responseBody))
            {
                Token = document.RootElement.GetProperty("token").ToString();
            }
        }

        using (var webSocket = new ClientWebSocket())
        {
            await webSocket.ConnectAsync(new Uri($"ws://localhost:39806/v1/ws/{Token}"), CancellationToken.None);

            // Both methods are already asynchronous, so no Task.Run wrapper is needed.
            var sending = StreamAudioFile(webSocket);
            var receiving = ReceivePackets(webSocket);
            await Task.WhenAll(sending, receiving);
        }
    }

    /// <summary>
    /// Parses one complete text message and prints its "event", "result" or "error" payload.
    /// </summary>
    /// <param name="message">The full text of one WebSocket message.</param>
    private void OnMsgReceived(string message)
    {
        try
        {
            using (var document = JsonDocument.Parse(message))
            {
                var root = document.RootElement;

                // TryGetProperty throws on anything that is not a JSON object.
                if (root.ValueKind != JsonValueKind.Object)
                {
                    Console.WriteLine("Unknown message type received.");
                    return;
                }

                if (root.TryGetProperty("event", out JsonElement eventElement))
                {
                    Console.WriteLine($"Event received: {JsonSerializer.Serialize(eventElement)}");
                }
                else if (root.TryGetProperty("result", out JsonElement resultElement))
                {
                    Console.WriteLine($"Result received: {JsonSerializer.Serialize(resultElement)}");
                }
                else if (root.TryGetProperty("error", out JsonElement errorElement))
                {
                    Console.WriteLine($"Error received: {JsonSerializer.Serialize(errorElement)}");
                }
                else
                {
                    Console.WriteLine("Unknown message type received.");
                }
            }
        }
        catch (JsonException ex)
        {
            Console.WriteLine($"Failed to parse message: {ex.Message}");
        }
    }

    /// <summary>
    /// Reads the audio file in 1024-byte chunks and sends each one as a JSON text message
    /// holding the base64 data and a "last" flag that is true on the chunk reaching end of file.
    /// Streaming stops early if the socket is no longer open.
    /// </summary>
    /// <param name="webSocket">The connected socket to send on.</param>
    private async Task StreamAudioFile(ClientWebSocket webSocket)
    {
        // Read audio data from a file and send it through the websocket
        using (var fs = File.OpenRead(RawAudioFile))
        {
            int bytesRead;
            var buffer = new byte[1024];
            while ((bytesRead = await fs.ReadAsync(buffer, 0, buffer.Length)) > 0)
            {
                // Stop streaming once the socket is no longer open; sending would throw.
                if (webSocket.State != WebSocketState.Open)
                {
                    break;
                }

                var base64 = Convert.ToBase64String(buffer, 0, bytesRead);
                var audioChunk = new
                {
                    data = $"{AudioDataHeader}{base64}",
                    last = fs.Position == fs.Length
                };
                var json = JsonSerializer.Serialize(audioChunk);
                var bytes = Encoding.UTF8.GetBytes(json);
                await webSocket.SendAsync(new ArraySegment<byte>(bytes), WebSocketMessageType.Text, true, CancellationToken.None);
            }
        }
    }

    /// <summary>
    /// Receives frames while the socket is open, reassembles them into complete messages and
    /// hands each one to <see cref="OnMsgReceived"/>. A close frame is answered with a close.
    /// </summary>
    /// <param name="webSocket">The connected socket to read from.</param>
    private async Task ReceivePackets(ClientWebSocket webSocket)
    {
        var buffer = new byte[1024];

        // Fragments are accumulated as bytes and decoded once per message: decoding each
        // fragment separately corrupts a multi-byte UTF-8 character split across two frames.
        using (var pending = new MemoryStream())
        {
            while (webSocket.State == WebSocketState.Open)
            {
                var packet = await webSocket.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
                if (packet.MessageType == WebSocketMessageType.Close)
                {
                    await webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, "Closing", CancellationToken.None);
                    break;
                }

                pending.Write(buffer, 0, packet.Count);
                if (packet.EndOfMessage)
                {
                    OnMsgReceived(Encoding.UTF8.GetString(pending.GetBuffer(), 0, (int)pending.Length));
                    pending.SetLength(0);
                }
            }
        }
    }
}

Python Example

PY

import asyncio
import base64
import json
import os
import requests
import websockets

class VoiceBiometrics:
    """Identification example.

    Posts the model name to the identify endpoint to get a token, then streams
    a raw audio file over a WebSocket while printing every message received.
    """

    model_name = "Portal"
    # model_type and username are not sent by identify(); kept so existing
    # references to them still work.
    model_type = "text_independent"
    username = "Mike"
    raw_audio_file = r"C:\Users\vivoka\Music\record.raw"
    identify_uri = "http://localhost:39806/v1/voice-biometrics/identify"
    audio_data_header = "data:audio/pcm;base64,"
    # Seconds to wait for the token request; without a timeout requests can block forever.
    request_timeout = 10

    def __init__(self):
        # Token returned by the identify endpoint; used to build the WebSocket URL.
        self.token = ""
        # Never updated by this example; kept so existing references to it still work.
        self.success = False

    async def identify(self):
        """Request a token, then send the audio and read replies concurrently.

        Raises:
            requests.HTTPError: the identify request returned an error status.
            requests.Timeout: the identify request exceeded ``request_timeout``.
        """
        # Send HTTP POST request to identify
        request_data = { "model": self.model_name }
        response = requests.post(self.identify_uri, json=request_data, timeout=self.request_timeout)
        response.raise_for_status()
        json_response = response.json()
        self.token = json_response["token"]

        # Connect to WebSocket
        web_socket_url = f"ws://localhost:39806/v1/ws/{self.token}"
        async with websockets.connect(web_socket_url) as websocket:
            sending = asyncio.create_task(self._stream_audio_file(websocket))
            receiving = asyncio.create_task(self._on_msg_received(websocket))
            await asyncio.gather(sending, receiving)

    async def _on_msg_received(self, websocket):
        """Print the event, result or error carried by each incoming message.

        A message that cannot be parsed is reported and skipped, so one bad
        message does not stop the remaining ones from being read.
        """
        async for message in websocket:
            try:
                message_json = json.loads(message)
            except (json.JSONDecodeError, UnicodeDecodeError) as ex:
                print(f"Failed to parse message: {ex}")
                continue

            # Valid JSON that is not an object cannot carry any of the expected keys.
            if not isinstance(message_json, dict):
                print("Unknown message type received.")
                continue

            if "event" in message_json:
                print(f"Event received: {json.dumps(message_json['event'])}")
            elif "result" in message_json:
                print(f"Result received: {json.dumps(message_json['result'])}")
            elif "error" in message_json:
                print(f"Error received: {json.dumps(message_json['error'])}")
            else:
                print("Unknown message type received.")

    async def _stream_audio_file(self, websocket):
        """Send the audio file in 1024-byte base64 chunks.

        Each chunk is a JSON text message with the data and a ``last`` flag
        that is true on the chunk reaching the end of the file.
        """
        # Read audio data from a file and send it through websocket
        file_size = os.path.getsize(self.raw_audio_file)
        with open(self.raw_audio_file, 'rb') as fs:
            while True:
                buffer = fs.read(1024)
                if not buffer:
                    break
                base64_data = base64.b64encode(buffer).decode('utf-8')
                audio_chunk = {
                    "data": f"{self.audio_data_header}{base64_data}",
                    "last": fs.tell() == file_size
                }
                try:
                    await websocket.send(json.dumps(audio_chunk))
                except websockets.ConnectionClosed:
                    # The connection was closed before the whole file was sent:
                    # stop streaming instead of failing the task.
                    break
                await asyncio.sleep(0.01)  # To prevent overwhelming the server

if __name__ == "__main__":
    # Script entry point: build the example object and drive its identification
    # coroutine to completion on a new event loop.
    asyncio.run(VoiceBiometrics().identify())