Cookie CSS

Saturday, December 12, 2015

Creating your first UCMA application - Part V

Last week we left off in our series by answering the call and saying something to the caller.  This week, we will learn how to listen for the response.  After we have prompted the user, the rest of the code looks like this, making heavy use of my HearSomething library.

HearSomething hr = new HearSomething();

                string tohear = "";

                foreach (classUsers cu in _users)
                {
                    tohear += cu.Name + "," + cu.Hear + ",";
                }
                tohear = tohear.TrimEnd(',');

                hr.Start(_call, tohear);

                while (_call.State != CallState.Terminating && !done)
                {

                    int x = 0;

                    while (x < 1000 && string.IsNullOrEmpty(hr._whatIHeard))
                    {
                        Thread.Sleep(10);
                        x++;
                    }

                    if (string.IsNullOrEmpty(hr._whatIHeard))
                    {
                        SaySomething says4 = new SaySomething(_call, "Sorry I didn't find any matching employee, can you please try again?");
                        says4.Start();

                        hr._whatIHeard = "";

                        hr.Start(_call, tohear);
                    }
                    else
                    {
                        string fnd = hr._whatIHeard.ToUpper();

                        foreach (classUsers cu in _users)
                        {
                            try
                            {
                                if (cu.Name.ToUpper() == fnd | cu.Hear.ToUpper() == fnd)
                                {
                                    CallTransferOptions unattendedTransferOptions = new CallTransferOptions(CallTransferType.Unattended);
                                    
                                    _logger.Log("Transfering call to " + cu.Name);
                                    done = true;

                                    SaySomething says5 = new SaySomething(_call, "Transferring your call to " + cu.Name +", stand by");
                                    says5.Start();

                                    _call.BeginTransfer(cu.Uri, myar => {

                                        try
                                        {
                                            _call.EndTransfer(myar);
                                        }
                                        catch { }

                                    
                                    }, null);

                                }
                            }
                            catch { }
                        }
                    }

                    Thread.Sleep(50);
                }


            }
            catch { }
        }

The tohear var is important here; this is where I push a comma-delimited series of strings to listen for.  In my case I push each employee's name and a phonetic version of that name into the variable.  This is what the HearSomething library will use to better identify what the caller says.  Once I get a match, I transfer the call to the requested user.

Here is a quick peek at the HearSomething code.

using Microsoft.Rtc.Collaboration.AudioVideo;
using Microsoft.Speech.AudioFormat;
using Microsoft.Speech.Recognition;
using Microsoft.Speech.Synthesis;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace BOCOperator
{
    internal class HearSomething
    {
        public string _whatIHeard = "";
        AudioVideoCall _call;
        ILogger _logger = new ConsoleLogger();
        SpeechRecognitionEngine _speechRecognitionEngine;

        internal string Start(AudioVideoCall _call, string choices)
        {
            _whatIHeard = "";
            SpeechRecognitionConnector speechRecognitionConnector = new SpeechRecognitionConnector();
            try
            {
                while (_call.Flow == null)
                {
                    Thread.Sleep(10);
                }

                speechRecognitionConnector.AttachFlow(_call.Flow);

                SpeechRecognitionStream stream = speechRecognitionConnector.Start();
                _speechRecognitionEngine = new SpeechRecognitionEngine();
                _speechRecognitionEngine.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(speechRecognitionEngine_SpeechRecognized);
                //_speechRecognitionEngine.LoadGrammarCompleted += new EventHandler<LoadGrammarCompletedEventArgs>(speechRecognitionEngine_LoadGrammarCompleted);
                _logger.Log("Building Speech Engine");

                var opt = new Choices();

                foreach (string s in choices.Split(','))
                {
                    opt.Add(new SemanticResultValue(s, s));
                }

                var gb = new GrammarBuilder();

                gb.Append(opt);

                Grammar g = new Grammar(gb);

                _speechRecognitionEngine.LoadGrammar(g);

                _logger.Log("Loading Grammar");

                SpeechAudioFormatInfo speechAudioFormatInfo = new SpeechAudioFormatInfo(8000, AudioBitsPerSample.Sixteen, Microsoft.Speech.AudioFormat.AudioChannel.Mono);
                _speechRecognitionEngine.SetInputToAudioStream(stream, speechAudioFormatInfo);

                _speechRecognitionEngine.RecognizeAsync(RecognizeMode.Single);

                int x = 0;
                while (x < 1000 && string.IsNullOrEmpty(_whatIHeard))
                {
                    Thread.Sleep(10);
                    x++;
                }

                _speechRecognitionEngine.SpeechRecognized -= new EventHandler<SpeechRecognizedEventArgs>(speechRecognitionEngine_SpeechRecognized);

                try
                {
                    speechRecognitionConnector.DetachFlow();
                }
                catch { }

                try
                {
                    _speechRecognitionEngine.Dispose();
                }
                catch { }

                try
                {
                    speechRecognitionConnector.Dispose();
                }
                catch { }

            }
            catch (Exception ex)
            {
                _logger.Log(ex.Message);
            }


            return _whatIHeard;

        }

        void speechRecognitionEngine_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            try
            {
                RecognitionResult recoResult = e.Result;
                _logger.Log("Heard: " + recoResult.Text);
                _whatIHeard = recoResult.Text;
            }
            catch { }


        }
    }
}

This should give you a pretty good idea of how the speech engine handles speech to text for processing caller voice prompts.

Doug Routledge, C# Lync, Skype for Business, SQL, Exchange, UC Developer  BridgeOC
Twitter - @droutledge @ndbridge


No comments:

Post a Comment

Any spam comments will be deleted and your user account will be disabled.