// Greet the caller with a synthesized prompt asking who they are trying to reach.
string greeting = "Hello, I am the Bridge Communications virtual operator. I am hear to help you find the right person. Please tell me the name of the person you are searching for, or the reason for your call.";
SaySomething says = new SaySomething(_call, greeting);
says.Start();
This is a helper class I use to connect to the audio stream of the call and use the speech capabilities in UCMA to talk to the caller. Here is what it looks like. It has two methods: Start(), which uses text-to-speech (TTS), and StartWAV(), which plays a pre-recorded audio file.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Microsoft.Rtc.Collaboration;
using Microsoft.Rtc.Signaling;
using Microsoft.Rtc.Collaboration.AudioVideo;
using Microsoft.Speech.Synthesis;
using Microsoft.Speech.AudioFormat;
using System.Threading;
namespace BOCOperator
{
    /// <summary>
    /// Plays a prompt into the audio flow of an <see cref="AudioVideoCall"/>, either as
    /// synthesized speech (<see cref="Start"/>) or from an audio file (<see cref="StartWAV"/>).
    /// Both methods block until playback has finished and never throw: failures are
    /// reported through the logger instead.
    /// </summary>
    internal class SaySomething
    {
        /// <summary>Delay between two checks of the call's media flow, in milliseconds.</summary>
        private const int FlowPollIntervalMilliseconds = 10;

        /// <summary>Number of checks after which waiting for an active flow is abandoned.</summary>
        private const int MaxFlowPollAttempts = 500;

        private readonly AudioVideoCall _call;
        private readonly string _whattosay;
        private readonly ILogger _logger = new ConsoleLogger();

        /// <summary>
        /// Creates a speaker bound to a call.
        /// </summary>
        /// <param name="call">Call whose audio flow the prompt is played into.</param>
        /// <param name="whattosay">Text spoken by <see cref="Start"/>; not used by <see cref="StartWAV"/>.</param>
        internal SaySomething(AudioVideoCall call, string whattosay)
        {
            _call = call;
            _whattosay = whattosay;
        }

        /// <summary>
        /// Speaks the text passed to the constructor using text-to-speech.
        /// </summary>
        internal void Start()
        {
            Play(prompt => prompt.AppendText(_whattosay));
        }

        /// <summary>
        /// Plays an audio file to the caller.
        /// </summary>
        /// <param name="wav">Path of the audio file handed to <c>PromptBuilder.AppendAudio</c>.</param>
        internal void StartWAV(string wav)
        {
            Play(prompt => prompt.AppendAudio(wav));
        }

        /// <summary>
        /// Shared playback pipeline: waits for the call's flow, attaches a synthesis
        /// connector, speaks the prompt produced by <paramref name="buildPrompt"/>, and
        /// then tears everything down again.
        /// </summary>
        /// <param name="buildPrompt">Fills the prompt with the content to play.</param>
        private void Play(Action<PromptBuilder> buildPrompt)
        {
            SpeechSynthesisConnector connector = new SpeechSynthesisConnector();
            SpeechSynthesizer synthesizer = null;
            bool attached = false;
            bool started = false;

            try
            {
                AudioVideoFlow flow = WaitForActiveFlow();
                if (flow == null)
                {
                    // Attaching to a missing or inactive flow cannot succeed, so stop here.
                    _logger.Log("The call's media flow did not become active within "
                        + (FlowPollIntervalMilliseconds * MaxFlowPollAttempts)
                        + " ms; the prompt was not played.");
                    return;
                }

                connector.AttachFlow(flow);
                attached = true;

                synthesizer = new SpeechSynthesizer();

                // 16 kHz, 16-bit, mono PCM output.
                SpeechAudioFormatInfo audioFormat = new SpeechAudioFormatInfo(
                    16000,
                    AudioBitsPerSample.Sixteen,
                    Microsoft.Speech.AudioFormat.AudioChannel.Mono);
                synthesizer.SetOutputToAudioStream(connector, audioFormat);

                connector.Start();
                started = true;

                PromptBuilder prompt = new PromptBuilder();
                buildPrompt(prompt);
                synthesizer.Speak(prompt);
            }
            catch (Exception ex)
            {
                _logger.Log(ex.Message);
            }
            finally
            {
                // Undo only what was actually done, in reverse order. Each step is
                // isolated so that one failing step does not prevent the next.
                if (started)
                {
                    TryCleanup(() => connector.Stop());
                }

                if (attached)
                {
                    TryCleanup(() => connector.DetachFlow());
                }

                if (synthesizer != null)
                {
                    TryCleanup(() => synthesizer.Dispose());
                }
            }
        }

        /// <summary>
        /// Polls the call until its flow exists and is in the Active state.
        /// </summary>
        /// <returns>The active flow, or <c>null</c> if it did not become active in time.</returns>
        private AudioVideoFlow WaitForActiveFlow()
        {
            for (int attempt = 0; attempt <= MaxFlowPollAttempts; attempt++)
            {
                // Read the property once per iteration so the null check and the state
                // check look at the same object.
                AudioVideoFlow flow = _call.Flow;
                if (flow != null && flow.State == MediaFlowState.Active)
                {
                    return flow;
                }

                Thread.Sleep(FlowPollIntervalMilliseconds);
            }

            return null;
        }

        /// <summary>
        /// Runs a cleanup step and logs, rather than propagates, any failure.
        /// </summary>
        /// <param name="cleanup">The cleanup step to run.</param>
        private void TryCleanup(Action cleanup)
        {
            try
            {
                cleanup();
            }
            catch (Exception ex)
            {
                _logger.Log(ex.Message);
            }
        }
    }
}
If we focus on Start() for today, you can see that it waits to make sure the call's media flow is active, then attaches itself to it. From there we declare a new SpeechSynthesizer with the standard audio format for a telephone call. (No need for 3D Dolby audio here; mono will do.) Then we build a prompt (what to say) using the PromptBuilder and call Speak, passing the prompt as the parameter. Next week we will continue with the call processing and look at listening for a response.
Doug Routledge, C# Lync, Skype for Business, SQL, Exchange, UC Developer BridgeOC Twitter - @droutledge @ndbridge
No comments:
Post a Comment
Any spam comments will be deleted and your user account will be disabled.